# blob: bc2955466e825b23fcdea5b2b5d62014d7fc367b [file] [log] [blame]
using Artifacts
using DerekoVecs
using Test
# Test suite for DerekoVecs, run against the `wpd19_10000` artifact.
@testset "DerekoVecs.jl" begin
    # Loaded once; every nested test set below reuses this object.
    wpd19 = load(joinpath(artifact"wpd19_10000", "wpd19_10000", "wpd19_10000.vecs"))

    # Absolute tolerance for "is zero" checks. `isapprox(x, 0)` with the default
    # tolerances has `atol == 0` and therefore degenerates to `x == 0`.
    zero_atol = 1e-6

    @testset "DerekoVecs.jl: loading" begin
        @test wpd19.m == 200
        @test wpd19.n >= 10000
    end

    @testset "DerekoVecs.jl: similarities" begin
        # A word compared with itself must yield a similarity of 1.
        @test isapprox(DerekoVecs.cos_sim(wpd19, "war", "war"), 1)
        # Symmetry: argument order must not matter. Compared approximately
        # because the values are floating-point results.
        @test isapprox(cos_sim(wpd19, "wurden", "war"), cos_sim(wpd19, "war", "wurden"))
        @test cos_sim(wpd19, "wurde", "wurden") > cos_sim(wpd19, "wurde", "ich")
        # The two-model method, called with the same model twice, must agree
        # with the single-model method.
        @test isapprox(
            cos_sim(wpd19, "wurden", "war"), cos_sim(wpd19, wpd19, "war", "wurden")
        )
        @test isapprox(cos_sim(wpd19, wpd19, "war"), 1)
        # Integer arguments are accepted in place of words.
        @test isapprox(cos_sim(wpd19, wpd19, 50), 1)
        @test isapprox(cos_sim(wpd19, 50, 50), 1)
    end

    @testset "DerekoVecs.jl: knn" begin
        @test "dieser" in knn(wpd19, "der", 3)
        @test "wurden" in knn(wpd19, "wurde", 3)
    end

    @testset "DerekoVecs.jl: kld" begin
        # Comparing a model with itself: full overlap, zero divergence.
        mykld = kld(wpd19, wpd19)
        @test mykld.common_type_count == length(wpd19.vocabdict)
        @test isapprox(mykld.common_type_share, 100)
        @test isapprox(mykld.kld, 0; atol=zero_atol)
        @test wpd19.total_tokens == mykld.common_token_count
        @test isapprox(mykld.common_token_share, 100)
    end

    @testset "DerekoVecs.jl: load freq list only" begin
        # Load the `.vocab` file of the same artifact and compare it with the
        # object loaded from the `.vecs` file.
        wpd19_freqlist = load(
            joinpath(artifact"wpd19_10000", "wpd19_10000", "wpd19_10000.vocab")
        )
        @test wpd19.total_tokens == wpd19_freqlist.total_tokens
        @test isapprox(kld(wpd19_freqlist, wpd19).kld, 0; atol=zero_atol)
    end

    @testset "DerekoVecs.jl: collocation analysis" begin
        if isnothing(wpd19.cdb)
            # Make the skip visible instead of silently reporting an empty,
            # passing test set.
            @info "Skipping collocation tests: wpd19.cdb is nothing"
        else
            # Diagnostic only; shown when debug logging is enabled.
            @debug "vocabulary sample" index = 30 word = wpd19.vocab[30]
            coll = get_collocates(wpd19, "werden")
            @test coll[1].ldaf > 10
            @test coll[1].ldaf > coll[3].ldaf
        end
    end
end