# Test suite for DerekoVecs.jl.
#
# All checks run against the `wpd19_10000` artifact: the `.vecs` file is loaded
# once and shared by every nested test set; the `.vocab` file is loaded
# separately to exercise the frequency-list-only code path.

using Artifacts
using DerekoVecs
using DataFrames
using Test

@testset "DerekoVecs.jl" begin
    # Directory inside the artifact that holds both the `.vecs` and `.vocab` files.
    datadir = joinpath(artifact"wpd19_10000", "wpd19_10000")

    # Loaded once; the nested test sets below only read from it.
    wpd19 = load(joinpath(datadir, "wpd19_10000.vecs"))

    @testset "DerekoVecs.jl: loading" begin
        # Presumably `m` is the vector dimensionality and `n` the number of
        # vocabulary entries -- TODO confirm against the package docs.
        @test wpd19.m == 200
        @test wpd19.n >= 10000
    end

    @testset "DerekoVecs.jl: similarities" begin
        # Self-similarity must be 1 (up to rounding); the qualified call also
        # checks that `cos_sim` is reachable through the module name.
        @test DerekoVecs.cos_sim(wpd19, "war", "war") ≈ 1

        # Symmetry. Compared with `≈` rather than `==`: these are floating-point
        # results and bit-for-bit equality is not guaranteed across code paths.
        @test cos_sim(wpd19, "wurden", "war") ≈ cos_sim(wpd19, "war", "wurden")

        @test cos_sim(wpd19, "wurde", "wurden") > cos_sim(wpd19, "wurde", "ich")

        # The two-model method applied to the same model twice must agree with
        # the single-model method.
        @test cos_sim(wpd19, "wurden", "war") ≈ cos_sim(wpd19, wpd19, "war", "wurden")
        @test cos_sim(wpd19, wpd19, "war") ≈ 1

        # Integer arguments (presumably vocabulary indices -- TODO confirm).
        @test cos_sim(wpd19, wpd19, 50) ≈ 1
        @test cos_sim(wpd19, 50, 50) ≈ 1
    end

    @testset "DerekoVecs.jl: knn" begin
        @test "dieser" in knn(wpd19, "der", 3)
        @test "wurden" in knn(wpd19, "wurde", 3)
    end

    @testset "DerekoVecs.jl: kld" begin
        # Comparing a model with itself: everything is shared and the
        # divergence must vanish.
        mykld = kld(wpd19, wpd19)
        @test mykld.common_type_count == length(wpd19.vocabdict)
        @test mykld.common_type_share ≈ 100
        # `isapprox(x, 0)` without `atol` only holds for `x == 0` exactly,
        # because the default tolerance is purely relative.
        @test mykld.kld ≈ 0 atol = 1e-6
        @test wpd19.total_tokens == mykld.common_token_count
        @test mykld.common_token_share ≈ 100
    end

    @testset "DerekoVecs.jl: load freq list only" begin
        wpd19_freqlist = load(joinpath(datadir, "wpd19_10000.vocab"))
        @test wpd19.total_tokens == wpd19_freqlist.total_tokens
        # Explicit `atol` for the same reason as in the "kld" test set.
        @test kld(wpd19_freqlist, wpd19).kld ≈ 0 atol = 1e-6
    end

    @testset "DerekoVecs.jl: collocation analysis" begin
        if isnothing(wpd19.cdb)
            # No collocation database came with the model. Record a skipped
            # test so the missing coverage shows up in the summary instead of
            # this test set passing with zero assertions.
            @test_skip nrow(get_collocates(wpd19, 3, 1)) == 1
        else
            df = get_collocates(wpd19, "werden")
            @test df.collocate[1] == "kann"
            @test df.ldaf[1] > 10
            @test df.ldaf[1] > df.ldaf[3]
            @test nrow(get_collocates(wpd19, 3, 1)) == 1
            @test nrow(get_collocates(wpd19, 3, 2)) == 2
        end
    end
end