blob: f1e038818e6edc559ef771e2c123c9ad388ebad6 [file] [log] [blame]
using Artifacts
using DerekoVecs
using Test

# Test suite for DerekoVecs.jl. All testsets share one model that is loaded once
# from the `wpd19_10000` artifact.
@testset "DerekoVecs.jl" begin
    # Both fixture files (.vecs and .vocab) live in the same artifact subdirectory.
    datadir = joinpath(artifact"wpd19_10000", "wpd19_10000")
    wpd19 = load(joinpath(datadir, "wpd19_10000.vecs"))

    # `isapprox(x, 0)` has a default absolute tolerance of zero, which makes it an
    # exact equality check. Comparisons against zero therefore pass an explicit atol.
    zero_atol = 1e-8

    @testset "DerekoVecs.jl: loading" begin
        @test wpd19.m == 200
        @test wpd19.n >= 10000
    end

    @testset "DerekoVecs.jl: similarities" begin
        # Self-similarity is 1, for words as well as for integer arguments.
        @test cos_sim(wpd19, "war", "war") ≈ 1
        @test cos_sim(wpd19, wpd19, "war") ≈ 1
        @test cos_sim(wpd19, wpd19, 50) ≈ 1
        @test cos_sim(wpd19, 50, 50) ≈ 1

        # Symmetry, and agreement between the one-model and two-model call forms.
        # Floating-point results are compared with ≈ rather than ==.
        @test cos_sim(wpd19, "wurden", "war") ≈ cos_sim(wpd19, "war", "wurden")
        @test cos_sim(wpd19, "wurden", "war") ≈ cos_sim(wpd19, wpd19, "war", "wurden")

        # Relative ordering of similarities.
        @test cos_sim(wpd19, "wurde", "wurden") > cos_sim(wpd19, "wurde", "ich")
    end

    @testset "DerekoVecs.jl: knn" begin
        @test "dieser" in knn(wpd19, "der", 3)
        @test "wurden" in knn(wpd19, "wurde", 3)
    end

    @testset "DerekoVecs.jl: kld" begin
        # Comparing the model with itself: full overlap and zero divergence.
        mykld = kld(wpd19, wpd19)
        @test mykld.common_type_count == length(wpd19.vocabdict)
        @test mykld.common_type_share ≈ 100
        @test isapprox(mykld.kld, 0; atol=zero_atol)
        @test wpd19.total_tokens == mykld.common_token_count
        @test mykld.common_token_share ≈ 100
    end

    @testset "DerekoVecs.jl: load freq list only" begin
        wpd19_freqlist = load(joinpath(datadir, "wpd19_10000.vocab"))
        @test wpd19.total_tokens == wpd19_freqlist.total_tokens
        @test isapprox(kld(wpd19_freqlist, wpd19).kld, 0; atol=zero_atol)
    end

    @testset "DerekoVecs.jl: collocation analysis" begin
        if isnothing(wpd19.cdb)
            # Report the skip so a passing run is not mistaken for full coverage.
            @info "wpd19.cdb is nothing; skipping collocation analysis tests"
        else
            df = get_collocates(wpd19, "werden")
            @test df.collocate[1] == "kann"
            @test df.ldaf[1] > 10
            @test df.ldaf[1] > df.ldaf[3]
        end
    end
end