# Source blob: aa4a771319dc7695905f76bca2621aec87768aee
# Live-server test (skipped when offline): every association score returned by
# collocationScoreQuery() must equal the value recomputed from the returned
# frequency columns (O1, O2, O, N, E, w) with the matching score function.
test_that("collocationScoreQuery works", {
  skip_if_offline()
  kco <- new("KorAPConnection", accessToken = NULL, cache = TRUE, verbose = TRUE)
  df <- collocationScoreQuery(
    kco, "Ameisenplage", "heimgesucht",
    leftContextSize = 0, rightContextSize = 1
  )
  expect_gt(df$logDice, 1)
  expect_equal(df$ll, ll(df$O1, df$O2, df$O, df$N, df$E, df$w))
  expect_equal(df$pmi, pmi(df$O1, df$O2, df$O, df$N, df$E, df$w))
  expect_equal(df$mi2, mi2(df$O1, df$O2, df$O, df$N, df$E, df$w))
  expect_equal(df$mi3, mi3(df$O1, df$O2, df$O, df$N, df$E, df$w))
  expect_equal(df$logDice, logDice(df$O1, df$O2, df$O, df$N, df$E, df$w))
})

# Live-server test (skipped when offline): with accessToken = NULL,
# collocationAnalysis() is expected to emit a warning matching "access token"
# while still returning a result whose observed frequency exceeds the
# expected one.
test_that("collocationAnalysis works and warns about missing token", {
  skip_if_offline()
  kco <- new("KorAPConnection",
    accessToken = NULL,
    verbose = TRUE
  )
  # The assignment happens inside expect_warning() so that the result stays
  # available for the assertions below.
  expect_warning(
    df <-
      collocationAnalysis(
        kco,
        "focus([tt/p=ADJA] {Newstickeritis})",
        leftContextSize = 1,
        rightContextSize = 0 # no trailing comma: it would pass an empty argument
      ),
    "access token"
  )
  expect_gt(df$O, df$E)
  expect_gt(df$logDice, -1)
})

# Live-server test (skipped when offline): for the query string below the
# analysis must not raise an error; it is expected to warn about the missing
# access token and to return a result with zero rows.
test_that("collocationAnalysis on unaccounted strings does not error out", {
  skip_if_offline()
  kco <- new("KorAPConnection", accessToken = NULL, verbose = TRUE)
  expect_warning(
    df <- collocationAnalysis(
      kco,
      "XXXXXXXXAmeisenplage",
      vc = c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/"),
      maxRecurse = 2
    ),
    "access token"
  )
  expect_equal(nrow(df), 0)
})

# removeWithinSpan() applied to a contains(<base/s=s>, ...) query with the
# span "base/s=s" must return just the inner query expression.
test_that("removeWithinSpanWorks", {
  wrapped_query <-
    "contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))"
  inner_query <- "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)"
  expect_equal(removeWithinSpan(wrapped_query, "base/s=s"), inner_query)
})


# Offline test: merges one left-context row (ldf) and one right-context row
# (rdf) for the same collocate "Anspruch" and checks the merged frequencies
# and the combined query string.
test_that("mergeDuplicateCollocatesWorksAsExpected", {
  # Row with leftContextSize = 1 / rightContextSize = 0.
  ldf <- tibble(
    node = "focus(in [tt/p=NN] {[tt/l=nehmen]})",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]})",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=Anspruch%20focus%28in%20%5btt%2fp%3dNN%5d%20%7b%5btt%2fl%3dnehmen%5d%7d%29&ql=poliqarp",
    w = 1,
    leftContextSize = 1,
    rightContextSize = 0,
    N = 23578528381.5,
    O = 0.5,
    O1 = 1168410.5,
    O2 = 1296870.5,
    E = 64.2651265093014,
    pmi = 11.9173498777957,
    mi2 = 29.8406639214616,
    mi3 = 47.7639779651274,
    logDice = 11.6899933757298,
    ll = 3717716.74208791
  )
  # Row with leftContextSize = 0 / rightContextSize = 1.
  rdf <- tibble(
    node = "focus({[tt/l=nehmen] in} [tt/p=NN])",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=focus%28%7b%5btt%2fl%3dnehmen%5d%20in%7d%20%5btt%2fp%3dNN%5d%29%20Anspruch&ql=poliqarp",
    w = 1,
    leftContextSize = 0,
    rightContextSize = 1,
    N = 23578528381.5,
    O = 0.5,
    O1 = 17077.5,
    O2 = 1296870.5,
    E = 0.939299756346416,
    pmi = 7.99469408391783,
    mi2 = 15.8990457079122,
    mi3 = 23.8033973319065,
    logDice = 2.57887487309409,
    ll = 2181.35986032019
  )
  merged <- mergeDuplicateCollocates(ldf, rdf, smoothingConstant = 0.5)
  # The expected O and O1 are consistent with adding the two rows' values and
  # subtracting the smoothing constant once (0.5 + 0.5 - 0.5 and
  # 1168410.5 + 17077.5 - 0.5); O2 is the value shared by both rows.
  expect_equal(merged$O, 0.5)
  expect_equal(merged$O1, 1185487.5)
  expect_equal(merged$O2, 1296870.5)
  # The merged query is the two input queries joined with " | ".
  expect_equal(
    merged$query,
    "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch"
  )
})