blob: a93ca57b61688eb096a24e5fec561e6bc79aa9f5 [file] [log] [blame]
# Live check: every association score returned by collocationScoreQuery() must
# equal the corresponding scoring function applied to the returned counts.
test_that("collocationScoreQuery works", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = TRUE)
  scores <- collocationScoreQuery(
    connection,
    "Ameisenplage",
    "heimgesucht",
    leftContextSize = 0,
    rightContextSize = 1
  )

  # Positional arguments shared by all scoring functions checked below.
  counts <- list(scores$O1, scores$O2, scores$O, scores$N, scores$E, scores$w)

  expect_gt(scores$logDice, 1)
  expect_equal(scores$ll, do.call(ll, counts))
  expect_equal(scores$pmi, do.call(pmi, counts))
  expect_equal(scores$mi2, do.call(mi2, counts))
  expect_equal(scores$mi3, do.call(mi3, counts))
  expect_equal(scores$logDice, do.call(logDice, counts))
})
12
Marc Kupietz581a29b2021-09-04 20:51:04 +020013
# Live check: collocationAnalysis() on a connection without access token must
# warn about the access token; the observed frequency must exceed the expected
# one and logDice must be greater than -1.
test_that("collocationAnalysis works and warns about missing token", {
  skip_if_offline()
  kco <- KorAPConnection(
    accessToken = NULL,
    verbose = TRUE
  )
  expect_warning(
    df <-
      collocationAnalysis(
        kco,
        "focus([tt/p=ADJA] {Newstickeritis})",
        leftContextSize = 1,
        # No trailing comma here: it would pass an extra, empty argument.
        rightContextSize = 0
      ),
    "access token"
  )
  expect_gt(df$O, df$E)
  expect_gt(df$logDice, -1)
})
33
# Live check: a node that is not expected to occur in the given virtual corpora
# must produce an empty result (plus the access-token warning), not an error.
test_that("collocationAnalysis on unaccounted strings does not error out", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, verbose = TRUE)
  virtual_corpora <- c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/")
  expect_warning(
    collocates <- collocationAnalysis(
      connection,
      "XXXXXXXXAmeisenplage",
      vc = virtual_corpora,
      maxRecurse = 2
    ),
    "access token"
  )
  expect_equal(nrow(collocates), 0)
})
Marc Kupietzd6314b62021-12-22 12:49:09 +010043
# test_that("removeWithinSpanWorks", {
#   expect_equal(
#     removeWithinSpan("contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))", "base/s=s"),
#     "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)")
# })
Marc Kupietzdbdbb1f2025-02-19 10:33:06 +010049
50
# Offline check: merging a left-context and a right-context result row for the
# same collocate must yield the expected combined frequencies and query.
test_that("mergeDuplicateCollocatesWorksAsExpected", {
  # Result row for the collocate found to the left of the node.
  left_hits <- tibble::tibble(
    node = "focus(in [tt/p=NN] {[tt/l=nehmen]})",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]})",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=Anspruch%20focus%28in%20%5btt%2fp%3dNN%5d%20%7b%5btt%2fl%3dnehmen%5d%7d%29&ql=poliqarp",
    w = 1,
    leftContextSize = 1,
    rightContextSize = 0,
    N = 23578528381.5,
    O = 0.5,
    O1 = 1168410.5,
    O2 = 1296870.5,
    E = 64.2651265093014,
    pmi = 11.9173498777957,
    mi2 = 29.8406639214616,
    mi3 = 47.7639779651274,
    logDice = 11.6899933757298,
    ll = 3717716.74208791
  )

  # Result row for the same collocate found to the right of the node.
  right_hits <- tibble::tibble(
    node = "focus({[tt/l=nehmen] in} [tt/p=NN])",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=focus%28%7b%5btt%2fl%3dnehmen%5d%20in%7d%20%5btt%2fp%3dNN%5d%29%20Anspruch&ql=poliqarp",
    w = 1,
    leftContextSize = 0,
    rightContextSize = 1,
    N = 23578528381.5,
    O = 0.5,
    O1 = 17077.5,
    O2 = 1296870.5,
    E = 0.939299756346416,
    pmi = 7.99469408391783,
    mi2 = 15.8990457079122,
    mi3 = 23.8033973319065,
    logDice = 2.57887487309409,
    ll = 2181.35986032019
  )

  combined <- mergeDuplicateCollocates(left_hits, right_hits, smoothingConstant = 0.5)

  # NOTE(review): the expected O1 looks like the sum of both unsmoothed O1
  # values with the smoothing constant added once; confirm against
  # mergeDuplicateCollocates().
  expect_equal(combined$O, 0.5)
  expect_equal(combined$O1, 1185487.5)
  expect_equal(combined$O2, 1296870.5)
  expect_equal(
    combined$query,
    "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch"
  )
})
Marc Kupietz7de5f322025-06-04 17:17:22 +0200104
# Tests added to improve coverage of collocationAnalysis.R: helper functions and collocationAnalysis() parameters
106
# Offline check: synsemanticStopwords() must return a character vector with
# more than 50 entries that includes some common German function words.
test_that("synsemanticStopwords returns German stopwords", {
  stopwords <- synsemanticStopwords()

  # Typed expectations report the actual type/length on failure, unlike
  # expect_true() on a pre-computed boolean.
  expect_type(stopwords, "character")
  expect_gt(length(stopwords), 50)

  # On failure the diff lists exactly which of the expected words are missing.
  expected_words <- c("der", "die", "und", "mit")
  expect_identical(setdiff(expected_words, stopwords), character(0))
})
116
# Offline check of the internal removeWithinSpan() helper: a matching
# contains(<span>, ...) wrapper is stripped, anything else is left untouched.
test_that("removeWithinSpan removes span constraints correctly", {
  strip_span <- RKorAPClient:::removeWithinSpan

  # Matching span around an alternation: only the inner query remains
  expect_equal(
    strip_span(
      "contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))",
      "base/s=s"
    ),
    "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)"
  )

  # Same behavior for a different span name
  expect_equal(
    strip_span("contains(<p/s=s>, (test query))", "p/s=s"),
    "(test query)"
  )

  # Empty span: the query must come back unchanged
  plain_query <- "simple query"
  expect_equal(strip_span(plain_query, ""), plain_query)

  # Span that does not occur in the query: the query must come back unchanged
  wrapped_query <- "contains(<base/s=s>, test)"
  expect_equal(strip_span(wrapped_query, "other/span"), wrapped_query)
})
138
# Offline check: an empty matches data frame must yield an empty data frame.
test_that("matches2FreqTable handles empty matches", {
  no_matches <- data.frame()
  freq_table <- RKorAPClient:::matches2FreqTable(no_matches, index = 0)

  expect_true(is.data.frame(freq_table))
  expect_equal(nrow(freq_table), 0)
})
146
# Offline check: a single mocked match (left context, match, right context)
# must be turned into a data frame.
test_that("matches2FreqTable processes single match correctly", {
  # One mocked hit, stored as a list column entry
  single_hit <- list(
    left = c("der", "große"),
    match = "Test",
    right = c("ist", "wichtig")
  )
  mocked_matches <- data.frame(
    tokens = I(list(single_hit)),
    stringsAsFactors = FALSE
  )

  freq_table <- RKorAPClient:::matches2FreqTable(
    mocked_matches,
    index = 1,
    leftContextSize = 2,
    rightContextSize = 2,
    # Stopwords are supplied explicitly to avoid an empty join
    stopwords = c("der", "ist")
  )

  expect_true(is.data.frame(freq_table))
})
168
# Offline check: a zero-length snippet vector must yield an empty data frame.
test_that("snippet2FreqTable handles empty snippet", {
  no_snippets <- character(0)
  freq_table <- RKorAPClient:::snippet2FreqTable(no_snippets)

  expect_true(is.data.frame(freq_table))
  expect_equal(nrow(freq_table), 0)
})
175
# Offline check: a single HTML snippet with left context, match and right
# context must be turned into a data frame.
test_that("snippet2FreqTable processes single snippet correctly", {
  html_snippet <- '<span class="context-left">der große </span><span class="match"><mark>Test</mark></span><span class="context-right"> ist wichtig</span>'

  freq_table <- RKorAPClient:::snippet2FreqTable(
    html_snippet,
    leftContextSize = 2,
    rightContextSize = 2,
    # Stopwords are supplied explicitly to avoid an empty join
    stopwords = c("der"),
    verbose = FALSE
  )

  expect_true(is.data.frame(freq_table))
})
189
# The findExample tests were removed because they hung indefinitely:
# they make API calls that never complete properly.

# The collocatesQuery tests were removed for the same reason:
# they caused the test suite to hang and never terminate.
195
# Live check: collocationAnalysis() with exactFrequencies = TRUE must return a
# data frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles exactFrequencies parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "Test",
      exactFrequencies = TRUE,
      searchHitsSampleLimit = 5,
      topCollocatesLimit = 5
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})
212
# Live check: collocationAnalysis() restricted to a span via withinSpan must
# return a data frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles withinSpan parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "Test",
      withinSpan = "base/s=s",
      exactFrequencies = TRUE,
      searchHitsSampleLimit = 5,
      topCollocatesLimit = 5
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})
230
# Live check: collocationAnalysis() with two node queries and expand = TRUE
# must return a data frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles expand parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
  node_queries <- c("Test", "der")

  expect_warning(
    collocates <- collocationAnalysis(
      connection, node_queries,
      expand = TRUE,
      searchHitsSampleLimit = 2,
      topCollocatesLimit = 2
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})
247
# Live check: collocationAnalysis() with a custom stopword list must return a
# data frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles stopwords parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
  custom_stopwords <- c("der", "die", "und")

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "Test",
      stopwords = custom_stopwords,
      searchHitsSampleLimit = 5,
      topCollocatesLimit = 5
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})
264
# Live check: collocationAnalysis() with lemmatizeNodeQuery = TRUE must return
# a data frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles lemmatizeNodeQuery parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "laufen",
      lemmatizeNodeQuery = TRUE,
      searchHitsSampleLimit = 5,
      topCollocatesLimit = 5
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})
281
# Live check: collocationAnalysis() with addExamples = TRUE must return a data
# frame which, when non-empty, carries an "example" column; the call is
# expected to warn about the access token.
test_that("collocationAnalysis handles addExamples parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "Test",
      addExamples = TRUE,
      searchHitsSampleLimit = 3,
      topCollocatesLimit = 3
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
  # The column is only checked when at least one collocate was returned.
  has_rows <- nrow(collocates) > 0
  if (has_rows) {
    expect_true("example" %in% colnames(collocates))
  }
})
301
# Live check: collocationAnalysis() with maxRecurse = 1 must return a data
# frame; the call is expected to warn about the access token.
test_that("collocationAnalysis handles maxRecurse parameter", {
  skip_if_offline()
  connection <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)

  expect_warning(
    collocates <- collocationAnalysis(
      connection, "Test",
      maxRecurse = 1,
      searchHitsSampleLimit = 2,
      topCollocatesLimit = 2
    ),
    "access token"
  )

  expect_true(is.data.frame(collocates))
})