blob: 14508f68263067e46c8087e66e43fa567de8d097 [file] [log] [blame]
Marc Kupietzdbd431a2021-08-29 12:17:45 +02001test_that("collocationScoreQuery works", {
Marc Kupietz83d0af32022-02-24 12:49:28 +01002 skip_if_offline()
Marc Kupietz617266d2025-02-27 10:43:07 +01003 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = TRUE)
Marc Kupietz7de5f322025-06-04 17:17:22 +02004 df <- collocationScoreQuery(kco, "Ameisenplage", "heimgesucht", leftContextSize = 0, rightContextSize = 1)
Marc Kupietzdbd431a2021-08-29 12:17:45 +02005 expect_gt(df$logDice, 1)
6 expect_equal(df$ll, ll(df$O1, df$O2, df$O, df$N, df$E, df$w))
7 expect_equal(df$pmi, pmi(df$O1, df$O2, df$O, df$N, df$E, df$w))
8 expect_equal(df$mi2, mi2(df$O1, df$O2, df$O, df$N, df$E, df$w))
9 expect_equal(df$mi3, mi3(df$O1, df$O2, df$O, df$N, df$E, df$w))
10 expect_equal(df$logDice, logDice(df$O1, df$O2, df$O, df$N, df$E, df$w))
11})
12
Marc Kupietz581a29b2021-09-04 20:51:04 +020013
14test_that("collocationAnalysis works and warns about missing token", {
Marc Kupietz83d0af32022-02-24 12:49:28 +010015 skip_if_offline()
Marc Kupietz617266d2025-02-27 10:43:07 +010016 kco <- KorAPConnection(
Marc Kupietz7de5f322025-06-04 17:17:22 +020017 accessToken = NULL,
18 verbose = TRUE
19 )
20 expect_warning(
21 df <-
22 collocationAnalysis(
23 kco,
24 "focus([tt/p=ADJA] {Newstickeritis})",
25 leftContextSize = 1,
26 rightContextSize = 0,
27 ),
28 "access token"
29 )
Marc Kupietzdbd431a2021-08-29 12:17:45 +020030 expect_gt(df$O, df$E)
Marc Kupietzf9129592025-01-26 19:17:54 +010031 expect_gt(df$logDice, -1)
Marc Kupietzdbd431a2021-08-29 12:17:45 +020032})
33
34test_that("collocationAnalysis on unaccounted strings does not error out", {
Marc Kupietz83d0af32022-02-24 12:49:28 +010035 skip_if_offline()
Marc Kupietz617266d2025-02-27 10:43:07 +010036 kco <- KorAPConnection(accessToken = NULL, verbose = TRUE)
Marc Kupietz581a29b2021-09-04 20:51:04 +020037 expect_warning(
Marc Kupietz7de5f322025-06-04 17:17:22 +020038 df <- collocationAnalysis(kco, "XXXXXXXXAmeisenplage", vc = c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/"), maxRecurse = 2),
Marc Kupietz581a29b2021-09-04 20:51:04 +020039 "access token"
40 )
Marc Kupietzdbd431a2021-08-29 12:17:45 +020041 testthat::expect_equal(nrow(df), 0)
42})
Marc Kupietzd6314b62021-12-22 12:49:09 +010043
Marc Kupietz7de5f322025-06-04 17:17:22 +020044# test_that("removeWithinSpanWorks", {
Marc Kupietz76dee312025-04-06 16:24:47 +020045# expect_equal(
46# removeWithinSpan("contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))", "base/s=s"),
47# "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)")
Marc Kupietz7de5f322025-06-04 17:17:22 +020048# })
Marc Kupietzdbdbb1f2025-02-19 10:33:06 +010049
50
51test_that("mergeDuplicateCollocatesWorksAsExpected", {
Marc Kupietz5057f502025-04-06 16:55:57 +020052 ldf <- tibble::tibble(
Marc Kupietzdbdbb1f2025-02-19 10:33:06 +010053 node = c("focus(in [tt/p=NN] {[tt/l=nehmen]})"),
54 collocate = c("Anspruch"),
55 label = c(""),
56 vc = c(""),
57 query = c("Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]})"),
58 webUIRequestUrl = c(
59 "https://korap.ids-mannheim.de/?q=Anspruch%20focus%28in%20%5btt%2fp%3dNN%5d%20%7b%5btt%2fl%3dnehmen%5d%7d%29&ql=poliqarp"
60 ),
61 w = c(1),
62 leftContextSize = c(1),
63 rightContextSize = c(0),
64 N = c(23578528381.5),
65 O = c(0.5),
66 O1 = c(1168410.5),
67 O2 = c(1296870.5),
68 E = c(64.2651265093014),
69 pmi = c(11.9173498777957),
70 mi2 = c(29.8406639214616),
71 mi3 = c(47.7639779651274),
72 logDice = c(11.6899933757298),
73 ll = c(3717716.74208791)
74 )
Marc Kupietz5057f502025-04-06 16:55:57 +020075 rdf <- tibble::tibble(
Marc Kupietzdbdbb1f2025-02-19 10:33:06 +010076 node = c("focus({[tt/l=nehmen] in} [tt/p=NN])"),
77 collocate = c("Anspruch"),
78 label = c(""),
79 vc = c(""),
80 query = c("focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch"),
81 webUIRequestUrl = c(
82 "https://korap.ids-mannheim.de/?q=focus%28%7b%5btt%2fl%3dnehmen%5d%20in%7d%20%5btt%2fp%3dNN%5d%29%20Anspruch&ql=poliqarp"
83 ),
84 w = c(1),
85 leftContextSize = c(0),
86 rightContextSize = c(1),
87 N = c(23578528381.5),
88 O = c(0.5),
89 O1 = c(17077.5),
90 O2 = c(1296870.5),
91 E = c(0.939299756346416),
92 pmi = c(7.99469408391783),
93 mi2 = c(15.8990457079122),
94 mi3 = c(23.8033973319065),
95 logDice = c(2.57887487309409),
96 ll = c(2181.35986032019)
97 )
98 merged <- mergeDuplicateCollocates(ldf, rdf, smoothingConstant = 0.5)
99 expect_equal(merged$O, 0.5)
100 expect_equal(merged$O1, 1185487.5)
101 expect_equal(merged$O2, 1296870.5)
102 expect_equal(merged$query, "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch")
103})
Marc Kupietz7de5f322025-06-04 17:17:22 +0200104
Marc Kupietz5e35d7a2025-10-17 21:21:22 +0200105test_that("add_multi_vc_comparisons adds favorite columns", {
106 sample_result <- tibble::tibble(
107 node = c("n", "n"),
108 collocate = c("c", "c"),
109 vc = c("vc1", "vc2"),
110 label = c("A", "B"),
111 N = c(100, 100),
112 O = c(10, 20),
113 O1 = c(50, 50),
114 O2 = c(30, 30),
115 E = c(5, 5),
116 w = c(2, 2),
117 leftContextSize = c(1, 1),
118 rightContextSize = c(1, 1),
119 frequency = c(10, 20),
120 logDice = c(5, 7),
121 pmi = c(2, 3)
122 )
123
Marc Kupietz77852b22025-10-19 11:35:34 +0200124 enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietz5e35d7a2025-10-17 21:21:22 +0200125
126 expect_true(all(c(
127 "winner_logDice",
128 "winner_logDice_value",
129 "runner_up_logDice",
Marc Kupietz28a29842025-10-18 12:25:09 +0200130 "runner_up_logDice_value",
131 "max_delta_logDice",
132 "winner_rank_logDice",
133 "winner_rank_logDice_value",
134 "runner_up_rank_logDice",
135 "runner_up_rank_logDice_value",
136 "loser_rank_logDice",
137 "loser_rank_logDice_value",
138 "max_delta_rank_logDice",
Marc Kupietz130a2a22025-10-18 16:09:23 +0200139 "winner_percentile_rank_logDice",
140 "winner_percentile_rank_logDice_value",
141 "runner_up_percentile_rank_logDice",
142 "runner_up_percentile_rank_logDice_value",
143 "loser_percentile_rank_logDice",
144 "loser_percentile_rank_logDice_value",
145 "max_delta_percentile_rank_logDice",
Marc Kupietz28a29842025-10-18 12:25:09 +0200146 "winner_rank_pmi",
147 "winner_rank_pmi_value",
148 "runner_up_rank_pmi",
149 "runner_up_rank_pmi_value",
150 "loser_rank_pmi",
151 "loser_rank_pmi_value",
152 "max_delta_rank_pmi",
Marc Kupietz130a2a22025-10-18 16:09:23 +0200153 "winner_percentile_rank_pmi",
154 "winner_percentile_rank_pmi_value",
155 "runner_up_percentile_rank_pmi",
156 "runner_up_percentile_rank_pmi_value",
157 "loser_percentile_rank_pmi",
158 "loser_percentile_rank_pmi_value",
159 "max_delta_percentile_rank_pmi",
Marc Kupietz28a29842025-10-18 12:25:09 +0200160 "rank_A_logDice",
161 "rank_B_logDice",
162 "rank_A_pmi",
163 "rank_B_pmi",
Marc Kupietz130a2a22025-10-18 16:09:23 +0200164 "percentile_rank_A_logDice",
165 "percentile_rank_B_logDice",
166 "percentile_rank_A_pmi",
167 "percentile_rank_B_pmi",
Marc Kupietz28a29842025-10-18 12:25:09 +0200168 "delta_rank_logDice",
Marc Kupietz130a2a22025-10-18 16:09:23 +0200169 "delta_rank_pmi",
170 "delta_percentile_rank_logDice",
171 "delta_percentile_rank_pmi"
Marc Kupietz5e35d7a2025-10-17 21:21:22 +0200172 ) %in% colnames(enriched)))
173
174 expect_true(all(enriched$winner_logDice == "B"))
175 expect_true(all(enriched$runner_up_logDice == "A"))
176 expect_true(all(enriched$winner_logDice_value >= enriched$runner_up_logDice_value))
Marc Kupietz130a2a22025-10-18 16:09:23 +0200177 expect_true(all(enriched$percentile_rank_A_logDice == 1))
178 expect_true(all(enriched$percentile_rank_B_logDice == 1))
179 expect_true(all(enriched$delta_percentile_rank_logDice == 0))
Marc Kupietz5e35d7a2025-10-17 21:21:22 +0200180})
181
Marc Kupietzb2862d42025-10-18 10:17:49 +0200182test_that("add_multi_vc_comparisons handles more than two labels", {
183 sample_result <- tibble::tibble(
184 node = rep("n", 3),
185 collocate = rep("c", 3),
186 vc = c("vc1", "vc2", "vc3"),
187 label = c("A", "B", "C"),
188 N = rep(100, 3),
189 O = c(10, 30, 5),
190 O1 = rep(50, 3),
191 O2 = rep(30, 3),
192 E = rep(5, 3),
193 w = rep(2, 3),
194 leftContextSize = rep(1, 3),
195 rightContextSize = rep(1, 3),
196 frequency = c(10, 30, 5),
197 logDice = c(5, 8, 4),
198 pmi = c(2, 3, 1)
199 )
200
Marc Kupietz77852b22025-10-19 11:35:34 +0200201 enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietzb2862d42025-10-18 10:17:49 +0200202 expect_equal(enriched$winner_logDice[1], "B")
203 expect_equal(enriched$winner_logDice_value[1], 8)
204 expect_equal(enriched$runner_up_logDice[1], "A")
205 expect_equal(enriched$runner_up_logDice_value[1], 5)
206 expect_equal(enriched$loser_logDice[1], "C")
207 expect_equal(enriched$loser_logDice_value[1], 4)
208 expect_equal(enriched$max_delta_logDice[1], 4)
209})
210
Marc Kupietz28a29842025-10-18 12:25:09 +0200211test_that("add_multi_vc_comparisons computes rank deltas", {
212 base_tbl <- tidyr::expand_grid(
213 label = c("A", "B", "C"),
214 collocate = c("c1", "c2", "c3")
215 ) |>
216 dplyr::mutate(
217 node = "n",
218 vc = paste0("vc", label),
219 N = 100,
220 O = 10,
221 O1 = 50,
222 O2 = 40,
223 E = 5,
224 w = 2,
225 leftContextSize = 1,
226 rightContextSize = 1,
227 frequency = 10,
228 logDice = dplyr::case_when(
229 label == "A" & collocate == "c1" ~ 9,
230 label == "A" & collocate == "c2" ~ 6,
231 label == "A" & collocate == "c3" ~ 3,
232 label == "B" & collocate == "c1" ~ 7,
233 label == "B" & collocate == "c2" ~ 9,
234 label == "B" & collocate == "c3" ~ 5,
235 label == "C" & collocate == "c1" ~ 4,
236 label == "C" & collocate == "c2" ~ 6,
237 label == "C" & collocate == "c3" ~ 8,
238 TRUE ~ 0
239 )
240 )
241
Marc Kupietz77852b22025-10-19 11:35:34 +0200242 enriched <- RKorAPClient:::add_multi_vc_comparisons(base_tbl)
Marc Kupietz28a29842025-10-18 12:25:09 +0200243 target_row <- enriched |>
244 dplyr::filter(collocate == "c1") |>
245 dplyr::slice_head(n = 1)
246
247 expect_equal(target_row$rank_A_logDice, 1)
248 expect_equal(target_row$rank_B_logDice, 2)
249 expect_equal(target_row$rank_C_logDice, 3)
250 expect_equal(target_row$winner_rank_logDice, "A")
251 expect_equal(target_row$winner_rank_logDice_value, 1)
252 expect_equal(target_row$runner_up_rank_logDice, "B")
253 expect_equal(target_row$runner_up_rank_logDice_value, 2)
254 expect_equal(target_row$loser_rank_logDice, "C")
255 expect_equal(target_row$loser_rank_logDice_value, 3)
256 expect_equal(target_row$max_delta_rank_logDice, 2)
257})
258
259test_that("add_multi_vc_comparisons imputes missing ranks for max delta", {
260 sample_result <- tibble::tibble(
261 node = c("n", "n"),
262 collocate = c("c", "c"),
263 vc = c("vc1", "vc2"),
264 label = c("A", "B"),
265 N = c(100, 100),
266 O = c(10, 10),
267 O1 = c(50, 50),
268 O2 = c(30, 30),
269 E = c(5, 5),
270 w = c(2, 2),
271 leftContextSize = c(1, 1),
272 rightContextSize = c(1, 1),
273 frequency = c(10, 10),
274 logDice = c(5, NA)
275 )
276
Marc Kupietz77852b22025-10-19 11:35:34 +0200277 enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietz28a29842025-10-18 12:25:09 +0200278
279 expect_equal(enriched$rank_A_logDice[1], 1)
280 expect_true(is.na(enriched$rank_B_logDice[1]))
281 expect_equal(enriched$winner_rank_logDice[1], "A")
282 expect_equal(enriched$loser_rank_logDice[1], "B")
283 expect_equal(enriched$loser_rank_logDice_value[1], 2)
284 expect_equal(enriched$max_delta_rank_logDice[1], 1)
285})
286
Marc Kupietz9894a372025-10-18 14:51:29 +0200287test_that("adaptive missing score imputation respects measure-specific scales", {
288 sample_result <- tibble::tibble(
289 node = c("n", "n", "n"),
290 collocate = c("c", "c", "c"),
291 vc = c("vc1", "vc2", "vc3"),
292 label = c("A", "B", "C"),
293 N = c(100, 100, 100),
294 O = c(12, 9, 7),
295 O1 = c(60, 40, 30),
296 O2 = c(33, 22, 18),
297 E = c(6, 6, 6),
298 w = c(2, 2, 2),
299 leftContextSize = c(1, 1, 1),
300 rightContextSize = c(1, 1, 1),
301 frequency = c(15, 11, 9),
302 logDice = c(-0.31, NA, -0.12),
303 pmi = c(-1.65, NA, -0.48),
304 ll = c(12.4, NA, 7.9)
305 )
306
307 enriched <- RKorAPClient:::add_multi_vc_comparisons(
308 sample_result,
Marc Kupietz9894a372025-10-18 14:51:29 +0200309 missingScoreQuantile = 0.05
310 )
311
312 row_a <- dplyr::filter(enriched, label == "A") |> dplyr::slice_head(n = 1)
313
314 expect_false(is.na(row_a$logDice_B))
315 expect_false(is.na(row_a$pmi_B))
316 expect_false(is.na(row_a$ll_B))
317
318 expect_lt(row_a$logDice_B, min(sample_result$logDice, na.rm = TRUE))
319 expect_lt(row_a$pmi_B, min(sample_result$pmi, na.rm = TRUE))
320 expect_lte(row_a$ll_B, min(sample_result$ll, na.rm = TRUE))
321
322 expect_gt(row_a$max_delta_logDice, 0)
323 expect_gt(row_a$winner_logDice_value - row_a$loser_logDice_value, 0)
324})
325
Marc Kupietz7de5f322025-06-04 17:17:22 +0200326# New tests for improved coverage of collocationAnalysis.R helper functions
327
328test_that("synsemanticStopwords returns German stopwords", {
329 stopwords <- synsemanticStopwords()
330 expect_true(is.character(stopwords))
331 expect_true(length(stopwords) > 50)
332 expect_true("der" %in% stopwords)
333 expect_true("die" %in% stopwords)
334 expect_true("und" %in% stopwords)
335 expect_true("mit" %in% stopwords)
336})
337
338test_that("removeWithinSpan removes span constraints correctly", {
339 # Test basic span removal
340 query1 <- "contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))"
341 result1 <- RKorAPClient:::removeWithinSpan(query1, "base/s=s")
342 expect_equal(result1, "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)")
343
344 # Test with different span
345 query2 <- "contains(<p/s=s>, (test query))"
346 result2 <- RKorAPClient:::removeWithinSpan(query2, "p/s=s")
347 expect_equal(result2, "(test query)")
348
349 # Test with empty span - should return original query
350 query3 <- "simple query"
351 result3 <- RKorAPClient:::removeWithinSpan(query3, "")
352 expect_equal(result3, query3)
353
354 # Test with non-matching span
355 query4 <- "contains(<base/s=s>, test)"
356 result4 <- RKorAPClient:::removeWithinSpan(query4, "other/span")
357 expect_equal(result4, query4)
358})
359
360test_that("matches2FreqTable handles empty matches", {
361 empty_matches <- data.frame()
362 result <- RKorAPClient:::matches2FreqTable(empty_matches, index = 0)
363
364 expect_true(is.data.frame(result))
365 expect_equal(nrow(result), 0)
366})
367
368test_that("matches2FreqTable processes single match correctly", {
369 # Create mock matches data
370 mock_matches <- data.frame(
371 tokens = I(list(list(
372 left = c("der", "große"),
373 match = "Test",
374 right = c("ist", "wichtig")
375 ))),
376 stringsAsFactors = FALSE
377 )
378
379 result <- RKorAPClient:::matches2FreqTable(
380 mock_matches,
381 index = 1,
382 leftContextSize = 2,
383 rightContextSize = 2,
384 stopwords = c("der", "ist") # Provide stopwords to avoid empty join
385 )
386
387 expect_true(is.data.frame(result))
388})
389
390test_that("snippet2FreqTable handles empty snippet", {
391 result <- RKorAPClient:::snippet2FreqTable(character(0))
392
393 expect_true(is.data.frame(result))
394 expect_equal(nrow(result), 0)
395})
396
397test_that("snippet2FreqTable processes single snippet correctly", {
398 snippet <- '<span class="context-left">der große </span><span class="match"><mark>Test</mark></span><span class="context-right"> ist wichtig</span>'
399
400 result <- RKorAPClient:::snippet2FreqTable(
401 snippet,
402 leftContextSize = 2,
403 rightContextSize = 2,
404 stopwords = c("der"), # Provide stopwords to avoid empty join
405 verbose = FALSE
406 )
407
408 expect_true(is.data.frame(result))
409})
410
Marc Kupietz0a292632025-10-19 14:04:36 +0200411test_that("inject_focus_into_query adds focus wrappers when span queries lack them", {
412 unfocused <- "contains(<base/s=s>, (Anspruch [tt/l=nehmen] | [tt/l=nehmen] Anspruch))"
413 focused <- RKorAPClient:::inject_focus_into_query(unfocused)
414
415 expect_equal(
416 focused,
417 "contains(<base/s=s>, (focus({Anspruch [tt/l=nehmen]}) | focus({[tt/l=nehmen] Anspruch})))"
418 )
419})
420
421test_that("inject_focus_into_query leaves existing focus segments unchanged", {
422 already_focused <- "contains(<base/s=s>, (focus({Anspruch [tt/l=nehmen]})))"
423 expect_identical(
424 RKorAPClient:::inject_focus_into_query(already_focused),
425 already_focused
426 )
427})
428
Marc Kupietz7de5f322025-06-04 17:17:22 +0200429# Removed hanging findExample tests as they cause infinite wait
430# These tests make API calls that don't complete properly
431
432# Removed hanging collocatesQuery tests as they cause infinite wait
433# These tests were causing the test suite to hang and not terminate
434
435test_that("collocationAnalysis handles exactFrequencies parameter", {
436 skip_if_offline()
437 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
438
439 expect_warning(
440 result <- collocationAnalysis(
441 kco,
442 "Test",
443 exactFrequencies = TRUE,
444 searchHitsSampleLimit = 5,
445 topCollocatesLimit = 5
446 ),
447 "access token"
448 )
449 expect_true(is.data.frame(result))
450})
451
452test_that("collocationAnalysis handles withinSpan parameter", {
453 skip_if_offline()
454 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
455
456 expect_warning(
457 result <- collocationAnalysis(
458 kco,
459 "Test",
460 withinSpan = "base/s=s",
461 exactFrequencies = TRUE,
462 searchHitsSampleLimit = 5,
463 topCollocatesLimit = 5
464 ),
465 "access token"
466 )
467 expect_true(is.data.frame(result))
468})
469
470test_that("collocationAnalysis handles expand parameter", {
471 skip_if_offline()
472 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
473
474 expect_warning(
475 result <- collocationAnalysis(
476 kco,
477 c("Test", "der"),
478 expand = TRUE,
479 searchHitsSampleLimit = 2,
480 topCollocatesLimit = 2
481 ),
482 "access token"
483 )
484 expect_true(is.data.frame(result))
485})
486
Marc Kupietze34a8be2025-10-17 20:13:42 +0200487test_that("collocationAnalysis honors named vc labels", {
488 skip_if_offline()
489 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
490
491 named_vc <- c(
492 Western = "textType=/.*Western.*/ & pubDate in 2012",
493 Erotic = "textType=/.*(Erotik|Gay).*/ & pubDate in 2012",
494 Historic = "textType=/.*Historisch.*/ & pubDate in 2012"
495 )
496
497 expect_warning(
498 result <- collocationAnalysis(
499 kco,
500 "[tt/l=treffen]",
501 vc = named_vc,
502 searchHitsSampleLimit = 2,
503 topCollocatesLimit = 2
504 ),
505 "access token"
506 )
507
508 if (nrow(result) > 0) {
509 expect_true("label" %in% colnames(result))
510 expect_setequal(unique(result$label), names(named_vc))
511 }
512})
513
Marc Kupietz7de5f322025-06-04 17:17:22 +0200514test_that("collocationAnalysis handles stopwords parameter", {
515 skip_if_offline()
516 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
517
518 expect_warning(
519 result <- collocationAnalysis(
520 kco,
521 "Test",
522 stopwords = c("der", "die", "und"),
523 searchHitsSampleLimit = 5,
524 topCollocatesLimit = 5
525 ),
526 "access token"
527 )
528 expect_true(is.data.frame(result))
529})
530
531test_that("collocationAnalysis handles lemmatizeNodeQuery parameter", {
532 skip_if_offline()
533 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
534
535 expect_warning(
536 result <- collocationAnalysis(
537 kco,
538 "laufen",
539 lemmatizeNodeQuery = TRUE,
540 searchHitsSampleLimit = 5,
541 topCollocatesLimit = 5
542 ),
543 "access token"
544 )
545 expect_true(is.data.frame(result))
546})
547
548test_that("collocationAnalysis handles addExamples parameter", {
549 skip_if_offline()
550 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
551
552 expect_warning(
553 result <- collocationAnalysis(
554 kco,
555 "Test",
556 addExamples = TRUE,
557 searchHitsSampleLimit = 3,
558 topCollocatesLimit = 3
559 ),
560 "access token"
561 )
562 expect_true(is.data.frame(result))
563 if (nrow(result) > 0) {
564 expect_true("example" %in% colnames(result))
565 }
566})
567
568test_that("collocationAnalysis handles maxRecurse parameter", {
569 skip_if_offline()
570 kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
571
572 expect_warning(
573 result <- collocationAnalysis(
574 kco,
575 "Test",
576 maxRecurse = 1,
577 searchHitsSampleLimit = 2,
578 topCollocatesLimit = 2
579 ),
580 "access token"
581 )
582 expect_true(is.data.frame(result))
583})