Blame - tests/testthat/test-collocations.R - KorAP/RKorAPClient

blob: 14508f68263067e46c8087e66e43fa567de8d097 [file] [log] [blame]

Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1	test_that("collocationScoreQuery works", {
Marc Kupietz	83d0af3	2022-02-24 12:49:28 +0100	[diff] [blame]	2	skip_if_offline()
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	3	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = TRUE)
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	4	df <- collocationScoreQuery(kco, "Ameisenplage", "heimgesucht", leftContextSize = 0, rightContextSize = 1)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	5	expect_gt(df$logDice, 1)
				6	expect_equal(df$ll, ll(df$O1, df$O2, df$O, df$N, df$E, df$w))
				7	expect_equal(df$pmi, pmi(df$O1, df$O2, df$O, df$N, df$E, df$w))
				8	expect_equal(df$mi2, mi2(df$O1, df$O2, df$O, df$N, df$E, df$w))
				9	expect_equal(df$mi3, mi3(df$O1, df$O2, df$O, df$N, df$E, df$w))
				10	expect_equal(df$logDice, logDice(df$O1, df$O2, df$O, df$N, df$E, df$w))
				11	})
				12
Marc Kupietz	581a29b	2021-09-04 20:51:04 +0200	[diff] [blame]	13
				14	test_that("collocationAnalysis works and warns about missing token", {
Marc Kupietz	83d0af3	2022-02-24 12:49:28 +0100	[diff] [blame]	15	skip_if_offline()
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	16	kco <- KorAPConnection(
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	17	accessToken = NULL,
				18	verbose = TRUE
				19	)
				20	expect_warning(
				21	df <-
				22	collocationAnalysis(
				23	kco,
				24	"focus([tt/p=ADJA] {Newstickeritis})",
				25	leftContextSize = 1,
				26	rightContextSize = 0,
				27	),
				28	"access token"
				29	)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	30	expect_gt(df$O, df$E)
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	31	expect_gt(df$logDice, -1)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	32	})
				33
				34	test_that("collocationAnalysis on unaccounted strings does not error out", {
Marc Kupietz	83d0af3	2022-02-24 12:49:28 +0100	[diff] [blame]	35	skip_if_offline()
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	36	kco <- KorAPConnection(accessToken = NULL, verbose = TRUE)
Marc Kupietz	581a29b	2021-09-04 20:51:04 +0200	[diff] [blame]	37	expect_warning(
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	38	df <- collocationAnalysis(kco, "XXXXXXXXAmeisenplage", vc = c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/"), maxRecurse = 2),
Marc Kupietz	581a29b	2021-09-04 20:51:04 +0200	[diff] [blame]	39	"access token"
				40	)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	41	testthat::expect_equal(nrow(df), 0)
				42	})
Marc Kupietz	d6314b6	2021-12-22 12:49:09 +0100	[diff] [blame]	43
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	44	# test_that("removeWithinSpanWorks", {
Marc Kupietz	76dee31	2025-04-06 16:24:47 +0200	[diff] [blame]	45	# expect_equal(
				46	# removeWithinSpan("contains(<base/s=s>, (machen []{0,1} aufmerksam \| aufmerksam []{0,1} machen))", "base/s=s"),
				47	# "(machen []{0,1} aufmerksam \| aufmerksam []{0,1} machen)")
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	48	# })
Marc Kupietz	dbdbb1f	2025-02-19 10:33:06 +0100	[diff] [blame]	49
				50
				51	test_that("mergeDuplicateCollocatesWorksAsExpected", {
Marc Kupietz	5057f50	2025-04-06 16:55:57 +0200	[diff] [blame]	52	ldf <- tibble::tibble(
Marc Kupietz	dbdbb1f	2025-02-19 10:33:06 +0100	[diff] [blame]	53	node = c("focus(in [tt/p=NN] {[tt/l=nehmen]})"),
				54	collocate = c("Anspruch"),
				55	label = c(""),
				56	vc = c(""),
				57	query = c("Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]})"),
				58	webUIRequestUrl = c(
				59	"https://korap.ids-mannheim.de/?q=Anspruch%20focus%28in%20%5btt%2fp%3dNN%5d%20%7b%5btt%2fl%3dnehmen%5d%7d%29&ql=poliqarp"
				60	),
				61	w = c(1),
				62	leftContextSize = c(1),
				63	rightContextSize = c(0),
				64	N = c(23578528381.5),
				65	O = c(0.5),
				66	O1 = c(1168410.5),
				67	O2 = c(1296870.5),
				68	E = c(64.2651265093014),
				69	pmi = c(11.9173498777957),
				70	mi2 = c(29.8406639214616),
				71	mi3 = c(47.7639779651274),
				72	logDice = c(11.6899933757298),
				73	ll = c(3717716.74208791)
				74	)
Marc Kupietz	5057f50	2025-04-06 16:55:57 +0200	[diff] [blame]	75	rdf <- tibble::tibble(
Marc Kupietz	dbdbb1f	2025-02-19 10:33:06 +0100	[diff] [blame]	76	node = c("focus({[tt/l=nehmen] in} [tt/p=NN])"),
				77	collocate = c("Anspruch"),
				78	label = c(""),
				79	vc = c(""),
				80	query = c("focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch"),
				81	webUIRequestUrl = c(
				82	"https://korap.ids-mannheim.de/?q=focus%28%7b%5btt%2fl%3dnehmen%5d%20in%7d%20%5btt%2fp%3dNN%5d%29%20Anspruch&ql=poliqarp"
				83	),
				84	w = c(1),
				85	leftContextSize = c(0),
				86	rightContextSize = c(1),
				87	N = c(23578528381.5),
				88	O = c(0.5),
				89	O1 = c(17077.5),
				90	O2 = c(1296870.5),
				91	E = c(0.939299756346416),
				92	pmi = c(7.99469408391783),
				93	mi2 = c(15.8990457079122),
				94	mi3 = c(23.8033973319065),
				95	logDice = c(2.57887487309409),
				96	ll = c(2181.35986032019)
				97	)
				98	merged <- mergeDuplicateCollocates(ldf, rdf, smoothingConstant = 0.5)
				99	expect_equal(merged$O, 0.5)
				100	expect_equal(merged$O1, 1185487.5)
				101	expect_equal(merged$O2, 1296870.5)
				102	expect_equal(merged$query, "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) \| focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch")
				103	})
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	104
Marc Kupietz	5e35d7a	2025-10-17 21:21:22 +0200	[diff] [blame]	105	test_that("add_multi_vc_comparisons adds favorite columns", {
				106	sample_result <- tibble::tibble(
				107	node = c("n", "n"),
				108	collocate = c("c", "c"),
				109	vc = c("vc1", "vc2"),
				110	label = c("A", "B"),
				111	N = c(100, 100),
				112	O = c(10, 20),
				113	O1 = c(50, 50),
				114	O2 = c(30, 30),
				115	E = c(5, 5),
				116	w = c(2, 2),
				117	leftContextSize = c(1, 1),
				118	rightContextSize = c(1, 1),
				119	frequency = c(10, 20),
				120	logDice = c(5, 7),
				121	pmi = c(2, 3)
				122	)
				123
Marc Kupietz	77852b2	2025-10-19 11:35:34 +0200	[diff] [blame]	124	enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietz	5e35d7a	2025-10-17 21:21:22 +0200	[diff] [blame]	125
				126	expect_true(all(c(
				127	"winner_logDice",
				128	"winner_logDice_value",
				129	"runner_up_logDice",
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	130	"runner_up_logDice_value",
				131	"max_delta_logDice",
				132	"winner_rank_logDice",
				133	"winner_rank_logDice_value",
				134	"runner_up_rank_logDice",
				135	"runner_up_rank_logDice_value",
				136	"loser_rank_logDice",
				137	"loser_rank_logDice_value",
				138	"max_delta_rank_logDice",
Marc Kupietz	130a2a2	2025-10-18 16:09:23 +0200	[diff] [blame]	139	"winner_percentile_rank_logDice",
				140	"winner_percentile_rank_logDice_value",
				141	"runner_up_percentile_rank_logDice",
				142	"runner_up_percentile_rank_logDice_value",
				143	"loser_percentile_rank_logDice",
				144	"loser_percentile_rank_logDice_value",
				145	"max_delta_percentile_rank_logDice",
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	146	"winner_rank_pmi",
				147	"winner_rank_pmi_value",
				148	"runner_up_rank_pmi",
				149	"runner_up_rank_pmi_value",
				150	"loser_rank_pmi",
				151	"loser_rank_pmi_value",
				152	"max_delta_rank_pmi",
Marc Kupietz	130a2a2	2025-10-18 16:09:23 +0200	[diff] [blame]	153	"winner_percentile_rank_pmi",
				154	"winner_percentile_rank_pmi_value",
				155	"runner_up_percentile_rank_pmi",
				156	"runner_up_percentile_rank_pmi_value",
				157	"loser_percentile_rank_pmi",
				158	"loser_percentile_rank_pmi_value",
				159	"max_delta_percentile_rank_pmi",
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	160	"rank_A_logDice",
				161	"rank_B_logDice",
				162	"rank_A_pmi",
				163	"rank_B_pmi",
Marc Kupietz	130a2a2	2025-10-18 16:09:23 +0200	[diff] [blame]	164	"percentile_rank_A_logDice",
				165	"percentile_rank_B_logDice",
				166	"percentile_rank_A_pmi",
				167	"percentile_rank_B_pmi",
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	168	"delta_rank_logDice",
Marc Kupietz	130a2a2	2025-10-18 16:09:23 +0200	[diff] [blame]	169	"delta_rank_pmi",
				170	"delta_percentile_rank_logDice",
				171	"delta_percentile_rank_pmi"
Marc Kupietz	5e35d7a	2025-10-17 21:21:22 +0200	[diff] [blame]	172	) %in% colnames(enriched)))
				173
				174	expect_true(all(enriched$winner_logDice == "B"))
				175	expect_true(all(enriched$runner_up_logDice == "A"))
				176	expect_true(all(enriched$winner_logDice_value >= enriched$runner_up_logDice_value))
Marc Kupietz	130a2a2	2025-10-18 16:09:23 +0200	[diff] [blame]	177	expect_true(all(enriched$percentile_rank_A_logDice == 1))
				178	expect_true(all(enriched$percentile_rank_B_logDice == 1))
				179	expect_true(all(enriched$delta_percentile_rank_logDice == 0))
Marc Kupietz	5e35d7a	2025-10-17 21:21:22 +0200	[diff] [blame]	180	})
				181
Marc Kupietz	b2862d4	2025-10-18 10:17:49 +0200	[diff] [blame]	182	test_that("add_multi_vc_comparisons handles more than two labels", {
				183	sample_result <- tibble::tibble(
				184	node = rep("n", 3),
				185	collocate = rep("c", 3),
				186	vc = c("vc1", "vc2", "vc3"),
				187	label = c("A", "B", "C"),
				188	N = rep(100, 3),
				189	O = c(10, 30, 5),
				190	O1 = rep(50, 3),
				191	O2 = rep(30, 3),
				192	E = rep(5, 3),
				193	w = rep(2, 3),
				194	leftContextSize = rep(1, 3),
				195	rightContextSize = rep(1, 3),
				196	frequency = c(10, 30, 5),
				197	logDice = c(5, 8, 4),
				198	pmi = c(2, 3, 1)
				199	)
				200
Marc Kupietz	77852b2	2025-10-19 11:35:34 +0200	[diff] [blame]	201	enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietz	b2862d4	2025-10-18 10:17:49 +0200	[diff] [blame]	202	expect_equal(enriched$winner_logDice[1], "B")
				203	expect_equal(enriched$winner_logDice_value[1], 8)
				204	expect_equal(enriched$runner_up_logDice[1], "A")
				205	expect_equal(enriched$runner_up_logDice_value[1], 5)
				206	expect_equal(enriched$loser_logDice[1], "C")
				207	expect_equal(enriched$loser_logDice_value[1], 4)
				208	expect_equal(enriched$max_delta_logDice[1], 4)
				209	})
				210
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	211	test_that("add_multi_vc_comparisons computes rank deltas", {
				212	base_tbl <- tidyr::expand_grid(
				213	label = c("A", "B", "C"),
				214	collocate = c("c1", "c2", "c3")
				215	) \|>
				216	dplyr::mutate(
				217	node = "n",
				218	vc = paste0("vc", label),
				219	N = 100,
				220	O = 10,
				221	O1 = 50,
				222	O2 = 40,
				223	E = 5,
				224	w = 2,
				225	leftContextSize = 1,
				226	rightContextSize = 1,
				227	frequency = 10,
				228	logDice = dplyr::case_when(
				229	label == "A" & collocate == "c1" ~ 9,
				230	label == "A" & collocate == "c2" ~ 6,
				231	label == "A" & collocate == "c3" ~ 3,
				232	label == "B" & collocate == "c1" ~ 7,
				233	label == "B" & collocate == "c2" ~ 9,
				234	label == "B" & collocate == "c3" ~ 5,
				235	label == "C" & collocate == "c1" ~ 4,
				236	label == "C" & collocate == "c2" ~ 6,
				237	label == "C" & collocate == "c3" ~ 8,
				238	TRUE ~ 0
				239	)
				240	)
				241
Marc Kupietz	77852b2	2025-10-19 11:35:34 +0200	[diff] [blame]	242	enriched <- RKorAPClient:::add_multi_vc_comparisons(base_tbl)
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	243	target_row <- enriched \|>
				244	dplyr::filter(collocate == "c1") \|>
				245	dplyr::slice_head(n = 1)
				246
				247	expect_equal(target_row$rank_A_logDice, 1)
				248	expect_equal(target_row$rank_B_logDice, 2)
				249	expect_equal(target_row$rank_C_logDice, 3)
				250	expect_equal(target_row$winner_rank_logDice, "A")
				251	expect_equal(target_row$winner_rank_logDice_value, 1)
				252	expect_equal(target_row$runner_up_rank_logDice, "B")
				253	expect_equal(target_row$runner_up_rank_logDice_value, 2)
				254	expect_equal(target_row$loser_rank_logDice, "C")
				255	expect_equal(target_row$loser_rank_logDice_value, 3)
				256	expect_equal(target_row$max_delta_rank_logDice, 2)
				257	})
				258
				259	test_that("add_multi_vc_comparisons imputes missing ranks for max delta", {
				260	sample_result <- tibble::tibble(
				261	node = c("n", "n"),
				262	collocate = c("c", "c"),
				263	vc = c("vc1", "vc2"),
				264	label = c("A", "B"),
				265	N = c(100, 100),
				266	O = c(10, 10),
				267	O1 = c(50, 50),
				268	O2 = c(30, 30),
				269	E = c(5, 5),
				270	w = c(2, 2),
				271	leftContextSize = c(1, 1),
				272	rightContextSize = c(1, 1),
				273	frequency = c(10, 10),
				274	logDice = c(5, NA)
				275	)
				276
Marc Kupietz	77852b2	2025-10-19 11:35:34 +0200	[diff] [blame]	277	enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result)
Marc Kupietz	28a2984	2025-10-18 12:25:09 +0200	[diff] [blame]	278
				279	expect_equal(enriched$rank_A_logDice[1], 1)
				280	expect_true(is.na(enriched$rank_B_logDice[1]))
				281	expect_equal(enriched$winner_rank_logDice[1], "A")
				282	expect_equal(enriched$loser_rank_logDice[1], "B")
				283	expect_equal(enriched$loser_rank_logDice_value[1], 2)
				284	expect_equal(enriched$max_delta_rank_logDice[1], 1)
				285	})
				286
Marc Kupietz	9894a37	2025-10-18 14:51:29 +0200	[diff] [blame]	287	test_that("adaptive missing score imputation respects measure-specific scales", {
				288	sample_result <- tibble::tibble(
				289	node = c("n", "n", "n"),
				290	collocate = c("c", "c", "c"),
				291	vc = c("vc1", "vc2", "vc3"),
				292	label = c("A", "B", "C"),
				293	N = c(100, 100, 100),
				294	O = c(12, 9, 7),
				295	O1 = c(60, 40, 30),
				296	O2 = c(33, 22, 18),
				297	E = c(6, 6, 6),
				298	w = c(2, 2, 2),
				299	leftContextSize = c(1, 1, 1),
				300	rightContextSize = c(1, 1, 1),
				301	frequency = c(15, 11, 9),
				302	logDice = c(-0.31, NA, -0.12),
				303	pmi = c(-1.65, NA, -0.48),
				304	ll = c(12.4, NA, 7.9)
				305	)
				306
				307	enriched <- RKorAPClient:::add_multi_vc_comparisons(
				308	sample_result,
Marc Kupietz	9894a37	2025-10-18 14:51:29 +0200	[diff] [blame]	309	missingScoreQuantile = 0.05
				310	)
				311
				312	row_a <- dplyr::filter(enriched, label == "A") \|> dplyr::slice_head(n = 1)
				313
				314	expect_false(is.na(row_a$logDice_B))
				315	expect_false(is.na(row_a$pmi_B))
				316	expect_false(is.na(row_a$ll_B))
				317
				318	expect_lt(row_a$logDice_B, min(sample_result$logDice, na.rm = TRUE))
				319	expect_lt(row_a$pmi_B, min(sample_result$pmi, na.rm = TRUE))
				320	expect_lte(row_a$ll_B, min(sample_result$ll, na.rm = TRUE))
				321
				322	expect_gt(row_a$max_delta_logDice, 0)
				323	expect_gt(row_a$winner_logDice_value - row_a$loser_logDice_value, 0)
				324	})
				325
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	326	# New tests for improved coverage of collocationAnalysis.R helper functions
				327
				328	test_that("synsemanticStopwords returns German stopwords", {
				329	stopwords <- synsemanticStopwords()
				330	expect_true(is.character(stopwords))
				331	expect_true(length(stopwords) > 50)
				332	expect_true("der" %in% stopwords)
				333	expect_true("die" %in% stopwords)
				334	expect_true("und" %in% stopwords)
				335	expect_true("mit" %in% stopwords)
				336	})
				337
				338	test_that("removeWithinSpan removes span constraints correctly", {
				339	# Test basic span removal
				340	query1 <- "contains(<base/s=s>, (machen []{0,1} aufmerksam \| aufmerksam []{0,1} machen))"
				341	result1 <- RKorAPClient:::removeWithinSpan(query1, "base/s=s")
				342	expect_equal(result1, "(machen []{0,1} aufmerksam \| aufmerksam []{0,1} machen)")
				343
				344	# Test with different span
				345	query2 <- "contains(<p/s=s>, (test query))"
				346	result2 <- RKorAPClient:::removeWithinSpan(query2, "p/s=s")
				347	expect_equal(result2, "(test query)")
				348
				349	# Test with empty span - should return original query
				350	query3 <- "simple query"
				351	result3 <- RKorAPClient:::removeWithinSpan(query3, "")
				352	expect_equal(result3, query3)
				353
				354	# Test with non-matching span
				355	query4 <- "contains(<base/s=s>, test)"
				356	result4 <- RKorAPClient:::removeWithinSpan(query4, "other/span")
				357	expect_equal(result4, query4)
				358	})
				359
				360	test_that("matches2FreqTable handles empty matches", {
				361	empty_matches <- data.frame()
				362	result <- RKorAPClient:::matches2FreqTable(empty_matches, index = 0)
				363
				364	expect_true(is.data.frame(result))
				365	expect_equal(nrow(result), 0)
				366	})
				367
				368	test_that("matches2FreqTable processes single match correctly", {
				369	# Create mock matches data
				370	mock_matches <- data.frame(
				371	tokens = I(list(list(
				372	left = c("der", "große"),
				373	match = "Test",
				374	right = c("ist", "wichtig")
				375	))),
				376	stringsAsFactors = FALSE
				377	)
				378
				379	result <- RKorAPClient:::matches2FreqTable(
				380	mock_matches,
				381	index = 1,
				382	leftContextSize = 2,
				383	rightContextSize = 2,
				384	stopwords = c("der", "ist") # Provide stopwords to avoid empty join
				385	)
				386
				387	expect_true(is.data.frame(result))
				388	})
				389
				390	test_that("snippet2FreqTable handles empty snippet", {
				391	result <- RKorAPClient:::snippet2FreqTable(character(0))
				392
				393	expect_true(is.data.frame(result))
				394	expect_equal(nrow(result), 0)
				395	})
				396
				397	test_that("snippet2FreqTable processes single snippet correctly", {
				398	snippet <- '<span class="context-left">der große </span><span class="match"><mark>Test</mark></span><span class="context-right"> ist wichtig</span>'
				399
				400	result <- RKorAPClient:::snippet2FreqTable(
				401	snippet,
				402	leftContextSize = 2,
				403	rightContextSize = 2,
				404	stopwords = c("der"), # Provide stopwords to avoid empty join
				405	verbose = FALSE
				406	)
				407
				408	expect_true(is.data.frame(result))
				409	})
				410
Marc Kupietz	0a29263	2025-10-19 14:04:36 +0200	[diff] [blame^]	411	test_that("inject_focus_into_query adds focus wrappers when span queries lack them", {
				412	unfocused <- "contains(<base/s=s>, (Anspruch [tt/l=nehmen] \| [tt/l=nehmen] Anspruch))"
				413	focused <- RKorAPClient:::inject_focus_into_query(unfocused)
				414
				415	expect_equal(
				416	focused,
				417	"contains(<base/s=s>, (focus({Anspruch [tt/l=nehmen]}) \| focus({[tt/l=nehmen] Anspruch})))"
				418	)
				419	})
				420
				421	test_that("inject_focus_into_query leaves existing focus segments unchanged", {
				422	already_focused <- "contains(<base/s=s>, (focus({Anspruch [tt/l=nehmen]})))"
				423	expect_identical(
				424	RKorAPClient:::inject_focus_into_query(already_focused),
				425	already_focused
				426	)
				427	})
				428
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	429	# Removed hanging findExample tests as they cause infinite wait
				430	# These tests make API calls that don't complete properly
				431
				432	# Removed hanging collocatesQuery tests as they cause infinite wait
				433	# These tests were causing the test suite to hang and not terminate
				434
				435	test_that("collocationAnalysis handles exactFrequencies parameter", {
				436	skip_if_offline()
				437	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				438
				439	expect_warning(
				440	result <- collocationAnalysis(
				441	kco,
				442	"Test",
				443	exactFrequencies = TRUE,
				444	searchHitsSampleLimit = 5,
				445	topCollocatesLimit = 5
				446	),
				447	"access token"
				448	)
				449	expect_true(is.data.frame(result))
				450	})
				451
				452	test_that("collocationAnalysis handles withinSpan parameter", {
				453	skip_if_offline()
				454	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				455
				456	expect_warning(
				457	result <- collocationAnalysis(
				458	kco,
				459	"Test",
				460	withinSpan = "base/s=s",
				461	exactFrequencies = TRUE,
				462	searchHitsSampleLimit = 5,
				463	topCollocatesLimit = 5
				464	),
				465	"access token"
				466	)
				467	expect_true(is.data.frame(result))
				468	})
				469
				470	test_that("collocationAnalysis handles expand parameter", {
				471	skip_if_offline()
				472	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				473
				474	expect_warning(
				475	result <- collocationAnalysis(
				476	kco,
				477	c("Test", "der"),
				478	expand = TRUE,
				479	searchHitsSampleLimit = 2,
				480	topCollocatesLimit = 2
				481	),
				482	"access token"
				483	)
				484	expect_true(is.data.frame(result))
				485	})
				486
Marc Kupietz	e34a8be	2025-10-17 20:13:42 +0200	[diff] [blame]	487	test_that("collocationAnalysis honors named vc labels", {
				488	skip_if_offline()
				489	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				490
				491	named_vc <- c(
				492	Western = "textType=/.Western./ & pubDate in 2012",
				493	Erotic = "textType=/.(Erotik\|Gay)./ & pubDate in 2012",
				494	Historic = "textType=/.Historisch./ & pubDate in 2012"
				495	)
				496
				497	expect_warning(
				498	result <- collocationAnalysis(
				499	kco,
				500	"[tt/l=treffen]",
				501	vc = named_vc,
				502	searchHitsSampleLimit = 2,
				503	topCollocatesLimit = 2
				504	),
				505	"access token"
				506	)
				507
				508	if (nrow(result) > 0) {
				509	expect_true("label" %in% colnames(result))
				510	expect_setequal(unique(result$label), names(named_vc))
				511	}
				512	})
				513
Marc Kupietz	7de5f32	2025-06-04 17:17:22 +0200	[diff] [blame]	514	test_that("collocationAnalysis handles stopwords parameter", {
				515	skip_if_offline()
				516	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				517
				518	expect_warning(
				519	result <- collocationAnalysis(
				520	kco,
				521	"Test",
				522	stopwords = c("der", "die", "und"),
				523	searchHitsSampleLimit = 5,
				524	topCollocatesLimit = 5
				525	),
				526	"access token"
				527	)
				528	expect_true(is.data.frame(result))
				529	})
				530
				531	test_that("collocationAnalysis handles lemmatizeNodeQuery parameter", {
				532	skip_if_offline()
				533	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				534
				535	expect_warning(
				536	result <- collocationAnalysis(
				537	kco,
				538	"laufen",
				539	lemmatizeNodeQuery = TRUE,
				540	searchHitsSampleLimit = 5,
				541	topCollocatesLimit = 5
				542	),
				543	"access token"
				544	)
				545	expect_true(is.data.frame(result))
				546	})
				547
				548	test_that("collocationAnalysis handles addExamples parameter", {
				549	skip_if_offline()
				550	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				551
				552	expect_warning(
				553	result <- collocationAnalysis(
				554	kco,
				555	"Test",
				556	addExamples = TRUE,
				557	searchHitsSampleLimit = 3,
				558	topCollocatesLimit = 3
				559	),
				560	"access token"
				561	)
				562	expect_true(is.data.frame(result))
				563	if (nrow(result) > 0) {
				564	expect_true("example" %in% colnames(result))
				565	}
				566	})
				567
				568	test_that("collocationAnalysis handles maxRecurse parameter", {
				569	skip_if_offline()
				570	kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
				571
				572	expect_warning(
				573	result <- collocationAnalysis(
				574	kco,
				575	"Test",
				576	maxRecurse = 1,
				577	searchHitsSampleLimit = 2,
				578	topCollocatesLimit = 2
				579	),
				580	"access token"
				581	)
				582	expect_true(is.data.frame(result))
				583	})