Blame - tests/testthat/test-fetchAnnotations.R - KorAP/RKorAPClient

blob: 20a73cab825937abc00ce3bf3506ebda7a2b8819 [file] [log] [blame]

Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1	test_that("fetchAnnotations works with valid matches", {
				2	skip_if_offline()
				3
				4	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				5	q <- kco %>%
				6	corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
				7	fetchNext(maxFetch = 2)
				8
				9	# Skip test if no matches found
				10	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for test query")
				11
				12	# Test that structured annotation columns are initially absent
				13	expect_false("atokens" %in% colnames(q@collectedMatches))
				14	expect_false("pos" %in% colnames(q@collectedMatches))
				15
				16	# Test that matchID is preserved in collectedMatches
				17	expect_true("matchID" %in% colnames(q@collectedMatches))
				18	expect_true(all(!is.na(q@collectedMatches$matchID)))
				19
				20	# Test fetchAnnotations with default foundry
				21	q_with_annotations <- fetchAnnotations(q, verbose = FALSE)
				22
				23	# Check that structured annotation columns are now populated
				24	expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
				25	expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
				26
				27	# Check that the structured columns have left/match/right components
				28	expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$atokens)))
				29	expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$pos)))
				30
				31	# Test fetchAnnotations with specific foundry
				32	q_with_tt <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
				33	expect_true("atokens" %in% colnames(q_with_tt@collectedMatches))
				34	expect_true("pos" %in% colnames(q_with_tt@collectedMatches))
				35
				36	# Test that annotations contain actual content (regression test for URL construction)
				37	if (nrow(q_with_tt@collectedMatches) > 0) {
				38	# Check that the first match has populated annotation data
				39	expect_true(length(q_with_tt@collectedMatches$atokens$left[[1]]) > 0 \|\|
				40	length(q_with_tt@collectedMatches$atokens$match[[1]]) > 0 \|\|
				41	length(q_with_tt@collectedMatches$atokens$right[[1]]) > 0)
				42	expect_true(length(q_with_tt@collectedMatches$pos$left[[1]]) > 0 \|\|
				43	length(q_with_tt@collectedMatches$pos$match[[1]]) > 0 \|\|
				44	length(q_with_tt@collectedMatches$pos$right[[1]]) > 0)
				45	}
				46	})
				47
				48	test_that("fetchAnnotations handles empty matches gracefully", {
				49	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				50
				51	# Create a query object with no collected matches
				52	q <- KorAPQuery(
				53	korapConnection = kco,
				54	collectedMatches = NULL
				55	)
				56
				57	# Should warn and return original object
				58	expect_warning(
				59	result <- fetchAnnotations(q, verbose = FALSE),
				60	"No collected matches found"
				61	)
				62	expect_identical(result, q)
				63	})
				64
				65	test_that("fetchAnnotations preserves original object structure", {
				66	skip_if_offline()
				67
				68	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				69	q <- kco %>%
				70	corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
				71	fetchNext(maxFetch = 1)
				72
				73	# Skip test if no matches found
				74	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for test query")
				75
				76	q_original <- q
				77	q_with_annotations <- fetchAnnotations(q, verbose = FALSE)
				78
				79	# Check that all original slots are preserved
				80	expect_identical(q_with_annotations@korapConnection, q_original@korapConnection)
				81	expect_identical(q_with_annotations@request, q_original@request)
				82	expect_identical(q_with_annotations@vc, q_original@vc)
				83	expect_identical(q_with_annotations@totalResults, q_original@totalResults)
				84
				85	# collectedMatches should have additional annotation columns
				86	expect_true(nrow(q_with_annotations@collectedMatches) == nrow(q_original@collectedMatches))
				87	expect_true(ncol(q_with_annotations@collectedMatches) > ncol(q_original@collectedMatches))
				88
				89	# Original columns should be preserved
				90	original_cols <- colnames(q_original@collectedMatches)
				91	expect_true(all(original_cols %in% colnames(q_with_annotations@collectedMatches)))
				92
				93	# New annotation columns should be present
				94	expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
				95	expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
				96	})
				97
				98	test_that("fetchAnnotations returns structured left/match/right format", {
				99	skip_if_offline()
				100
				101	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				102	q <- kco %>%
				103	corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
				104	fetchNext(maxFetch = 1)
				105
				106	# Skip test if no matches found
				107	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for test query")
				108
				109	q_with_annotations <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
				110
				111	# Test that structured annotation columns exist
				112	expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
				113	expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
				114
				115	# Test the structure of annotation columns
				116	atokens <- q_with_annotations@collectedMatches$atokens
				117	pos <- q_with_annotations@collectedMatches$pos
				118
				119	expect_true(is.data.frame(atokens))
				120	expect_true(is.data.frame(pos))
				121
				122	expect_true(all(c("left", "match", "right") %in% names(atokens)))
				123	expect_true(all(c("left", "match", "right") %in% names(pos)))
				124
				125	# Test that each component is a list column
				126	expect_true(is.list(atokens$left))
				127	expect_true(is.list(atokens$match))
				128	expect_true(is.list(atokens$right))
				129	expect_true(is.list(pos$left))
				130	expect_true(is.list(pos$match))
				131	expect_true(is.list(pos$right))
				132
				133	# Test that the first match has actual data
				134	if (nrow(q_with_annotations@collectedMatches) > 0) {
				135	# At least one of left/match/right should have content
				136	total_tokens <- length(atokens$left[[1]]) + length(atokens$match[[1]]) + length(atokens$right[[1]])
				137	expect_true(total_tokens > 0)
				138
				139	total_pos <- length(pos$left[[1]]) + length(pos$match[[1]]) + length(pos$right[[1]])
				140	expect_true(total_pos > 0)
				141
				142	# Token count should match POS count
				143	expect_equal(total_tokens, total_pos)
				144
				145	# Match tokens should not be empty (since we found a match)
				146	expect_true(length(atokens$match[[1]]) > 0)
				147	expect_true(length(pos$match[[1]]) > 0)
				148	}
				149	})
				150
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	151	test_that("parser covers full span across multiple <mark> blocks", {
				152	# Local, offline test to ensure correct match extraction when multiple
				153	# <mark>…</mark> segments occur within the match span.
				154	xml_snippet <- '<span class="context-left"></span>
				155	<span class="match">
				156	<span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span>
				157	<mark>
				158	<span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span>
				159	</mark>
				160	<span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span>
				161	<mark>
				162	<span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span>
				163	<span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span>
				164	</mark>
				165	</span>
				166	<span class="context-right"></span>'
				167
				168	parsed <- parse_xml_annotations_structured(xml_snippet)
				169
				170	# Left context contains the pre-mark token
				171	expect_equal(parsed$atokens$left, c("Wir"))
				172
				173	# Match should include everything from the first <mark> to the last </mark>,
				174	# including tokens between them
				175	expect_equal(parsed$atokens$match, c("können", "alles", "außer", "Plan"))
				176
				177	# POS and lemma lengths align with tokens in each section
				178	expect_length(parsed$pos$match, length(parsed$atokens$match))
				179	expect_length(parsed$lemma$match, length(parsed$atokens$match))
				180	expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR", "NN"))
				181	expect_equal(parsed$lemma$match, c("können", "alles", "außer", "Plan"))
				182
				183	# Right context should be empty in this snippet
				184	expect_length(parsed$atokens$right, 0)
				185	})
				186
				187	test_that("parser keeps tokens separated across   between spans", {
				188	xml_snippet <- '<span class="context-left"></span>
				189	<span class="match">
				190	<mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark> <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark>
				191	</span>
				192	<span class="context-right"></span>'
				193
				194	parsed <- parse_xml_annotations_structured(xml_snippet)
				195	expect_equal(parsed$atokens$match, c("können", "alles", "außer"))
				196	expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR"))
				197	expect_equal(parsed$lemma$match, c("können", "alles", "außer"))
				198	})
				199
				200	test_that("parser keeps tokens separated across punctuation between spans", {
				201	xml_snippet <- '<span class="context-left"></span>
				202	<span class="match">
				203	<mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark>, <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark>
				204	</span>
				205	<span class="context-right"></span>'
				206
				207	parsed <- parse_xml_annotations_structured(xml_snippet)
				208	expect_equal(parsed$atokens$match, c("können", "alles", "außer"))
				209	expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR"))
				210	expect_equal(parsed$lemma$match, c("können", "alles", "außer"))
				211	})
				212
				213	test_that("online: fetchAnnotations aligns pos/lemma with tokens for complex query", {
				214	skip_if_offline()
				215
				216	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				217	q <- kco %>%
				218	corpusQuery('[orth="[wW]ir"] können alles [orth="ausser" \| orth="außer"] [tt/pos=NN]',
				219	metadataOnly = FALSE,
				220	fields = c("textSigle", "snippet", "tokens")) %>%
				221	fetchNext(maxFetch = 20)
				222
				223	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for online test")
				224
				225	q2 <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
				226
				227	# For each match, POS and lemma counts must equal token count in the match span
				228	for (i in seq_len(nrow(q2@collectedMatches))) {
				229	tt <- q2@collectedMatches$tokens$match[[i]]
				230	pp <- q2@collectedMatches$pos$match[[i]]
				231	ll <- q2@collectedMatches$lemma$match[[i]]
				232	expect_equal(length(tt), length(pp))
				233	expect_equal(length(tt), length(ll))
				234	}
				235	})
				236
				237	test_that("fetchAnnotations aligns tokens and annotations across multiple <mark> blocks (stubbed API)", {
				238	# Define a minimal dummy KorAPConnection-like S4 class for offline testing
				239	setClass('DummyKCO', slots = c(apiUrl='character', verbose='logical'))
				240	setMethod('apiCall', 'DummyKCO', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				241	list(snippet = '<span class="context-left"></span> <span class="match"> <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span> <mark> <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span> </mark> <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span> <mark> <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span> <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span> </mark> </span> <span class="context-right"></span>')
				242	})
				243
				244	# Build a minimal KorAPQuery with a dummy connection and a single match row
				245	kco <- new('DummyKCO', apiUrl = 'http://dummy/', verbose = FALSE)
				246	df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 5, matchID = 'match-A/B/C-p1-5', stringsAsFactors = FALSE)
				247	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				248
				249	q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				250
				251	# Expect full match span to be covered
				252	expect_equal(q2@collectedMatches$atokens$left[[1]], c('Wir'))
				253	expect_equal(q2@collectedMatches$atokens$match[[1]], c('können','alles','außer','Plan'))
				254	expect_equal(q2@collectedMatches$pos$match[[1]], c('VVFIN','PIS','APPR','NN'))
				255	expect_equal(q2@collectedMatches$lemma$match[[1]], c('können','alles','außer','Plan'))
				256
				257	# Alignment checks
				258	expect_length(q2@collectedMatches$pos$match[[1]], length(q2@collectedMatches$atokens$match[[1]]))
				259	expect_length(q2@collectedMatches$lemma$match[[1]], length(q2@collectedMatches$atokens$match[[1]]))
				260	})
				261
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	262	test_that("matchID is preserved in collectedMatches", {
				263	skip_if_offline()
				264
				265	kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
				266	q <- kco %>%
				267	corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
				268	fetchNext(maxFetch = 1)
				269
				270	# Skip test if no matches found
				271	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for test query")
				272
				273	# Check that matchID is present and valid
				274	expect_true("matchID" %in% colnames(q@collectedMatches))
				275	expect_true(all(!is.na(q@collectedMatches$matchID)))
				276
				277	# Verify matchID format (should contain position information)
				278	expect_true(all(grepl("-p\\d+-\\d+", q@collectedMatches$matchID)))
				279
				280	# Verify that matchStart and matchEnd are correctly extracted from matchID
				281	for (i in seq_len(nrow(q@collectedMatches))) {
				282	match_id <- q@collectedMatches$matchID[i]
				283	positions <- gsub(".-p(\\d+)-(\\d+).", "\\1 \\2", match_id)
				284	expected_start <- as.integer(stringr::word(positions, 1))
				285	expected_end <- as.integer(stringr::word(positions, 2)) - 1
				286
				287	expect_equal(q@collectedMatches$matchStart[i], expected_start)
				288	expect_equal(q@collectedMatches$matchEnd[i], expected_end)
				289	}
				290	})
Marc Kupietz	7ff770e	2025-07-18 19:07:10 +0200	[diff] [blame]	291
				292	test_that("fetchAnnotations handles morphological annotations with pipe separators", {
				293	skip_if_offline()
				294
				295	kco <- KorAPConnection("https://korap.dnb.de", verbose = FALSE, cache = FALSE, accessToken = NULL)
				296	q <- kco %>%
				297	auth() %>%
				298	corpusQuery("Ameisenplage", metadataOnly = FALSE) %>%
				299	fetchNext(maxFetch = 1)
				300
				301	# Skip test if no matches found
				302	skip_if(is.null(q@collectedMatches) \|\| nrow(q@collectedMatches) == 0, "No matches found for test query")
				303
				304	# Test with marmot foundry which provides morphological annotations
				305	q_with_morph <- fetchAnnotations(q, foundry = "marmot", verbose = FALSE)
				306
				307	# Check that morphological annotation columns exist
				308	expect_true("morph" %in% colnames(q_with_morph@collectedMatches))
				309	expect_true("atokens" %in% colnames(q_with_morph@collectedMatches))
				310
				311	# Test the structure of morphological annotation columns
				312	morph <- q_with_morph@collectedMatches$morph
				313	expect_true(is.data.frame(morph))
				314	expect_true(all(c("left", "match", "right") %in% names(morph)))
				315	expect_true(is.list(morph$match))
				316
				317	# Test that morphological features use pipe separators
				318	if (nrow(q_with_morph@collectedMatches) > 0) {
				319	morph_data <- morph$match[[1]]
				320
				321	# Check that we have morphological data
				322	expect_true(length(morph_data) > 0)
				323
				324	# If morphological data exists and is not NA, it should contain pipe separators
				325	# for multiple features (e.g., "case:acc\|gender:fem\|number:sg")
				326	if (!is.na(morph_data[1]) && nchar(morph_data[1]) > 0) {
				327	# Should contain morphological features separated by pipes
				328	expect_true(grepl("^[^\|]+", morph_data[1])) # At least one feature
				329
				330	# If multiple features exist, they should be pipe-separated
				331	if (grepl("\\\|", morph_data[1])) {
				332	features <- unlist(strsplit(morph_data[1], "\\\|"))
				333	expect_true(length(features) > 1)
				334	# Each feature should follow the pattern "attribute:value"
				335	expect_true(all(grepl("^[^:]+:[^:]+$", features)))
				336	}
				337	}
				338	}
				339	})
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	340
Marc Kupietz	cd45218	2025-10-09 13:28:41 +0200	[diff] [blame^]	341	test_that("issue #30 regression keeps all morphological features", {
				342	skip_if_offline()
				343
				344	matches <- KorAPConnection(verbose = FALSE, cache = FALSE) %>%
				345	corpusQuery("aufgrund einer Ameisenplage", vc = "availability=/CC.*/", metadataOnly = FALSE) %>%
				346	fetchNext(maxFetch = 5) %>%
				347	fetchAnnotations("marmot", verbose = FALSE) %>%
				348	slot("collectedMatches")
				349
				350	skip_if(is.null(matches) \|\| nrow(matches) == 0, "No matches found for issue #30 query")
				351
				352	pos_tail <- sapply(matches[["pos"]][["match"]], tail, 1)
				353	morph_tail <- sapply(matches[["morph"]][["match"]], tail, 1)
				354
				355	# Ensure we actually received morphological annotations for this query
				356	skip_if(all(is.na(morph_tail)), "No morphological annotations returned for issue #30 query")
				357
				358	# POS should still be available for the same tokens (original sanity check from issue report)
				359	expect_true(all(!is.na(pos_tail)))
				360
				361	# Regression assertion: MarMoT morph tail must keep all features, not just the last one
				362	observed_morph <- morph_tail[!is.na(morph_tail)]
				363	expect_true(all(grepl("\\\|", observed_morph)))
				364	expect_true(all(grepl("case:", observed_morph)))
				365	expect_true(all(grepl("gender:", observed_morph)))
				366	expect_true(all(grepl("number:", observed_morph)))
				367	})
				368
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	369	test_that("fetchAnnotations adds missing layer without overwriting existing, and can overwrite when requested", {
				370	# Define a separate dummy connection that serves different snippets by foundry
				371	if (!isClass("DummyKCO2")) setClass('DummyKCO2', slots = c(apiUrl='character', verbose='logical'))
				372	setMethod('apiCall', 'DummyKCO2', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				373	# Return TT-only snippet by default, and TT+MarMoT morph when foundry=marmot
				374	tt_xml <- '<span class="context-left"></span>
				375	<span class="match">
				376	<mark><span title="tt/l:können tt/p:VVFIN">können</span></mark> <span title="tt/l:alles tt/p:PIS">alles</span><mark><span title="tt/l:außer tt/p:APPR">außer</span></mark>
				377	</span>
				378	<span class="context-right"></span>'
				379	marmot_xml <- '<span class="context-left"></span>
				380	<span class="match">
				381	<mark><span title="tt/l:können tt/p:VVFIN marmot/m:verbform:fin">können</span></mark> <span title="tt/l:alles tt/p:PIS marmot/m:pos:pron">alles</span><mark><span title="tt/l:außer tt/p:APPR marmot/m:pos:adp\|case:acc">außer</span></mark>
				382	</span>
				383	<span class="context-right"></span>'
				384	if (grepl("foundry=marmot", url)) list(snippet = marmot_xml) else list(snippet = tt_xml)
				385	})
				386
				387	# Build query with one match row
				388	kco <- new('DummyKCO2', apiUrl = 'http://dummy/', verbose = FALSE)
				389	df <- data.frame(textSigle = 'X/Y/Z', matchStart = 1, matchEnd = 3, matchID = 'match-X/Y/Z-p1-3', stringsAsFactors = FALSE)
				390	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				391
				392	# First call with TT: should populate pos/lemma, morph empty/NA
				393	q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				394	pos_tt <- q1@collectedMatches$pos$match[[1]]
				395	lem_tt <- q1@collectedMatches$lemma$match[[1]]
				396
				397	expect_equal(pos_tt, c('VVFIN','PIS','APPR'))
				398	expect_equal(lem_tt, c('können','alles','außer'))
				399	# Morph should be empty or NA-only at this point
				400	morph1 <- q1@collectedMatches$morph$match[[1]]
				401	expect_true(length(morph1) == 0 \|\| all(is.na(morph1)))
				402
				403	# Second call with marmot: should add morph but keep pos/lemma unchanged when overwrite=FALSE
				404	q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE)
				405	expect_equal(q2@collectedMatches$pos$match[[1]], pos_tt)
				406	expect_equal(q2@collectedMatches$lemma$match[[1]], lem_tt)
				407
				408	morph2 <- q2@collectedMatches$morph$match[[1]]
				409	expect_equal(morph2, c('verbform:fin','pos:pron','pos:adp\|case:acc'))
				410
				411	# Corrupt existing POS and ensure overwrite=TRUE repairs it
				412	q2@collectedMatches$pos$match[[1]][1] <- 'DAMAGED'
				413	q3 <- fetchAnnotations(q2, foundry = 'tt', overwrite = TRUE, verbose = FALSE)
				414	expect_equal(q3@collectedMatches$pos$match[[1]][1], 'VVFIN')
				415	})
Marc Kupietz	2baf5c5	2025-09-05 16:41:11 +0200	[diff] [blame]	416
				417	## Additional offline edge-case tests were reverted per request
				418
				419	test_that("annotation_snippet is preserved unless overwrite is TRUE", {
				420	# Reuse DummyKCO2 logic (TT vs marmot snippets)
				421	if (!isClass("DummyKCO3")) setClass('DummyKCO3', slots = c(apiUrl='character', verbose='logical'))
				422	setMethod('apiCall', 'DummyKCO3', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				423	tt_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN">Test</span></mark></span><span class="context-right"></span>'
				424	marmot_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN marmot/m:pos:n">Test</span></mark></span><span class="context-right"></span>'
				425	if (grepl('foundry=marmot', url)) list(snippet = marmot_xml) else list(snippet = tt_xml)
				426	})
				427	kco <- new('DummyKCO3', apiUrl = 'http://dummy/', verbose = FALSE)
				428	df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
				429	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				430
				431	q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				432	sn1 <- q1@collectedMatches$annotation_snippet[[1]]
				433	q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE)
				434	# snippet should be unchanged because overwrite = FALSE
				435	expect_identical(q2@collectedMatches$annotation_snippet[[1]], sn1)
				436	# overwrite = TRUE should replace it
				437	q3 <- fetchAnnotations(q2, foundry = 'marmot', overwrite = TRUE, verbose = FALSE)
				438	expect_false(identical(q3@collectedMatches$annotation_snippet[[1]], sn1))
				439	})
				440
				441	test_that("initializes empty vectors when no snippet is returned", {
				442	# Dummy connection returning a list without 'snippet'
				443	if (!isClass("DummyKCO_NoSnip")) setClass('DummyKCO_NoSnip', slots = c(apiUrl='character', verbose='logical'))
				444	setMethod('apiCall', 'DummyKCO_NoSnip', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				445	list(status = "ok")
				446	})
				447
				448	kco <- new('DummyKCO_NoSnip', apiUrl = 'http://dummy/', verbose = FALSE)
				449	df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
				450	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				451
				452	q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				453
				454	# Expect atokens initialized with empty character vectors
				455	expect_true(is.data.frame(q2@collectedMatches$atokens))
				456	expect_length(q2@collectedMatches$atokens$left[[1]], 0)
				457	expect_length(q2@collectedMatches$atokens$match[[1]], 0)
				458	expect_length(q2@collectedMatches$atokens$right[[1]], 0)
				459
				460	# POS/lemma/morph should also be empty vectors for this row
				461	expect_length(q2@collectedMatches$pos$left[[1]], 0)
				462	expect_length(q2@collectedMatches$lemma$left[[1]], 0)
				463	expect_length(q2@collectedMatches$morph$left[[1]], 0)
				464	})
				465
				466	test_that("initializes NA vectors when API returns NULL", {
				467	# Dummy connection returning NULL (e.g., failed request)
				468	if (!isClass("DummyKCO_NullRes")) setClass('DummyKCO_NullRes', slots = c(apiUrl='character', verbose='logical'))
				469	setMethod('apiCall', 'DummyKCO_NullRes', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				470	NULL
				471	})
				472
				473	kco <- new('DummyKCO_NullRes', apiUrl = 'http://dummy/', verbose = FALSE)
				474	df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
				475	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				476
				477	q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				478
				479	# Expect atokens initialized with NA vectors
				480	expect_true(is.data.frame(q2@collectedMatches$atokens))
				481	expect_true(length(q2@collectedMatches$atokens$left[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$left[[1]])))
				482	expect_true(length(q2@collectedMatches$atokens$match[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$match[[1]])))
				483	expect_true(length(q2@collectedMatches$atokens$right[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$right[[1]])))
				484
				485	# annotation_snippet should also be NA
				486	expect_true(is.na(q2@collectedMatches$annotation_snippet[[1]]))
				487	})
				488
				489	test_that("initializes NA vectors when apiCall errors", {
				490	# Dummy connection throwing an error
				491	if (!isClass("DummyKCO_Error")) setClass('DummyKCO_Error', slots = c(apiUrl='character', verbose='logical'))
				492	setMethod('apiCall', 'DummyKCO_Error', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
				493	stop("boom")
				494	})
				495
				496	kco <- new('DummyKCO_Error', apiUrl = 'http://dummy/', verbose = FALSE)
				497	df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
				498	q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
				499
				500	q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
				501
				502	# Expect NA vectors or empty vectors in atokens (implementation may choose either)
				503	is_na_or_empty <- function(x) length(x) == 0 \|\| (length(x) == 1 && all(is.na(x)))
				504	expect_true(is_na_or_empty(q2@collectedMatches$atokens$left[[1]]))
				505	expect_true(is_na_or_empty(q2@collectedMatches$atokens$match[[1]]))
				506	expect_true(is_na_or_empty(q2@collectedMatches$atokens$right[[1]]))
				507	})