blob: 20a73cab825937abc00ce3bf3506ebda7a2b8819 [file] [log] [blame]
Marc Kupietza29f3d42025-07-18 10:14:43 +02001test_that("fetchAnnotations works with valid matches", {
2 skip_if_offline()
3
4 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
5 q <- kco %>%
6 corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
7 fetchNext(maxFetch = 2)
8
9 # Skip test if no matches found
10 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query")
11
12 # Test that structured annotation columns are initially absent
13 expect_false("atokens" %in% colnames(q@collectedMatches))
14 expect_false("pos" %in% colnames(q@collectedMatches))
15
16 # Test that matchID is preserved in collectedMatches
17 expect_true("matchID" %in% colnames(q@collectedMatches))
18 expect_true(all(!is.na(q@collectedMatches$matchID)))
19
20 # Test fetchAnnotations with default foundry
21 q_with_annotations <- fetchAnnotations(q, verbose = FALSE)
22
23 # Check that structured annotation columns are now populated
24 expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
25 expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
26
27 # Check that the structured columns have left/match/right components
28 expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$atokens)))
29 expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$pos)))
30
31 # Test fetchAnnotations with specific foundry
32 q_with_tt <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
33 expect_true("atokens" %in% colnames(q_with_tt@collectedMatches))
34 expect_true("pos" %in% colnames(q_with_tt@collectedMatches))
35
36 # Test that annotations contain actual content (regression test for URL construction)
37 if (nrow(q_with_tt@collectedMatches) > 0) {
38 # Check that the first match has populated annotation data
39 expect_true(length(q_with_tt@collectedMatches$atokens$left[[1]]) > 0 ||
40 length(q_with_tt@collectedMatches$atokens$match[[1]]) > 0 ||
41 length(q_with_tt@collectedMatches$atokens$right[[1]]) > 0)
42 expect_true(length(q_with_tt@collectedMatches$pos$left[[1]]) > 0 ||
43 length(q_with_tt@collectedMatches$pos$match[[1]]) > 0 ||
44 length(q_with_tt@collectedMatches$pos$right[[1]]) > 0)
45 }
46})
47
48test_that("fetchAnnotations handles empty matches gracefully", {
49 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
50
51 # Create a query object with no collected matches
52 q <- KorAPQuery(
53 korapConnection = kco,
54 collectedMatches = NULL
55 )
56
57 # Should warn and return original object
58 expect_warning(
59 result <- fetchAnnotations(q, verbose = FALSE),
60 "No collected matches found"
61 )
62 expect_identical(result, q)
63})
64
65test_that("fetchAnnotations preserves original object structure", {
66 skip_if_offline()
67
68 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
69 q <- kco %>%
70 corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
71 fetchNext(maxFetch = 1)
72
73 # Skip test if no matches found
74 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query")
75
76 q_original <- q
77 q_with_annotations <- fetchAnnotations(q, verbose = FALSE)
78
79 # Check that all original slots are preserved
80 expect_identical(q_with_annotations@korapConnection, q_original@korapConnection)
81 expect_identical(q_with_annotations@request, q_original@request)
82 expect_identical(q_with_annotations@vc, q_original@vc)
83 expect_identical(q_with_annotations@totalResults, q_original@totalResults)
84
85 # collectedMatches should have additional annotation columns
86 expect_true(nrow(q_with_annotations@collectedMatches) == nrow(q_original@collectedMatches))
87 expect_true(ncol(q_with_annotations@collectedMatches) > ncol(q_original@collectedMatches))
88
89 # Original columns should be preserved
90 original_cols <- colnames(q_original@collectedMatches)
91 expect_true(all(original_cols %in% colnames(q_with_annotations@collectedMatches)))
92
93 # New annotation columns should be present
94 expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
95 expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
96})
97
98test_that("fetchAnnotations returns structured left/match/right format", {
99 skip_if_offline()
100
101 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
102 q <- kco %>%
103 corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
104 fetchNext(maxFetch = 1)
105
106 # Skip test if no matches found
107 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query")
108
109 q_with_annotations <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
110
111 # Test that structured annotation columns exist
112 expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches))
113 expect_true("pos" %in% colnames(q_with_annotations@collectedMatches))
114
115 # Test the structure of annotation columns
116 atokens <- q_with_annotations@collectedMatches$atokens
117 pos <- q_with_annotations@collectedMatches$pos
118
119 expect_true(is.data.frame(atokens))
120 expect_true(is.data.frame(pos))
121
122 expect_true(all(c("left", "match", "right") %in% names(atokens)))
123 expect_true(all(c("left", "match", "right") %in% names(pos)))
124
125 # Test that each component is a list column
126 expect_true(is.list(atokens$left))
127 expect_true(is.list(atokens$match))
128 expect_true(is.list(atokens$right))
129 expect_true(is.list(pos$left))
130 expect_true(is.list(pos$match))
131 expect_true(is.list(pos$right))
132
133 # Test that the first match has actual data
134 if (nrow(q_with_annotations@collectedMatches) > 0) {
135 # At least one of left/match/right should have content
136 total_tokens <- length(atokens$left[[1]]) + length(atokens$match[[1]]) + length(atokens$right[[1]])
137 expect_true(total_tokens > 0)
138
139 total_pos <- length(pos$left[[1]]) + length(pos$match[[1]]) + length(pos$right[[1]])
140 expect_true(total_pos > 0)
141
142 # Token count should match POS count
143 expect_equal(total_tokens, total_pos)
144
145 # Match tokens should not be empty (since we found a match)
146 expect_true(length(atokens$match[[1]]) > 0)
147 expect_true(length(pos$match[[1]]) > 0)
148 }
149})
150
Marc Kupietz560b5912025-09-01 17:36:13 +0200151test_that("parser covers full span across multiple <mark> blocks", {
152 # Local, offline test to ensure correct match extraction when multiple
153 # <mark>…</mark> segments occur within the match span.
154 xml_snippet <- '<span class="context-left"></span>
155 <span class="match">
156 <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span>
157 <mark>
158 <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span>
159 </mark>
160 <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span>
161 <mark>
162 <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span>
163 <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span>
164 </mark>
165 </span>
166 <span class="context-right"></span>'
167
168 parsed <- parse_xml_annotations_structured(xml_snippet)
169
170 # Left context contains the pre-mark token
171 expect_equal(parsed$atokens$left, c("Wir"))
172
173 # Match should include everything from the first <mark> to the last </mark>,
174 # including tokens between them
175 expect_equal(parsed$atokens$match, c("können", "alles", "außer", "Plan"))
176
177 # POS and lemma lengths align with tokens in each section
178 expect_length(parsed$pos$match, length(parsed$atokens$match))
179 expect_length(parsed$lemma$match, length(parsed$atokens$match))
180 expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR", "NN"))
181 expect_equal(parsed$lemma$match, c("können", "alles", "außer", "Plan"))
182
183 # Right context should be empty in this snippet
184 expect_length(parsed$atokens$right, 0)
185})
186
187test_that("parser keeps tokens separated across &nbsp; between spans", {
188 xml_snippet <- '<span class="context-left"></span>
189 <span class="match">
190 <mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark>&nbsp;<span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark>
191 </span>
192 <span class="context-right"></span>'
193
194 parsed <- parse_xml_annotations_structured(xml_snippet)
195 expect_equal(parsed$atokens$match, c("können", "alles", "außer"))
196 expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR"))
197 expect_equal(parsed$lemma$match, c("können", "alles", "außer"))
198})
199
200test_that("parser keeps tokens separated across punctuation between spans", {
201 xml_snippet <- '<span class="context-left"></span>
202 <span class="match">
203 <mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark>, <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark>
204 </span>
205 <span class="context-right"></span>'
206
207 parsed <- parse_xml_annotations_structured(xml_snippet)
208 expect_equal(parsed$atokens$match, c("können", "alles", "außer"))
209 expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR"))
210 expect_equal(parsed$lemma$match, c("können", "alles", "außer"))
211})
212
213test_that("online: fetchAnnotations aligns pos/lemma with tokens for complex query", {
214 skip_if_offline()
215
216 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
217 q <- kco %>%
218 corpusQuery('[orth="[wW]ir"] können alles [orth="ausser" | orth="außer"] [tt/pos=NN]',
219 metadataOnly = FALSE,
220 fields = c("textSigle", "snippet", "tokens")) %>%
221 fetchNext(maxFetch = 20)
222
223 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for online test")
224
225 q2 <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
226
227 # For each match, POS and lemma counts must equal token count in the match span
228 for (i in seq_len(nrow(q2@collectedMatches))) {
229 tt <- q2@collectedMatches$tokens$match[[i]]
230 pp <- q2@collectedMatches$pos$match[[i]]
231 ll <- q2@collectedMatches$lemma$match[[i]]
232 expect_equal(length(tt), length(pp))
233 expect_equal(length(tt), length(ll))
234 }
235})
236
237test_that("fetchAnnotations aligns tokens and annotations across multiple <mark> blocks (stubbed API)", {
238 # Define a minimal dummy KorAPConnection-like S4 class for offline testing
239 setClass('DummyKCO', slots = c(apiUrl='character', verbose='logical'))
240 setMethod('apiCall', 'DummyKCO', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
241 list(snippet = '<span class="context-left"></span> <span class="match"> <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span> <mark> <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span> </mark> <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span> <mark> <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span> <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span> </mark> </span> <span class="context-right"></span>')
242 })
243
244 # Build a minimal KorAPQuery with a dummy connection and a single match row
245 kco <- new('DummyKCO', apiUrl = 'http://dummy/', verbose = FALSE)
246 df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 5, matchID = 'match-A/B/C-p1-5', stringsAsFactors = FALSE)
247 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
248
249 q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
250
251 # Expect full match span to be covered
252 expect_equal(q2@collectedMatches$atokens$left[[1]], c('Wir'))
253 expect_equal(q2@collectedMatches$atokens$match[[1]], c('können','alles','außer','Plan'))
254 expect_equal(q2@collectedMatches$pos$match[[1]], c('VVFIN','PIS','APPR','NN'))
255 expect_equal(q2@collectedMatches$lemma$match[[1]], c('können','alles','außer','Plan'))
256
257 # Alignment checks
258 expect_length(q2@collectedMatches$pos$match[[1]], length(q2@collectedMatches$atokens$match[[1]]))
259 expect_length(q2@collectedMatches$lemma$match[[1]], length(q2@collectedMatches$atokens$match[[1]]))
260})
261
Marc Kupietza29f3d42025-07-18 10:14:43 +0200262test_that("matchID is preserved in collectedMatches", {
263 skip_if_offline()
264
265 kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
266 q <- kco %>%
267 corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>%
268 fetchNext(maxFetch = 1)
269
270 # Skip test if no matches found
271 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query")
272
273 # Check that matchID is present and valid
274 expect_true("matchID" %in% colnames(q@collectedMatches))
275 expect_true(all(!is.na(q@collectedMatches$matchID)))
276
277 # Verify matchID format (should contain position information)
278 expect_true(all(grepl("-p\\d+-\\d+", q@collectedMatches$matchID)))
279
280 # Verify that matchStart and matchEnd are correctly extracted from matchID
281 for (i in seq_len(nrow(q@collectedMatches))) {
282 match_id <- q@collectedMatches$matchID[i]
283 positions <- gsub(".*-p(\\d+)-(\\d+).*", "\\1 \\2", match_id)
284 expected_start <- as.integer(stringr::word(positions, 1))
285 expected_end <- as.integer(stringr::word(positions, 2)) - 1
286
287 expect_equal(q@collectedMatches$matchStart[i], expected_start)
288 expect_equal(q@collectedMatches$matchEnd[i], expected_end)
289 }
290})
Marc Kupietz7ff770e2025-07-18 19:07:10 +0200291
292test_that("fetchAnnotations handles morphological annotations with pipe separators", {
293 skip_if_offline()
294
295 kco <- KorAPConnection("https://korap.dnb.de", verbose = FALSE, cache = FALSE, accessToken = NULL)
296 q <- kco %>%
297 auth() %>%
298 corpusQuery("Ameisenplage", metadataOnly = FALSE) %>%
299 fetchNext(maxFetch = 1)
300
301 # Skip test if no matches found
302 skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query")
303
304 # Test with marmot foundry which provides morphological annotations
305 q_with_morph <- fetchAnnotations(q, foundry = "marmot", verbose = FALSE)
306
307 # Check that morphological annotation columns exist
308 expect_true("morph" %in% colnames(q_with_morph@collectedMatches))
309 expect_true("atokens" %in% colnames(q_with_morph@collectedMatches))
310
311 # Test the structure of morphological annotation columns
312 morph <- q_with_morph@collectedMatches$morph
313 expect_true(is.data.frame(morph))
314 expect_true(all(c("left", "match", "right") %in% names(morph)))
315 expect_true(is.list(morph$match))
316
317 # Test that morphological features use pipe separators
318 if (nrow(q_with_morph@collectedMatches) > 0) {
319 morph_data <- morph$match[[1]]
320
321 # Check that we have morphological data
322 expect_true(length(morph_data) > 0)
323
324 # If morphological data exists and is not NA, it should contain pipe separators
325 # for multiple features (e.g., "case:acc|gender:fem|number:sg")
326 if (!is.na(morph_data[1]) && nchar(morph_data[1]) > 0) {
327 # Should contain morphological features separated by pipes
328 expect_true(grepl("^[^|]+", morph_data[1])) # At least one feature
329
330 # If multiple features exist, they should be pipe-separated
331 if (grepl("\\|", morph_data[1])) {
332 features <- unlist(strsplit(morph_data[1], "\\|"))
333 expect_true(length(features) > 1)
334 # Each feature should follow the pattern "attribute:value"
335 expect_true(all(grepl("^[^:]+:[^:]+$", features)))
336 }
337 }
338 }
339})
Marc Kupietz93787d52025-09-03 13:33:25 +0200340
Marc Kupietzcd452182025-10-09 13:28:41 +0200341test_that("issue #30 regression keeps all morphological features", {
342 skip_if_offline()
343
344 matches <- KorAPConnection(verbose = FALSE, cache = FALSE) %>%
345 corpusQuery("aufgrund einer Ameisenplage", vc = "availability=/CC.*/", metadataOnly = FALSE) %>%
346 fetchNext(maxFetch = 5) %>%
347 fetchAnnotations("marmot", verbose = FALSE) %>%
348 slot("collectedMatches")
349
350 skip_if(is.null(matches) || nrow(matches) == 0, "No matches found for issue #30 query")
351
352 pos_tail <- sapply(matches[["pos"]][["match"]], tail, 1)
353 morph_tail <- sapply(matches[["morph"]][["match"]], tail, 1)
354
355 # Ensure we actually received morphological annotations for this query
356 skip_if(all(is.na(morph_tail)), "No morphological annotations returned for issue #30 query")
357
358 # POS should still be available for the same tokens (original sanity check from issue report)
359 expect_true(all(!is.na(pos_tail)))
360
361 # Regression assertion: MarMoT morph tail must keep all features, not just the last one
362 observed_morph <- morph_tail[!is.na(morph_tail)]
363 expect_true(all(grepl("\\|", observed_morph)))
364 expect_true(all(grepl("case:", observed_morph)))
365 expect_true(all(grepl("gender:", observed_morph)))
366 expect_true(all(grepl("number:", observed_morph)))
367})
368
Marc Kupietz93787d52025-09-03 13:33:25 +0200369test_that("fetchAnnotations adds missing layer without overwriting existing, and can overwrite when requested", {
370 # Define a separate dummy connection that serves different snippets by foundry
371 if (!isClass("DummyKCO2")) setClass('DummyKCO2', slots = c(apiUrl='character', verbose='logical'))
372 setMethod('apiCall', 'DummyKCO2', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
373 # Return TT-only snippet by default, and TT+MarMoT morph when foundry=marmot
374 tt_xml <- '<span class="context-left"></span>
375 <span class="match">
376 <mark><span title="tt/l:können tt/p:VVFIN">können</span></mark>&nbsp;<span title="tt/l:alles tt/p:PIS">alles</span><mark><span title="tt/l:außer tt/p:APPR">außer</span></mark>
377 </span>
378 <span class="context-right"></span>'
379 marmot_xml <- '<span class="context-left"></span>
380 <span class="match">
381 <mark><span title="tt/l:können tt/p:VVFIN marmot/m:verbform:fin">können</span></mark>&nbsp;<span title="tt/l:alles tt/p:PIS marmot/m:pos:pron">alles</span><mark><span title="tt/l:außer tt/p:APPR marmot/m:pos:adp|case:acc">außer</span></mark>
382 </span>
383 <span class="context-right"></span>'
384 if (grepl("foundry=marmot", url)) list(snippet = marmot_xml) else list(snippet = tt_xml)
385 })
386
387 # Build query with one match row
388 kco <- new('DummyKCO2', apiUrl = 'http://dummy/', verbose = FALSE)
389 df <- data.frame(textSigle = 'X/Y/Z', matchStart = 1, matchEnd = 3, matchID = 'match-X/Y/Z-p1-3', stringsAsFactors = FALSE)
390 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
391
392 # First call with TT: should populate pos/lemma, morph empty/NA
393 q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
394 pos_tt <- q1@collectedMatches$pos$match[[1]]
395 lem_tt <- q1@collectedMatches$lemma$match[[1]]
396
397 expect_equal(pos_tt, c('VVFIN','PIS','APPR'))
398 expect_equal(lem_tt, c('können','alles','außer'))
399 # Morph should be empty or NA-only at this point
400 morph1 <- q1@collectedMatches$morph$match[[1]]
401 expect_true(length(morph1) == 0 || all(is.na(morph1)))
402
403 # Second call with marmot: should add morph but keep pos/lemma unchanged when overwrite=FALSE
404 q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE)
405 expect_equal(q2@collectedMatches$pos$match[[1]], pos_tt)
406 expect_equal(q2@collectedMatches$lemma$match[[1]], lem_tt)
407
408 morph2 <- q2@collectedMatches$morph$match[[1]]
409 expect_equal(morph2, c('verbform:fin','pos:pron','pos:adp|case:acc'))
410
411 # Corrupt existing POS and ensure overwrite=TRUE repairs it
412 q2@collectedMatches$pos$match[[1]][1] <- 'DAMAGED'
413 q3 <- fetchAnnotations(q2, foundry = 'tt', overwrite = TRUE, verbose = FALSE)
414 expect_equal(q3@collectedMatches$pos$match[[1]][1], 'VVFIN')
415})
Marc Kupietz2baf5c52025-09-05 16:41:11 +0200416
417## Additional offline edge-case tests were reverted per request
418
419test_that("annotation_snippet is preserved unless overwrite is TRUE", {
420 # Reuse DummyKCO2 logic (TT vs marmot snippets)
421 if (!isClass("DummyKCO3")) setClass('DummyKCO3', slots = c(apiUrl='character', verbose='logical'))
422 setMethod('apiCall', 'DummyKCO3', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
423 tt_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN">Test</span></mark></span><span class="context-right"></span>'
424 marmot_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN marmot/m:pos:n">Test</span></mark></span><span class="context-right"></span>'
425 if (grepl('foundry=marmot', url)) list(snippet = marmot_xml) else list(snippet = tt_xml)
426 })
427 kco <- new('DummyKCO3', apiUrl = 'http://dummy/', verbose = FALSE)
428 df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
429 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
430
431 q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
432 sn1 <- q1@collectedMatches$annotation_snippet[[1]]
433 q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE)
434 # snippet should be unchanged because overwrite = FALSE
435 expect_identical(q2@collectedMatches$annotation_snippet[[1]], sn1)
436 # overwrite = TRUE should replace it
437 q3 <- fetchAnnotations(q2, foundry = 'marmot', overwrite = TRUE, verbose = FALSE)
438 expect_false(identical(q3@collectedMatches$annotation_snippet[[1]], sn1))
439})
440
441test_that("initializes empty vectors when no snippet is returned", {
442 # Dummy connection returning a list without 'snippet'
443 if (!isClass("DummyKCO_NoSnip")) setClass('DummyKCO_NoSnip', slots = c(apiUrl='character', verbose='logical'))
444 setMethod('apiCall', 'DummyKCO_NoSnip', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
445 list(status = "ok")
446 })
447
448 kco <- new('DummyKCO_NoSnip', apiUrl = 'http://dummy/', verbose = FALSE)
449 df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
450 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
451
452 q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
453
454 # Expect atokens initialized with empty character vectors
455 expect_true(is.data.frame(q2@collectedMatches$atokens))
456 expect_length(q2@collectedMatches$atokens$left[[1]], 0)
457 expect_length(q2@collectedMatches$atokens$match[[1]], 0)
458 expect_length(q2@collectedMatches$atokens$right[[1]], 0)
459
460 # POS/lemma/morph should also be empty vectors for this row
461 expect_length(q2@collectedMatches$pos$left[[1]], 0)
462 expect_length(q2@collectedMatches$lemma$left[[1]], 0)
463 expect_length(q2@collectedMatches$morph$left[[1]], 0)
464})
465
466test_that("initializes NA vectors when API returns NULL", {
467 # Dummy connection returning NULL (e.g., failed request)
468 if (!isClass("DummyKCO_NullRes")) setClass('DummyKCO_NullRes', slots = c(apiUrl='character', verbose='logical'))
469 setMethod('apiCall', 'DummyKCO_NullRes', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
470 NULL
471 })
472
473 kco <- new('DummyKCO_NullRes', apiUrl = 'http://dummy/', verbose = FALSE)
474 df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
475 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
476
477 q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
478
479 # Expect atokens initialized with NA vectors
480 expect_true(is.data.frame(q2@collectedMatches$atokens))
481 expect_true(length(q2@collectedMatches$atokens$left[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$left[[1]])))
482 expect_true(length(q2@collectedMatches$atokens$match[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$match[[1]])))
483 expect_true(length(q2@collectedMatches$atokens$right[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$right[[1]])))
484
485 # annotation_snippet should also be NA
486 expect_true(is.na(q2@collectedMatches$annotation_snippet[[1]]))
487})
488
489test_that("initializes NA vectors when apiCall errors", {
490 # Dummy connection throwing an error
491 if (!isClass("DummyKCO_Error")) setClass('DummyKCO_Error', slots = c(apiUrl='character', verbose='logical'))
492 setMethod('apiCall', 'DummyKCO_Error', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) {
493 stop("boom")
494 })
495
496 kco <- new('DummyKCO_Error', apiUrl = 'http://dummy/', verbose = FALSE)
497 df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE)
498 q <- KorAPQuery(korapConnection = kco, collectedMatches = df)
499
500 q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE)
501
502 # Expect NA vectors or empty vectors in atokens (implementation may choose either)
503 is_na_or_empty <- function(x) length(x) == 0 || (length(x) == 1 && all(is.na(x)))
504 expect_true(is_na_or_empty(q2@collectedMatches$atokens$left[[1]]))
505 expect_true(is_na_or_empty(q2@collectedMatches$atokens$match[[1]]))
506 expect_true(is_na_or_empty(q2@collectedMatches$atokens$right[[1]]))
507})