Marc Kupietz | a29f3d4 | 2025-07-18 10:14:43 +0200 | [diff] [blame] | 1 | test_that("fetchAnnotations works with valid matches", { |
| 2 | skip_if_offline() |
| 3 | |
| 4 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 5 | q <- kco %>% |
| 6 | corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>% |
| 7 | fetchNext(maxFetch = 2) |
| 8 | |
| 9 | # Skip test if no matches found |
| 10 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query") |
| 11 | |
| 12 | # Test that structured annotation columns are initially absent |
| 13 | expect_false("atokens" %in% colnames(q@collectedMatches)) |
| 14 | expect_false("pos" %in% colnames(q@collectedMatches)) |
| 15 | |
| 16 | # Test that matchID is preserved in collectedMatches |
| 17 | expect_true("matchID" %in% colnames(q@collectedMatches)) |
| 18 | expect_true(all(!is.na(q@collectedMatches$matchID))) |
| 19 | |
| 20 | # Test fetchAnnotations with default foundry |
| 21 | q_with_annotations <- fetchAnnotations(q, verbose = FALSE) |
| 22 | |
| 23 | # Check that structured annotation columns are now populated |
| 24 | expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches)) |
| 25 | expect_true("pos" %in% colnames(q_with_annotations@collectedMatches)) |
| 26 | |
| 27 | # Check that the structured columns have left/match/right components |
| 28 | expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$atokens))) |
| 29 | expect_true(all(c("left", "match", "right") %in% names(q_with_annotations@collectedMatches$pos))) |
| 30 | |
| 31 | # Test fetchAnnotations with specific foundry |
| 32 | q_with_tt <- fetchAnnotations(q, foundry = "tt", verbose = FALSE) |
| 33 | expect_true("atokens" %in% colnames(q_with_tt@collectedMatches)) |
| 34 | expect_true("pos" %in% colnames(q_with_tt@collectedMatches)) |
| 35 | |
| 36 | # Test that annotations contain actual content (regression test for URL construction) |
| 37 | if (nrow(q_with_tt@collectedMatches) > 0) { |
| 38 | # Check that the first match has populated annotation data |
| 39 | expect_true(length(q_with_tt@collectedMatches$atokens$left[[1]]) > 0 || |
| 40 | length(q_with_tt@collectedMatches$atokens$match[[1]]) > 0 || |
| 41 | length(q_with_tt@collectedMatches$atokens$right[[1]]) > 0) |
| 42 | expect_true(length(q_with_tt@collectedMatches$pos$left[[1]]) > 0 || |
| 43 | length(q_with_tt@collectedMatches$pos$match[[1]]) > 0 || |
| 44 | length(q_with_tt@collectedMatches$pos$right[[1]]) > 0) |
| 45 | } |
| 46 | }) |
| 47 | |
| 48 | test_that("fetchAnnotations handles empty matches gracefully", { |
| 49 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 50 | |
| 51 | # Create a query object with no collected matches |
| 52 | q <- KorAPQuery( |
| 53 | korapConnection = kco, |
| 54 | collectedMatches = NULL |
| 55 | ) |
| 56 | |
| 57 | # Should warn and return original object |
| 58 | expect_warning( |
| 59 | result <- fetchAnnotations(q, verbose = FALSE), |
| 60 | "No collected matches found" |
| 61 | ) |
| 62 | expect_identical(result, q) |
| 63 | }) |
| 64 | |
| 65 | test_that("fetchAnnotations preserves original object structure", { |
| 66 | skip_if_offline() |
| 67 | |
| 68 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 69 | q <- kco %>% |
| 70 | corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>% |
| 71 | fetchNext(maxFetch = 1) |
| 72 | |
| 73 | # Skip test if no matches found |
| 74 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query") |
| 75 | |
| 76 | q_original <- q |
| 77 | q_with_annotations <- fetchAnnotations(q, verbose = FALSE) |
| 78 | |
| 79 | # Check that all original slots are preserved |
| 80 | expect_identical(q_with_annotations@korapConnection, q_original@korapConnection) |
| 81 | expect_identical(q_with_annotations@request, q_original@request) |
| 82 | expect_identical(q_with_annotations@vc, q_original@vc) |
| 83 | expect_identical(q_with_annotations@totalResults, q_original@totalResults) |
| 84 | |
| 85 | # collectedMatches should have additional annotation columns |
| 86 | expect_true(nrow(q_with_annotations@collectedMatches) == nrow(q_original@collectedMatches)) |
| 87 | expect_true(ncol(q_with_annotations@collectedMatches) > ncol(q_original@collectedMatches)) |
| 88 | |
| 89 | # Original columns should be preserved |
| 90 | original_cols <- colnames(q_original@collectedMatches) |
| 91 | expect_true(all(original_cols %in% colnames(q_with_annotations@collectedMatches))) |
| 92 | |
| 93 | # New annotation columns should be present |
| 94 | expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches)) |
| 95 | expect_true("pos" %in% colnames(q_with_annotations@collectedMatches)) |
| 96 | }) |
| 97 | |
| 98 | test_that("fetchAnnotations returns structured left/match/right format", { |
| 99 | skip_if_offline() |
| 100 | |
| 101 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 102 | q <- kco %>% |
| 103 | corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>% |
| 104 | fetchNext(maxFetch = 1) |
| 105 | |
| 106 | # Skip test if no matches found |
| 107 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query") |
| 108 | |
| 109 | q_with_annotations <- fetchAnnotations(q, foundry = "tt", verbose = FALSE) |
| 110 | |
| 111 | # Test that structured annotation columns exist |
| 112 | expect_true("atokens" %in% colnames(q_with_annotations@collectedMatches)) |
| 113 | expect_true("pos" %in% colnames(q_with_annotations@collectedMatches)) |
| 114 | |
| 115 | # Test the structure of annotation columns |
| 116 | atokens <- q_with_annotations@collectedMatches$atokens |
| 117 | pos <- q_with_annotations@collectedMatches$pos |
| 118 | |
| 119 | expect_true(is.data.frame(atokens)) |
| 120 | expect_true(is.data.frame(pos)) |
| 121 | |
| 122 | expect_true(all(c("left", "match", "right") %in% names(atokens))) |
| 123 | expect_true(all(c("left", "match", "right") %in% names(pos))) |
| 124 | |
| 125 | # Test that each component is a list column |
| 126 | expect_true(is.list(atokens$left)) |
| 127 | expect_true(is.list(atokens$match)) |
| 128 | expect_true(is.list(atokens$right)) |
| 129 | expect_true(is.list(pos$left)) |
| 130 | expect_true(is.list(pos$match)) |
| 131 | expect_true(is.list(pos$right)) |
| 132 | |
| 133 | # Test that the first match has actual data |
| 134 | if (nrow(q_with_annotations@collectedMatches) > 0) { |
| 135 | # At least one of left/match/right should have content |
| 136 | total_tokens <- length(atokens$left[[1]]) + length(atokens$match[[1]]) + length(atokens$right[[1]]) |
| 137 | expect_true(total_tokens > 0) |
| 138 | |
| 139 | total_pos <- length(pos$left[[1]]) + length(pos$match[[1]]) + length(pos$right[[1]]) |
| 140 | expect_true(total_pos > 0) |
| 141 | |
| 142 | # Token count should match POS count |
| 143 | expect_equal(total_tokens, total_pos) |
| 144 | |
| 145 | # Match tokens should not be empty (since we found a match) |
| 146 | expect_true(length(atokens$match[[1]]) > 0) |
| 147 | expect_true(length(pos$match[[1]]) > 0) |
| 148 | } |
| 149 | }) |
| 150 | |
Marc Kupietz | 560b591 | 2025-09-01 17:36:13 +0200 | [diff] [blame] | 151 | test_that("parser covers full span across multiple <mark> blocks", { |
| 152 | # Local, offline test to ensure correct match extraction when multiple |
| 153 | # <mark>…</mark> segments occur within the match span. |
| 154 | xml_snippet <- '<span class="context-left"></span> |
| 155 | <span class="match"> |
| 156 | <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span> |
| 157 | <mark> |
| 158 | <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span> |
| 159 | </mark> |
| 160 | <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span> |
| 161 | <mark> |
| 162 | <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span> |
| 163 | <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span> |
| 164 | </mark> |
| 165 | </span> |
| 166 | <span class="context-right"></span>' |
| 167 | |
| 168 | parsed <- parse_xml_annotations_structured(xml_snippet) |
| 169 | |
| 170 | # Left context contains the pre-mark token |
| 171 | expect_equal(parsed$atokens$left, c("Wir")) |
| 172 | |
| 173 | # Match should include everything from the first <mark> to the last </mark>, |
| 174 | # including tokens between them |
| 175 | expect_equal(parsed$atokens$match, c("können", "alles", "außer", "Plan")) |
| 176 | |
| 177 | # POS and lemma lengths align with tokens in each section |
| 178 | expect_length(parsed$pos$match, length(parsed$atokens$match)) |
| 179 | expect_length(parsed$lemma$match, length(parsed$atokens$match)) |
| 180 | expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR", "NN")) |
| 181 | expect_equal(parsed$lemma$match, c("können", "alles", "außer", "Plan")) |
| 182 | |
| 183 | # Right context should be empty in this snippet |
| 184 | expect_length(parsed$atokens$right, 0) |
| 185 | }) |
| 186 | |
| 187 | test_that("parser keeps tokens separated across between spans", { |
| 188 | xml_snippet <- '<span class="context-left"></span> |
| 189 | <span class="match"> |
| 190 | <mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark> <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark> |
| 191 | </span> |
| 192 | <span class="context-right"></span>' |
| 193 | |
| 194 | parsed <- parse_xml_annotations_structured(xml_snippet) |
| 195 | expect_equal(parsed$atokens$match, c("können", "alles", "außer")) |
| 196 | expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR")) |
| 197 | expect_equal(parsed$lemma$match, c("können", "alles", "außer")) |
| 198 | }) |
| 199 | |
| 200 | test_that("parser keeps tokens separated across punctuation between spans", { |
| 201 | xml_snippet <- '<span class="context-left"></span> |
| 202 | <span class="match"> |
| 203 | <mark><span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span></mark>, <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span><mark><span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span></mark> |
| 204 | </span> |
| 205 | <span class="context-right"></span>' |
| 206 | |
| 207 | parsed <- parse_xml_annotations_structured(xml_snippet) |
| 208 | expect_equal(parsed$atokens$match, c("können", "alles", "außer")) |
| 209 | expect_equal(parsed$pos$match, c("VVFIN", "PIS", "APPR")) |
| 210 | expect_equal(parsed$lemma$match, c("können", "alles", "außer")) |
| 211 | }) |
| 212 | |
| 213 | test_that("online: fetchAnnotations aligns pos/lemma with tokens for complex query", { |
| 214 | skip_if_offline() |
| 215 | |
| 216 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 217 | q <- kco %>% |
| 218 | corpusQuery('[orth="[wW]ir"] können alles [orth="ausser" | orth="außer"] [tt/pos=NN]', |
| 219 | metadataOnly = FALSE, |
| 220 | fields = c("textSigle", "snippet", "tokens")) %>% |
| 221 | fetchNext(maxFetch = 20) |
| 222 | |
| 223 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for online test") |
| 224 | |
| 225 | q2 <- fetchAnnotations(q, foundry = "tt", verbose = FALSE) |
| 226 | |
| 227 | # For each match, POS and lemma counts must equal token count in the match span |
| 228 | for (i in seq_len(nrow(q2@collectedMatches))) { |
| 229 | tt <- q2@collectedMatches$tokens$match[[i]] |
| 230 | pp <- q2@collectedMatches$pos$match[[i]] |
| 231 | ll <- q2@collectedMatches$lemma$match[[i]] |
| 232 | expect_equal(length(tt), length(pp)) |
| 233 | expect_equal(length(tt), length(ll)) |
| 234 | } |
| 235 | }) |
| 236 | |
| 237 | test_that("fetchAnnotations aligns tokens and annotations across multiple <mark> blocks (stubbed API)", { |
| 238 | # Define a minimal dummy KorAPConnection-like S4 class for offline testing |
| 239 | setClass('DummyKCO', slots = c(apiUrl='character', verbose='logical')) |
| 240 | setMethod('apiCall', 'DummyKCO', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 241 | list(snippet = '<span class="context-left"></span> <span class="match"> <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span> <mark> <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span> </mark> <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span> <mark> <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span> <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span> </mark> </span> <span class="context-right"></span>') |
| 242 | }) |
| 243 | |
| 244 | # Build a minimal KorAPQuery with a dummy connection and a single match row |
| 245 | kco <- new('DummyKCO', apiUrl = 'http://dummy/', verbose = FALSE) |
| 246 | df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 5, matchID = 'match-A/B/C-p1-5', stringsAsFactors = FALSE) |
| 247 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 248 | |
| 249 | q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 250 | |
| 251 | # Expect full match span to be covered |
| 252 | expect_equal(q2@collectedMatches$atokens$left[[1]], c('Wir')) |
| 253 | expect_equal(q2@collectedMatches$atokens$match[[1]], c('können','alles','außer','Plan')) |
| 254 | expect_equal(q2@collectedMatches$pos$match[[1]], c('VVFIN','PIS','APPR','NN')) |
| 255 | expect_equal(q2@collectedMatches$lemma$match[[1]], c('können','alles','außer','Plan')) |
| 256 | |
| 257 | # Alignment checks |
| 258 | expect_length(q2@collectedMatches$pos$match[[1]], length(q2@collectedMatches$atokens$match[[1]])) |
| 259 | expect_length(q2@collectedMatches$lemma$match[[1]], length(q2@collectedMatches$atokens$match[[1]])) |
| 260 | }) |
| 261 | |
Marc Kupietz | a29f3d4 | 2025-07-18 10:14:43 +0200 | [diff] [blame] | 262 | test_that("matchID is preserved in collectedMatches", { |
| 263 | skip_if_offline() |
| 264 | |
| 265 | kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 266 | q <- kco %>% |
| 267 | corpusQuery("Test", "pubDate since 2014", metadataOnly = FALSE, fields = c("textSigle", "snippet")) %>% |
| 268 | fetchNext(maxFetch = 1) |
| 269 | |
| 270 | # Skip test if no matches found |
| 271 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query") |
| 272 | |
| 273 | # Check that matchID is present and valid |
| 274 | expect_true("matchID" %in% colnames(q@collectedMatches)) |
| 275 | expect_true(all(!is.na(q@collectedMatches$matchID))) |
| 276 | |
| 277 | # Verify matchID format (should contain position information) |
| 278 | expect_true(all(grepl("-p\\d+-\\d+", q@collectedMatches$matchID))) |
| 279 | |
| 280 | # Verify that matchStart and matchEnd are correctly extracted from matchID |
| 281 | for (i in seq_len(nrow(q@collectedMatches))) { |
| 282 | match_id <- q@collectedMatches$matchID[i] |
| 283 | positions <- gsub(".*-p(\\d+)-(\\d+).*", "\\1 \\2", match_id) |
| 284 | expected_start <- as.integer(stringr::word(positions, 1)) |
| 285 | expected_end <- as.integer(stringr::word(positions, 2)) - 1 |
| 286 | |
| 287 | expect_equal(q@collectedMatches$matchStart[i], expected_start) |
| 288 | expect_equal(q@collectedMatches$matchEnd[i], expected_end) |
| 289 | } |
| 290 | }) |
Marc Kupietz | 7ff770e | 2025-07-18 19:07:10 +0200 | [diff] [blame] | 291 | |
| 292 | test_that("fetchAnnotations handles morphological annotations with pipe separators", { |
| 293 | skip_if_offline() |
| 294 | |
| 295 | kco <- KorAPConnection("https://korap.dnb.de", verbose = FALSE, cache = FALSE, accessToken = NULL) |
| 296 | q <- kco %>% |
| 297 | auth() %>% |
| 298 | corpusQuery("Ameisenplage", metadataOnly = FALSE) %>% |
| 299 | fetchNext(maxFetch = 1) |
| 300 | |
| 301 | # Skip test if no matches found |
| 302 | skip_if(is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0, "No matches found for test query") |
| 303 | |
| 304 | # Test with marmot foundry which provides morphological annotations |
| 305 | q_with_morph <- fetchAnnotations(q, foundry = "marmot", verbose = FALSE) |
| 306 | |
| 307 | # Check that morphological annotation columns exist |
| 308 | expect_true("morph" %in% colnames(q_with_morph@collectedMatches)) |
| 309 | expect_true("atokens" %in% colnames(q_with_morph@collectedMatches)) |
| 310 | |
| 311 | # Test the structure of morphological annotation columns |
| 312 | morph <- q_with_morph@collectedMatches$morph |
| 313 | expect_true(is.data.frame(morph)) |
| 314 | expect_true(all(c("left", "match", "right") %in% names(morph))) |
| 315 | expect_true(is.list(morph$match)) |
| 316 | |
| 317 | # Test that morphological features use pipe separators |
| 318 | if (nrow(q_with_morph@collectedMatches) > 0) { |
| 319 | morph_data <- morph$match[[1]] |
| 320 | |
| 321 | # Check that we have morphological data |
| 322 | expect_true(length(morph_data) > 0) |
| 323 | |
| 324 | # If morphological data exists and is not NA, it should contain pipe separators |
| 325 | # for multiple features (e.g., "case:acc|gender:fem|number:sg") |
| 326 | if (!is.na(morph_data[1]) && nchar(morph_data[1]) > 0) { |
| 327 | # Should contain morphological features separated by pipes |
| 328 | expect_true(grepl("^[^|]+", morph_data[1])) # At least one feature |
| 329 | |
| 330 | # If multiple features exist, they should be pipe-separated |
| 331 | if (grepl("\\|", morph_data[1])) { |
| 332 | features <- unlist(strsplit(morph_data[1], "\\|")) |
| 333 | expect_true(length(features) > 1) |
| 334 | # Each feature should follow the pattern "attribute:value" |
| 335 | expect_true(all(grepl("^[^:]+:[^:]+$", features))) |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | }) |
Marc Kupietz | 93787d5 | 2025-09-03 13:33:25 +0200 | [diff] [blame] | 340 | |
| 341 | test_that("fetchAnnotations adds missing layer without overwriting existing, and can overwrite when requested", { |
| 342 | # Define a separate dummy connection that serves different snippets by foundry |
| 343 | if (!isClass("DummyKCO2")) setClass('DummyKCO2', slots = c(apiUrl='character', verbose='logical')) |
| 344 | setMethod('apiCall', 'DummyKCO2', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 345 | # Return TT-only snippet by default, and TT+MarMoT morph when foundry=marmot |
| 346 | tt_xml <- '<span class="context-left"></span> |
| 347 | <span class="match"> |
| 348 | <mark><span title="tt/l:können tt/p:VVFIN">können</span></mark> <span title="tt/l:alles tt/p:PIS">alles</span><mark><span title="tt/l:außer tt/p:APPR">außer</span></mark> |
| 349 | </span> |
| 350 | <span class="context-right"></span>' |
| 351 | marmot_xml <- '<span class="context-left"></span> |
| 352 | <span class="match"> |
| 353 | <mark><span title="tt/l:können tt/p:VVFIN marmot/m:verbform:fin">können</span></mark> <span title="tt/l:alles tt/p:PIS marmot/m:pos:pron">alles</span><mark><span title="tt/l:außer tt/p:APPR marmot/m:pos:adp|case:acc">außer</span></mark> |
| 354 | </span> |
| 355 | <span class="context-right"></span>' |
| 356 | if (grepl("foundry=marmot", url)) list(snippet = marmot_xml) else list(snippet = tt_xml) |
| 357 | }) |
| 358 | |
| 359 | # Build query with one match row |
| 360 | kco <- new('DummyKCO2', apiUrl = 'http://dummy/', verbose = FALSE) |
| 361 | df <- data.frame(textSigle = 'X/Y/Z', matchStart = 1, matchEnd = 3, matchID = 'match-X/Y/Z-p1-3', stringsAsFactors = FALSE) |
| 362 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 363 | |
| 364 | # First call with TT: should populate pos/lemma, morph empty/NA |
| 365 | q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 366 | pos_tt <- q1@collectedMatches$pos$match[[1]] |
| 367 | lem_tt <- q1@collectedMatches$lemma$match[[1]] |
| 368 | |
| 369 | expect_equal(pos_tt, c('VVFIN','PIS','APPR')) |
| 370 | expect_equal(lem_tt, c('können','alles','außer')) |
| 371 | # Morph should be empty or NA-only at this point |
| 372 | morph1 <- q1@collectedMatches$morph$match[[1]] |
| 373 | expect_true(length(morph1) == 0 || all(is.na(morph1))) |
| 374 | |
| 375 | # Second call with marmot: should add morph but keep pos/lemma unchanged when overwrite=FALSE |
| 376 | q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE) |
| 377 | expect_equal(q2@collectedMatches$pos$match[[1]], pos_tt) |
| 378 | expect_equal(q2@collectedMatches$lemma$match[[1]], lem_tt) |
| 379 | |
| 380 | morph2 <- q2@collectedMatches$morph$match[[1]] |
| 381 | expect_equal(morph2, c('verbform:fin','pos:pron','pos:adp|case:acc')) |
| 382 | |
| 383 | # Corrupt existing POS and ensure overwrite=TRUE repairs it |
| 384 | q2@collectedMatches$pos$match[[1]][1] <- 'DAMAGED' |
| 385 | q3 <- fetchAnnotations(q2, foundry = 'tt', overwrite = TRUE, verbose = FALSE) |
| 386 | expect_equal(q3@collectedMatches$pos$match[[1]][1], 'VVFIN') |
| 387 | }) |
Marc Kupietz | 2baf5c5 | 2025-09-05 16:41:11 +0200 | [diff] [blame^] | 388 | |
| 389 | ## Additional offline edge-case tests were reverted per request |
| 390 | |
| 391 | test_that("annotation_snippet is preserved unless overwrite is TRUE", { |
| 392 | # Reuse DummyKCO2 logic (TT vs marmot snippets) |
| 393 | if (!isClass("DummyKCO3")) setClass('DummyKCO3', slots = c(apiUrl='character', verbose='logical')) |
| 394 | setMethod('apiCall', 'DummyKCO3', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 395 | tt_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN">Test</span></mark></span><span class="context-right"></span>' |
| 396 | marmot_xml <- '<span class="context-left"></span><span class="match"><mark><span title="tt/l:Test tt/p:NN marmot/m:pos:n">Test</span></mark></span><span class="context-right"></span>' |
| 397 | if (grepl('foundry=marmot', url)) list(snippet = marmot_xml) else list(snippet = tt_xml) |
| 398 | }) |
| 399 | kco <- new('DummyKCO3', apiUrl = 'http://dummy/', verbose = FALSE) |
| 400 | df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE) |
| 401 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 402 | |
| 403 | q1 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 404 | sn1 <- q1@collectedMatches$annotation_snippet[[1]] |
| 405 | q2 <- fetchAnnotations(q1, foundry = 'marmot', verbose = FALSE) |
| 406 | # snippet should be unchanged because overwrite = FALSE |
| 407 | expect_identical(q2@collectedMatches$annotation_snippet[[1]], sn1) |
| 408 | # overwrite = TRUE should replace it |
| 409 | q3 <- fetchAnnotations(q2, foundry = 'marmot', overwrite = TRUE, verbose = FALSE) |
| 410 | expect_false(identical(q3@collectedMatches$annotation_snippet[[1]], sn1)) |
| 411 | }) |
| 412 | |
| 413 | test_that("initializes empty vectors when no snippet is returned", { |
| 414 | # Dummy connection returning a list without 'snippet' |
| 415 | if (!isClass("DummyKCO_NoSnip")) setClass('DummyKCO_NoSnip', slots = c(apiUrl='character', verbose='logical')) |
| 416 | setMethod('apiCall', 'DummyKCO_NoSnip', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 417 | list(status = "ok") |
| 418 | }) |
| 419 | |
| 420 | kco <- new('DummyKCO_NoSnip', apiUrl = 'http://dummy/', verbose = FALSE) |
| 421 | df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE) |
| 422 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 423 | |
| 424 | q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 425 | |
| 426 | # Expect atokens initialized with empty character vectors |
| 427 | expect_true(is.data.frame(q2@collectedMatches$atokens)) |
| 428 | expect_length(q2@collectedMatches$atokens$left[[1]], 0) |
| 429 | expect_length(q2@collectedMatches$atokens$match[[1]], 0) |
| 430 | expect_length(q2@collectedMatches$atokens$right[[1]], 0) |
| 431 | |
| 432 | # POS/lemma/morph should also be empty vectors for this row |
| 433 | expect_length(q2@collectedMatches$pos$left[[1]], 0) |
| 434 | expect_length(q2@collectedMatches$lemma$left[[1]], 0) |
| 435 | expect_length(q2@collectedMatches$morph$left[[1]], 0) |
| 436 | }) |
| 437 | |
| 438 | test_that("initializes NA vectors when API returns NULL", { |
| 439 | # Dummy connection returning NULL (e.g., failed request) |
| 440 | if (!isClass("DummyKCO_NullRes")) setClass('DummyKCO_NullRes', slots = c(apiUrl='character', verbose='logical')) |
| 441 | setMethod('apiCall', 'DummyKCO_NullRes', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 442 | NULL |
| 443 | }) |
| 444 | |
| 445 | kco <- new('DummyKCO_NullRes', apiUrl = 'http://dummy/', verbose = FALSE) |
| 446 | df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE) |
| 447 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 448 | |
| 449 | q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 450 | |
| 451 | # Expect atokens initialized with NA vectors |
| 452 | expect_true(is.data.frame(q2@collectedMatches$atokens)) |
| 453 | expect_true(length(q2@collectedMatches$atokens$left[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$left[[1]]))) |
| 454 | expect_true(length(q2@collectedMatches$atokens$match[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$match[[1]]))) |
| 455 | expect_true(length(q2@collectedMatches$atokens$right[[1]]) == 1 && all(is.na(q2@collectedMatches$atokens$right[[1]]))) |
| 456 | |
| 457 | # annotation_snippet should also be NA |
| 458 | expect_true(is.na(q2@collectedMatches$annotation_snippet[[1]])) |
| 459 | }) |
| 460 | |
| 461 | test_that("initializes NA vectors when apiCall errors", { |
| 462 | # Dummy connection throwing an error |
| 463 | if (!isClass("DummyKCO_Error")) setClass('DummyKCO_Error', slots = c(apiUrl='character', verbose='logical')) |
| 464 | setMethod('apiCall', 'DummyKCO_Error', function(kco, url, json = TRUE, getHeaders = FALSE, cache = FALSE, timeout = 10) { |
| 465 | stop("boom") |
| 466 | }) |
| 467 | |
| 468 | kco <- new('DummyKCO_Error', apiUrl = 'http://dummy/', verbose = FALSE) |
| 469 | df <- data.frame(textSigle = 'A/B/C', matchStart = 1, matchEnd = 2, matchID = 'match-A/B/C-p1-2', stringsAsFactors = FALSE) |
| 470 | q <- KorAPQuery(korapConnection = kco, collectedMatches = df) |
| 471 | |
| 472 | q2 <- fetchAnnotations(q, foundry = 'tt', verbose = FALSE) |
| 473 | |
| 474 | # Expect NA vectors or empty vectors in atokens (implementation may choose either) |
| 475 | is_na_or_empty <- function(x) length(x) == 0 || (length(x) == 1 && all(is.na(x))) |
| 476 | expect_true(is_na_or_empty(q2@collectedMatches$atokens$left[[1]])) |
| 477 | expect_true(is_na_or_empty(q2@collectedMatches$atokens$match[[1]])) |
| 478 | expect_true(is_na_or_empty(q2@collectedMatches$atokens$right[[1]])) |
| 479 | }) |