Fix fetchAnnotations getting all morph values Now using xml2 package to properly parse the query result resolves #30 Change-Id: I827cae94791f14ce0955352fd64fd7e5683a92e6

commit: cd4521884fd23558757338fcbbf6e67702f9a780 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Thu Oct 09 13:28:41 2025 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Oct 09 14:53:00 2025 +0200
tree: db32df9430c4d52b629dbe9368782d8174f8a9b0
parent: 336c85d5c1d3986e1846c2b7e5dee6a4c40192e9 [diff]
diff --git a/tests/testthat/test-fetchAnnotations.R b/tests/testthat/test-fetchAnnotations.R
index 117f93a..20a73ca 100644
--- a/tests/testthat/test-fetchAnnotations.R
+++ b/tests/testthat/test-fetchAnnotations.R

@@ -338,6 +338,34 @@
   }
 })
 
+test_that("issue #30 regression keeps all morphological features", {
+  skip_if_offline()
+
+  matches <- KorAPConnection(verbose = FALSE, cache = FALSE) %>%
+    corpusQuery("aufgrund einer Ameisenplage", vc = "availability=/CC.*/", metadataOnly = FALSE) %>%
+    fetchNext(maxFetch = 5) %>%
+    fetchAnnotations("marmot", verbose = FALSE) %>%
+    slot("collectedMatches")
+
+  skip_if(is.null(matches) || nrow(matches) == 0, "No matches found for issue #30 query")
+
+  pos_tail <- sapply(matches[["pos"]][["match"]], tail, 1)
+  morph_tail <- sapply(matches[["morph"]][["match"]], tail, 1)
+
+  # Ensure we actually received morphological annotations for this query
+  skip_if(all(is.na(morph_tail)), "No morphological annotations returned for issue #30 query")
+
+  # POS should still be available for the same tokens (original sanity check from issue report)
+  expect_true(all(!is.na(pos_tail)))
+
+  # Regression assertion: MarMoT morph tail must keep all features, not just the last one
+  observed_morph <- morph_tail[!is.na(morph_tail)]
+  expect_true(all(grepl("\\|", observed_morph)))
+  expect_true(all(grepl("case:", observed_morph)))
+  expect_true(all(grepl("gender:", observed_morph)))
+  expect_true(all(grepl("number:", observed_morph)))
+})
+
 test_that("fetchAnnotations adds missing layer without overwriting existing, and can overwrite when requested", {
   # Define a separate dummy connection that serves different snippets by foundry
   if (!isClass("DummyKCO2")) setClass('DummyKCO2', slots = c(apiUrl='character', verbose='logical'))

diff --git a/tests/testthat/test-parse-annotations.R b/tests/testthat/test-parse-annotations.R
index 502f17a..4f65d57 100644
--- a/tests/testthat/test-parse-annotations.R
+++ b/tests/testthat/test-parse-annotations.R

@@ -31,7 +31,7 @@
   parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)
 
   expect_equal(parsed$token, c("Wir", "können", "alles", "außer", "Plan"))
-  expect_equal(parsed$pos,   c("PPER", "VVFIN", "PIS", "APPR", "NN"))
+  expect_equal(parsed$pos, c("PPER", "VVFIN", "PIS", "APPR", "NN"))
   expect_equal(parsed$lemma, c("Wir", "können", "alles", "außer", "Plan"))
 
   # morph not present in snippet; should be NA-aligned to tokens
@@ -50,9 +50,9 @@
   parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)
 
   expect_equal(parsed$token, c("Haus", "können", "gehen"))
-  expect_equal(parsed$pos,   c("NN",   "VVFIN", NA))
-  expect_equal(parsed$lemma, c(NA,      "können", "gehen"))
-  expect_equal(parsed$morph, c(NA,      "verbform:fin", NA))
+  expect_equal(parsed$pos, c("NN", "VVFIN", NA))
+  expect_equal(parsed$lemma, c(NA, "können", "gehen"))
+  expect_equal(parsed$morph, c(NA, "verbform:fin", NA))
 
   # Vectors must be equal length
   n <- length(parsed$token)
@@ -61,3 +61,42 @@
   expect_length(parsed$morph, n)
 })
 
+test_that("parsers retain all morphological features from nested spans", {
+  xml_snippet <- '<span class="context-left"></span>
+  <span class="match">
+    <mark>
+      <span title="marmot/m:number:sg">
+        <span title="marmot/m:case:* marmot/m:case:fem">
+          <span title="tt/l:Ameisenplage tt/p:NN">Ameisenplage</span>
+        </span>
+      </span>
+    </mark>
+  </span>
+  <span class="context-right"></span>'
+
+  basic <- RKorAPClient:::parse_xml_annotations(xml_snippet)
+  structured <- RKorAPClient:::parse_xml_annotations_structured(xml_snippet)
+
+  expect_equal(basic$token, "Ameisenplage")
+  expect_equal(structured$atokens$match, "Ameisenplage")
+
+  basic_feats <- unlist(strsplit(basic$morph, "\\|"))
+  structured_feats <- unlist(strsplit(structured$morph$match, "\\|"))
+
+  expect_setequal(basic_feats, c("case:*", "case:fem", "number:sg"))
+  expect_setequal(structured_feats, c("case:*", "case:fem", "number:sg"))
+})
+
+test_that("multiple lemma and POS values are preserved", {
+  xml_snippet <- '<span class="match">
+    <mark><span title="tt/l:gehen tt/l:geh tt/p:VVFIN tt/p:VVINF">gehen</span></mark>
+  </span>'
+
+  basic <- RKorAPClient:::parse_xml_annotations(xml_snippet)
+  structured <- RKorAPClient:::parse_xml_annotations_structured(xml_snippet)
+
+  expect_equal(basic$lemma, "gehen|geh")
+  expect_equal(basic$pos, "VVFIN|VVINF")
+  expect_equal(structured$lemma$match, "gehen|geh")
+  expect_equal(structured$pos$match, "VVFIN|VVINF")
+})
commit	cd4521884fd23558757338fcbbf6e67702f9a780	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Oct 09 13:28:41 2025 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Oct 09 14:53:00 2025 +0200
tree	db32df9430c4d52b629dbe9368782d8174f8a9b0
parent	336c85d5c1d3986e1846c2b7e5dee6a4c40192e9 [diff]