Add test for parse_xml_annotations

Change-Id: Ib34b18aedd3fd0b52dea99141540bb7da2700b00
diff --git a/tests/testthat/test-parse-annotations.R b/tests/testthat/test-parse-annotations.R
new file mode 100644
index 0000000..502f17a
--- /dev/null
+++ b/tests/testthat/test-parse-annotations.R
@@ -0,0 +1,63 @@
+test_that("parse_xml_annotations returns empty vectors for empty input", {
+  # The three inputs this test treats as "empty": NULL, NA and "".
+  empty_inputs <- list(NULL, NA, "")
+  layers <- c("token", "lemma", "pos", "morph")
+
+  for (input in empty_inputs) {
+    parsed <- RKorAPClient:::parse_xml_annotations(input)
+    expect_true(is.list(parsed))
+    expect_named(parsed, layers)
+    for (layer in layers) {
+      expect_length(parsed[[layer]], 0)
+    }
+  }
+})
+
+test_that("parse_xml_annotations extracts tokens/pos/lemma across multiple <mark> blocks", {
+  marked_snippet <- '<span class="context-left"></span>
+  <span class="match">
+    <span title="tt/l:Wir"><span title="tt/p:PPER">Wir</span></span>
+    <mark>
+      <span title="tt/l:können"><span title="tt/p:VVFIN">können</span></span>
+    </mark>
+    <span title="tt/l:alles"><span title="tt/p:PIS">alles</span></span>
+    <mark>
+      <span title="tt/l:außer"><span title="tt/p:APPR">außer</span></span>
+      <span title="tt/l:Plan"><span title="tt/p:NN">Plan</span></span>
+    </mark>
+  </span>
+  <span class="context-right"></span>'
+
+  annotations <- RKorAPClient:::parse_xml_annotations(marked_snippet)
+  surface_forms <- c("Wir", "können", "alles", "außer", "Plan")
+  # In this snippet every lemma is spelled exactly like its token.
+  expect_equal(annotations$token, surface_forms)
+  expect_equal(annotations$lemma, surface_forms)
+  expect_equal(annotations$pos, c("PPER", "VVFIN", "PIS", "APPR", "NN"))
+  # No morph annotation appears in the snippet: expect one NA per token.
+  expect_true(all(is.na(annotations$morph)))
+  expect_length(annotations$morph, length(annotations$token))
+})
+
+test_that("parse_xml_annotations handles missing lemma/pos/morph gracefully", {
+  # Token 1 has POS only; token 2 has lemma, POS and morph; token 3 lemma only.
+  xml_snippet <- '<span class="match">
+    <span title="tt/p:NN">Haus</span>
+    <mark><span title="tt/l:können tt/p:VVFIN marmot/m:verbform:fin">können</span></mark>
+    <span title="tt/l:gehen">gehen</span>
+  </span>'
+  result <- RKorAPClient:::parse_xml_annotations(xml_snippet)
+  n_tokens <- length(result$token)
+
+  # A layer missing from a token's title is expected to come back as NA.
+  expect_equal(result$token, c("Haus", "können", "gehen"))
+  expect_equal(result$lemma, c(NA, "können", "gehen"))
+  expect_equal(result$pos, c("NN", "VVFIN", NA))
+  expect_equal(result$morph, c(NA, "verbform:fin", NA))
+
+  # Each annotation layer must stay aligned with the token vector.
+  expect_length(result$lemma, n_tokens)
+  expect_length(result$pos, n_tokens)
+  expect_length(result$morph, n_tokens)
+})
+