Fix fetchAnnotations getting all morph values
Now using xml2 package to properly parse the query result
resolves #30
Change-Id: I827cae94791f14ce0955352fd64fd7e5683a92e6
diff --git a/tests/testthat/test-parse-annotations.R b/tests/testthat/test-parse-annotations.R
index 502f17a..4f65d57 100644
--- a/tests/testthat/test-parse-annotations.R
+++ b/tests/testthat/test-parse-annotations.R
@@ -31,7 +31,7 @@
parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)
expect_equal(parsed$token, c("Wir", "können", "alles", "außer", "Plan"))
- expect_equal(parsed$pos, c("PPER", "VVFIN", "PIS", "APPR", "NN"))
+ expect_equal(parsed$pos, c("PPER", "VVFIN", "PIS", "APPR", "NN"))
expect_equal(parsed$lemma, c("Wir", "können", "alles", "außer", "Plan"))
# morph not present in snippet; should be NA-aligned to tokens
@@ -50,9 +50,9 @@
parsed <- RKorAPClient:::parse_xml_annotations(xml_snippet)
expect_equal(parsed$token, c("Haus", "können", "gehen"))
- expect_equal(parsed$pos, c("NN", "VVFIN", NA))
- expect_equal(parsed$lemma, c(NA, "können", "gehen"))
- expect_equal(parsed$morph, c(NA, "verbform:fin", NA))
+ expect_equal(parsed$pos, c("NN", "VVFIN", NA))
+ expect_equal(parsed$lemma, c(NA, "können", "gehen"))
+ expect_equal(parsed$morph, c(NA, "verbform:fin", NA))
# Vectors must be equal length
n <- length(parsed$token)
@@ -61,3 +61,42 @@
expect_length(parsed$morph, n)
})
+test_that("parsers retain all morphological features from nested spans", {
+ xml_snippet <- '<span class="context-left"></span>
+ <span class="match">
+ <mark>
+ <span title="marmot/m:number:sg">
+ <span title="marmot/m:case:* marmot/m:case:fem">
+ <span title="tt/l:Ameisenplage tt/p:NN">Ameisenplage</span>
+ </span>
+ </span>
+ </mark>
+ </span>
+ <span class="context-right"></span>'
+
+ basic <- RKorAPClient:::parse_xml_annotations(xml_snippet)
+ structured <- RKorAPClient:::parse_xml_annotations_structured(xml_snippet)
+
+ expect_equal(basic$token, "Ameisenplage")
+ expect_equal(structured$atokens$match, "Ameisenplage")
+
+ basic_feats <- unlist(strsplit(basic$morph, "\\|"))
+ structured_feats <- unlist(strsplit(structured$morph$match, "\\|"))
+
+ expect_setequal(basic_feats, c("case:*", "case:fem", "number:sg"))
+ expect_setequal(structured_feats, c("case:*", "case:fem", "number:sg"))
+})
+
+test_that("multiple lemma and POS values are preserved", {
+ xml_snippet <- '<span class="match">
+ <mark><span title="tt/l:gehen tt/l:geh tt/p:VVFIN tt/p:VVINF">gehen</span></mark>
+ </span>'
+
+ basic <- RKorAPClient:::parse_xml_annotations(xml_snippet)
+ structured <- RKorAPClient:::parse_xml_annotations_structured(xml_snippet)
+
+ expect_equal(basic$lemma, "gehen|geh")
+ expect_equal(basic$pos, "VVFIN|VVINF")
+ expect_equal(structured$lemma$match, "gehen|geh")
+ expect_equal(structured$pos$match, "VVFIN|VVINF")
+})