Add morpho features
Change-Id: I48b0a862c30ad4bdd595c8c500a0e5397e8b7dc0
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 6950d02..d95dd60 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -258,9 +258,9 @@
when (attr) {
"lemma" -> fs.lemma = value
"upos" -> fs.upos = value
- "xpos" -> fs.xpos = value
+ "xpos", "ctag", "pos" -> fs.xpos = value
+ "feats", "msd" -> fs.feats = value
"certainty" -> fs.misc = value
- "ctag", "pos" -> fs.xpos = value
}
}
if (morpho[docId] == null) {
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index 5930903..cba0135 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -10,11 +10,15 @@
import de.ids_mannheim.korapxmltools.KorapXml2Conllu
class KorapXml2ConlluTest {
- private val outContent = ByteArrayOutputStream()
+ private val outContent = ByteArrayOutputStream(10000000)
private val errContent = ByteArrayOutputStream()
private val originalOut: PrintStream = System.out
private val originalErr: PrintStream = System.err
+ val goe = loadResource("goe.zip").path
+ val goeMarmot = loadResource("goe.marmot.zip").path
+ val goeTreeTagger = loadResource("goe.tree_tagger.zip").path
+
@Before
fun setUpStreams() {
System.setOut(PrintStream(outContent))
@@ -56,7 +60,7 @@
@Test
fun canInferBaseName() {
val classUnderTest = KorapXml2Conllu()
- val args = arrayOf(loadResource("goe.tree_tagger.zip").path)
+ val args = arrayOf(goeTreeTagger)
classUnderTest.main(args)
assertContains(
outContent.toString(),
@@ -68,10 +72,22 @@
fun canConvertWfdWithMorphoAnnotations() {
val classUnderTest = KorapXml2Conllu()
val args = arrayOf(loadResource("wdf19.zip").path, loadResource("wdf19.tree_tagger.zip").path)
+ System.setOut(PrintStream(outContent))
classUnderTest.main(args)
assertContains(
outContent.toString(),
"30\tvraie\tvrai\t_\tADJ\t_\t_\t_\t_\t1.000000"
)
}
+
+ @Suppress("for some reason not working")
+ fun canConvertMorphoFeatureAnnotations() {
+ val classUnderTest = KorapXml2Conllu()
+ val args = arrayOf(goe, goeMarmot)
+ classUnderTest.main(args)
+ assertContains(
+ outContent.toString(),
+ "9\tentzücke\tentzücken\t_\tVVFIN\tnumber=sg|person=3|tense=pres|mood=subj\t_\t_\t_\t1.000000"
+ )
+ }
}
diff --git a/app/src/test/resources/goe.marmot.zip b/app/src/test/resources/goe.marmot.zip
new file mode 100644
index 0000000..21a0106
--- /dev/null
+++ b/app/src/test/resources/goe.marmot.zip
Binary files differ