Improve replacement of TreeTagger (tt) <unknown> placeholders in lemma and xpos
Change-Id: Ibf8f101e823dbef942e021f46a3dfd59c282b2f5
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index fbd0d09..2bb415e 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -471,6 +471,7 @@
private fun extractMorphoSpans(
fsSpans: NodeList
): MutableMap<String, MorphoSpan> {
+ val UNKNOWN = Regex("(UNKNOWN|<unknown>)")
val res: MutableMap<String, MorphoSpan> = HashMap()
IntStream.range(0, fsSpans.length).mapToObj(fsSpans::item).filter { node -> node is Element && node.getAttribute("type") != "alt" }.forEach { node ->
val features = (node as Element).getElementsByTagName("f")
@@ -481,9 +482,9 @@
val value = feature.textContent.trim()
if (value.isEmpty()) return@forEach
when (attr) {
- "lemma" -> if(fs.lemma == "_") fs.lemma = value.replace("UNKNOWN", "--")
+ "lemma" -> if(fs.lemma == "_") fs.lemma = value.replace(UNKNOWN, "--")
"upos" -> fs.upos = value
- "xpos", "ctag", "pos" -> if(fs.xpos == "_") fs.xpos = value.replace("UNKNOWN", "--")
+ "xpos", "ctag", "pos" -> if(fs.xpos == "_") fs.xpos = value.replace(UNKNOWN, "--")
"feats", "msd" -> if(fs.feats == "_" ) fs.feats = value
"type" -> if(fs.feats == "_") fs.feats = feature.getElementsByTagName("symbol").item(0).attributes.getNamedItem("value").textContent.trim()
// "subtype" -> if(fs.feats == "_") fs.feats += ":" + feature.getElementsByTagName("symbol").item(0).attributes.getNamedItem("value").textContent