Make tokens and morpho behaviour more compatible (wait for morpho once morpho.xml is seen; optionally derive missing tokens from its spans)
Change-Id: I374fba14b56258de65948202f3987feab971c9eb
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 4ab046d..c94360d 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -183,6 +183,7 @@
waitForMorpho: Boolean = false,
) {
try {
+ var waitForMorpho = waitForMorpho
ZipFile(zipFilePath).use { zipFile ->
zipFile.stream().parallel().forEach { zipEntry ->
try {
@@ -222,8 +223,12 @@
}
"morpho.xml" -> {
+ waitForMorpho = true
val fsSpans: NodeList = doc.getElementsByTagName("span")
morpho[docId] = extractMorphoSpans(fsSpans)
+ if (!tokens.containsKey(docId) && sBoundsFromMorpho) {
+ tokens[docId] = extractSpans(fsSpans)
+ }
}
}
if (texts[docId] != null && sentences[docId] != null && tokens[docId] != null && (!waitForMorpho || morpho[docId] != null)) {