Make foundry names more general

Change-Id: I831ca4f3c9ef881ae57b527945a0fb05f8a34c9e
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 5cd2a43..f2f6241 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -469,8 +469,9 @@
         var morphoByFoundry: MutableMap<String, MutableMap<String, MorphoSpan>> = mutableMapOf(),
         var structureSpans: MutableList<StructureSpan> = mutableListOf(),
         var extractedAttributes: MutableMap<String, String> = mutableMapOf(),
-        var corenlpSentencesCollected: Boolean = false,
-        var corenlpConstituencyCollected: Boolean = false
+        var  lpSentencesCollected: Boolean = false,
+        var sentencesCollectedByFoundry: MutableSet<String> = mutableSetOf(),
+        var constituencyCollectedByFoundry: MutableSet<String> = mutableSetOf()
     )
 
     private val BASE_STRUCTURE_FOUNDRIES = setOf("base", "dereko")
@@ -1512,16 +1513,16 @@
 
                     "sentences.xml" -> {
                         LOGGER.fine("Sentences entry foundry=$foundry for $docId from ${zipEntry.name}")
-                        if (outputFormat == OutputFormat.KRILL && foundry.startsWith("corenlp")) {
+                        if (outputFormat == OutputFormat.KRILL) {
                             val sentenceSpans: NodeList = doc.getElementsByTagName("span")
-                            collectCorenlpSentences(docId, sentenceSpans)
+                            collectSentences(docId, foundry, sentenceSpans)
                         }
                     }
 
                     "constituency.xml" -> {
-                        if (outputFormat == OutputFormat.KRILL && foundry.startsWith("corenlp")) {
+                        if (outputFormat == OutputFormat.KRILL) {
                             val constituencySpans: NodeList = doc.getElementsByTagName("span")
-                            collectCorenlpConstituency(docId, constituencySpans)
+                            collectConstituency(docId, foundry, constituencySpans)
                         }
                     }
                 }
@@ -2631,7 +2632,7 @@
         }
     }
 
-    private data class CorenlpConstituencyNode(
+    private data class ConstituencyNode(
         val id: String,
         val from: Int,
         val to: Int,
@@ -2639,7 +2640,7 @@
         val children: MutableList<String> = mutableListOf()
     )
 
-    private fun collectCorenlpSentences(docId: String, spans: NodeList) {
+    private fun collectSentences(docId: String, foundry: String, spans: NodeList) {
         if (outputTexts.contains(docId)) return
 
         val textData = krillData.getOrPut(docId) {
@@ -2647,14 +2648,14 @@
         }
 
         synchronized(textData) {
-            if (textData.corenlpSentencesCollected) return
+            if (textData.sentencesCollectedByFoundry.contains(foundry)) return
             for (i in 0 until spans.length) {
                 val span = spans.item(i) as? Element ?: continue
                 val from = span.getAttribute("from").toIntOrNull() ?: continue
                 val to = span.getAttribute("to").toIntOrNull() ?: continue
                 textData.structureSpans.add(
                     StructureSpan(
-                        layer = "corenlp/s:s",
+                        layer = "$foundry/s:s",
                         from = from,
                         to = to,
                         tokenFrom = -1,
@@ -2664,14 +2665,14 @@
                     )
                 )
             }
-            textData.corenlpSentencesCollected = true
+            textData.sentencesCollectedByFoundry.add(foundry)
         }
     }
 
-    private fun collectCorenlpConstituency(docId: String, spans: NodeList) {
+    private fun collectConstituency(docId: String, foundry: String, spans: NodeList) {
         if (outputTexts.contains(docId)) return
 
-        val nodesById = mutableMapOf<String, CorenlpConstituencyNode>()
+        val nodesById = mutableMapOf<String, ConstituencyNode>()
         val nonRootIds = mutableSetOf<String>()
 
         for (i in 0 until spans.length) {
@@ -2695,7 +2696,7 @@
             }
             if (label.isNullOrBlank()) continue
 
-            val node = CorenlpConstituencyNode(id, from, to, label)
+            val node = ConstituencyNode(id, from, to, label)
 
             val relElements = span.getElementsByTagName("rel")
             for (j in 0 until relElements.length) {
@@ -2725,14 +2726,14 @@
         }
 
         synchronized(textData) {
-            if (textData.corenlpConstituencyCollected) return
-            LOGGER.fine("Collecting corenlp constituency for $docId: ${nodesById.size} nodes, roots=${nodesById.keys.count { it !in nonRootIds }}")
+            if (textData.constituencyCollectedByFoundry.contains(foundry)) return
+            LOGGER.fine("Collecting constituency for $docId from foundry $foundry: ${nodesById.size} nodes, roots=${nodesById.keys.count { it !in nonRootIds }}")
 
             fun traverse(nodeId: String, depth: Int) {
                 val node = nodesById[nodeId] ?: return
                 textData.structureSpans.add(
                     StructureSpan(
-                        layer = "corenlp/c:${node.label}",
+                        layer = "$foundry/c:${node.label}",
                         from = node.from,
                         to = node.to,
                         tokenFrom = -1,
@@ -2748,7 +2749,7 @@
 
             val rootIds = nodesById.keys.filter { it !in nonRootIds }
             rootIds.forEach { traverse(it, 0) }
-            textData.corenlpConstituencyCollected = true
+            textData.constituencyCollectedByFoundry.add(foundry)
         }
     }