Extract annotation classes

Change-Id: I7e3d140fa0942e084c3da5be210ef3e44b74e798
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
index cfa19e7..4e81986 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
@@ -1,15 +1,9 @@
 package de.ids_mannheim.korapxmltools
 
-import marmot.morph.MorphTagger
-import marmot.morph.Sentence
-import marmot.morph.Word
-import marmot.util.FileUtils
-import org.maltparser.MaltParserService
-import org.maltparser.core.exception.MaltChainedException
-import org.maltparser.core.syntaxgraph.DependencyStructure
 import java.util.logging.Logger
 
 interface AnnotationToolBridge {
+    val foundry: String
     val model: String
     val logger: Logger
 
@@ -21,85 +15,6 @@
     )
 }
 
-abstract class TaggerToolBridge : AnnotationToolBridge {
-
-    fun tagText(
-        tokens: Array<KorapXml2Conllu.Span>, sentenceSpans: Array<KorapXml2Conllu.Span>?, text: String
-    ): MutableMap<String, KorapXml2Conllu.MorphoSpan> {
-        val sentence_tokens = mutableListOf<String>()
-        val sentence_token_offsets = mutableListOf<String>()
-        val morphoMap = mutableMapOf<String, KorapXml2Conllu.MorphoSpan>()
-        var token_index = 0
-        var sentence_index = 0
-        tokens.forEach { span ->
-            if (span.from >= (sentenceSpans?.get(sentence_index)?.to ?: 11111110)) {
-                tagSentence(sentence_tokens, sentence_token_offsets, morphoMap)
-                sentence_tokens.clear()
-                sentence_token_offsets.clear()
-                sentence_index++
-                token_index = 1
-
-            }
-            sentence_tokens.add(text.substring(span.from, span.to))
-            sentence_token_offsets.add("${span.from}-${span.to}")
-            token_index++
-        }
-        if (sentence_tokens.size > 0) {
-            try {
-                tagSentence(sentence_tokens, sentence_token_offsets, morphoMap)
-            } catch (e: ArrayIndexOutOfBoundsException) {
-                logger.warning("Tagging failed: ${e.message} ${e.stackTrace} ${sentence_tokens.joinToString { " " }}")
-            }
-        }
-        return morphoMap
-    }
-}
-
-abstract class ParserToolBridge : AnnotationToolBridge {
-    fun parseText(
-        tokens: Array<KorapXml2Conllu.Span>,
-        morpho: MutableMap<String, KorapXml2Conllu.MorphoSpan>?,
-        sentenceSpans: Array<KorapXml2Conllu.Span>?,
-        text: String
-    ): MutableMap<String, KorapXml2Conllu.MorphoSpan> {
-        val sentence_tokens = mutableListOf<String>()
-        val sentence_token_offsets = mutableListOf<String>()
-        var token_index = 1
-        var sentence_index = 0
-        tokens.forEach { span ->
-            if (span.from >= (sentenceSpans?.get(sentence_index)?.to ?: 11111110)) {
-                tagSentence(sentence_tokens, sentence_token_offsets, morpho)
-                sentence_tokens.clear()
-                sentence_token_offsets.clear()
-                sentence_index++
-                token_index = 1
-
-            }
-            sentence_tokens.add(
-                "$token_index\t${
-                    text.substring(
-                        span.from, span.to
-                    )
-                }\t_\t${morpho?.get("${span.from}-${span.to}")?.xpos ?: "_"}\t${morpho?.get("${span.from}-${span.to}")?.xpos ?: "_"}\t${
-                    morpho?.get(
-                        "${span.from}-${span.to}"
-                    )?.feats ?: "_"
-                }\t_\t_\t_\t_"
-            )
-            sentence_token_offsets.add("${span.from}-${span.to}")
-            token_index++
-        }
-        if (sentence_tokens.size > 0) {
-            try {
-                tagSentence(sentence_tokens, sentence_token_offsets, morpho)
-            } catch (e: ArrayIndexOutOfBoundsException) {
-                logger.warning("Tagging failed: ${e.message} ${e.stackTrace} ${sentence_tokens.joinToString { " " }}")
-            }
-        }
-        return morpho!!
-    }
-}
-
 
 class AnnotationToolBridgeFactory {
     companion object {
@@ -117,80 +32,3 @@
     }
 }
 
-class MaltParserBridge(override val model: String, override val logger: Logger) : ParserToolBridge() {
-    companion object {
-        fun getFoundry(): String {
-            return "malt"
-        }
-    }
-
-    val tagger: MaltParserService
-
-    init {
-        logger.info("Initializing MaltParser with model $model")
-        tagger = MaltParserService()
-        if (model.contains("/")) {
-            val dirName = model.substringBeforeLast("/")
-            val modelName = model.substringAfterLast("/")
-            logger.info("Loading model $modelName from $dirName")
-            tagger.initializeParserModel("-w $dirName -c $modelName -m parse")
-        } else {
-            tagger.initializeParserModel("-c $model -m parse")
-        }
-        logger.info("Model $model loaded")
-    }
-
-
-    @Throws(MaltChainedException::class)
-    override fun tagSentence(
-        sentenceTokens: MutableList<String>,
-        sentenceTokenOffsets: MutableList<String>,
-        morpho: MutableMap<String, KorapXml2Conllu.MorphoSpan>?
-    ) {
-        val result = tagger.parse(sentenceTokens.toTypedArray())
-
-        (result as DependencyStructure).edges.forEach { edge ->
-            val from = edge.source.index
-            val head = edge.target.index
-            val label = edge.toString()
-            if (label.contains("DEPREL:")) {
-                val rel = edge.toString().substringAfter("DEPREL:")
-                val old = morpho?.get(sentenceTokenOffsets[head - 1])
-                morpho?.set(
-                    sentenceTokenOffsets[head - 1], KorapXml2Conllu.MorphoSpan(
-                        lemma = old?.lemma, xpos = old?.xpos, feats = old?.feats, head = from.toString(), deprel = rel
-                    )
-                )
-            }
-        }
-    }
-}
-
-class MarmotBridge(override val model: String, override val logger: Logger) : TaggerToolBridge() {
-    val tagger: MorphTagger
-
-    init {
-        logger.info("Initializing MarMoT with model $model")
-        tagger = FileUtils.loadFromFile(model)
-        //tagger.setMaxLevel(100)
-        logger.info("Model $model loaded")
-    }
-
-    @Throws(java.lang.ArrayIndexOutOfBoundsException::class, java.lang.Exception::class)
-    override fun tagSentence(
-        sentenceTokens: MutableList<String>,
-        sentenceTokenOffsets: MutableList<String>,
-        morphoMap: MutableMap<String, KorapXml2Conllu.MorphoSpan>?
-    ) {
-        val sentence = Sentence(sentenceTokens.map { Word(it) })
-        var result: List<List<String>>
-        result = tagger.tag(sentence)  // LOGGER.info("Marmot tagger finished")// return
-        for (i in 0 until result.size) {
-            val taggedWord = KorapXml2Conllu.MorphoSpan(
-                xpos = result[i][0].split("|")[0], feats = result[i][1]
-            )
-            morphoMap?.set(sentenceTokenOffsets[i], taggedWord)
-        }
-    }
-
-}
\ No newline at end of file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index a7ad96a..022923a 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -15,7 +15,6 @@
 import java.util.*
 import java.util.concurrent.Callable
 import java.util.concurrent.ConcurrentHashMap
-import java.util.concurrent.ExecutorService
 import java.util.concurrent.Executors
 import java.util.logging.ConsoleHandler
 import java.util.logging.Level
@@ -25,7 +24,6 @@
 import java.util.regex.Pattern
 import java.util.stream.IntStream
 import java.util.zip.ZipFile
-import javax.swing.text.html.parser.Parser
 import javax.xml.parsers.DocumentBuilder
 import javax.xml.parsers.DocumentBuilderFactory
 import kotlin.math.min
@@ -139,21 +137,19 @@
         description = ["Specify a tagger and a model: ${taggerFoundries}:<path/to/model>."]
     )
     fun setTagWith(tagWith: String) {
-        if (tagWith != null) {
-            val pattern: Pattern = Pattern.compile("(${taggerFoundries}):(.+)")
-            val matcher: Matcher = pattern.matcher(tagWith)
-            if (!matcher.matches()) {
+        val pattern: Pattern = Pattern.compile("(${taggerFoundries}):(.+)")
+        val matcher: Matcher = pattern.matcher(tagWith)
+        if (!matcher.matches()) {
+            throw ParameterException(spec.commandLine(),
+                String.format("Invalid value `%s' for option '--tag-with': "+
+                    "value does not match the expected pattern ${taggerFoundries}:<path/to/model>", tagWith))
+        } else {
+            taggerName = matcher.group(1)
+            taggerModel = matcher.group(2)
+            if (!File(taggerModel).exists()) {
                 throw ParameterException(spec.commandLine(),
-                    String.format("Invalid value `%s' for option '--tag-with': "+
-                        "value does not match the expected pattern ${taggerFoundries}:<path/to/model>", tagWith))
-            } else {
-                taggerName = matcher.group(1)
-                taggerModel = matcher.group(2)
-                if (!File(taggerModel).exists()) {
-                    throw ParameterException(spec.commandLine(),
-                        String.format("Invalid value for option '--tag-with':"+
-                            "model file '%s' does not exist", taggerModel, taggerModel))
-                }
+                    String.format("Invalid value for option '--tag-with': "+
+                        "model file '%s' does not exist", taggerModel))
             }
         }
     }
@@ -166,21 +162,19 @@
         description = ["Specify a parser and a model: ${parserFoundries}:<path/to/model>."]
     )
     fun setParseWith(parseWith: String) {
-        if (parseWith != null) {
-            val pattern: Pattern = Pattern.compile("(${parserFoundries}):(.+)")
-            val matcher: Matcher = pattern.matcher(parseWith)
-            if (!matcher.matches()) {
+        val pattern: Pattern = Pattern.compile("(${parserFoundries}):(.+)")
+        val matcher: Matcher = pattern.matcher(parseWith)
+        if (!matcher.matches()) {
+            throw ParameterException(spec.commandLine(),
+                String.format("Invalid value `%s' for option '--parse-with': "+
+                        "value does not match the expected pattern ${parserFoundries}:<path/to/model>", parseWith))
+        } else {
+            parserName = matcher.group(1)
+            parserModel = matcher.group(2)
+            if (!File(parserModel).exists()) {
                 throw ParameterException(spec.commandLine(),
-                    String.format("Invalid value `%s' for option '--parse-with': "+
-                            "value does not match the expected pattern (${parserFoundries}):<path/to/model>", parseWith))
-            } else {
-                parserName = matcher.group(1)
-                parserModel = matcher.group(2)
-                if (!File(parserModel).exists()) {
-                    throw ParameterException(spec.commandLine(),
-                        String.format("Invalid value for option '--parse-with':"+
-                                "model file '%s' does not exist", parserModel, parserModel))
-                }
+                    String.format("Invalid value for option '--parse-with': "+
+                            "model file '%s' does not exist", parserModel))
             }
         }
     }
@@ -302,15 +296,21 @@
         }
     }
 
-    fun processZipEntry(zipFile: ZipFile, foundry: String, zipEntry: java.util.zip.ZipEntry) {
+    fun processZipEntry(zipFile: ZipFile, _foundry: String, zipEntry: java.util.zip.ZipEntry) {
+        var foundry = _foundry
         LOGGER.info("Processing ${zipEntry.name} in thread ${Thread.currentThread().id}")
         if (taggerName != null && !taggerToolBridges.containsKey(Thread.currentThread().id)) {
             taggerToolBridges[Thread.currentThread().id] =
                 AnnotationToolBridgeFactory.getAnnotationToolBridge(taggerName!!, taggerModel!!, LOGGER) as TaggerToolBridge?
         }
+        // Derive the foundry from this thread's bridges on every call, not only on the
+        // call that creates them; otherwise later entries handled by the same thread
+        // would keep the foundry passed in by the caller.
+        taggerToolBridges[Thread.currentThread().id]?.let { foundry = it.foundry }
         if (parserName != null && !parserToolBridges.containsKey(Thread.currentThread().id)) {
             parserToolBridges[Thread.currentThread().id] =
                 AnnotationToolBridgeFactory.getAnnotationToolBridge(parserName!!, parserModel!!, LOGGER) as ParserToolBridge?
         }
+        parserToolBridges[Thread.currentThread().id]?.let { foundry = "$foundry dependency:${it.foundry}" }
 
         try {
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MaltParserBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MaltParserBridge.kt
new file mode 100644
index 0000000..da8fb05
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MaltParserBridge.kt
@@ -0,0 +1,71 @@
+package de.ids_mannheim.korapxmltools
+
+import org.maltparser.MaltParserService
+import org.maltparser.core.exception.MaltChainedException
+import org.maltparser.core.syntaxgraph.DependencyStructure
+import java.util.logging.Logger
+
+/**
+ * [ParserToolBridge] implementation that delegates to a [MaltParserService].
+ *
+ * Constructing an instance loads [model]; the service is created and
+ * initialised while holding the monitor of the `MaltParserService` class.
+ */
+class MaltParserBridge(override val model: String, override val logger: Logger) : ParserToolBridge() {
+    override val foundry = "malt"
+
+    val tagger: MaltParserService
+
+    init {
+        logger.info("Initializing MaltParser with model $model")
+        synchronized(MaltParserService::class.java) {
+            tagger = MaltParserService()
+            // A model given with a path is split into its directory part (-w) and its name (-c).
+            val options = if (model.contains("/")) {
+                val dirName = model.substringBeforeLast("/")
+                val modelName = model.substringAfterLast("/")
+                logger.info("Loading model $modelName from $dirName")
+                "-w $dirName -c $modelName -m parse"
+            } else {
+                "-c $model -m parse"
+            }
+            tagger.initializeParserModel(options)
+            logger.info("Model $model loaded")
+        }
+    }
+
+    /**
+     * Parses one sentence and writes head and dependency relation of each token into [morpho].
+     *
+     * @param sentenceTokens one line per token, passed unchanged to `MaltParserService.parse`
+     * @param sentenceTokenOffsets the [morpho] key of each token, parallel to [sentenceTokens]
+     * @param morpho annotations to update; lemma, xpos and feats of existing entries are kept.
+     *   Nothing is stored when this is `null`.
+     */
+    @Throws(MaltChainedException::class)
+    override fun tagSentence(
+        sentenceTokens: MutableList<String>,
+        sentenceTokenOffsets: MutableList<String>,
+        morpho: MutableMap<String, KorapXml2Conllu.MorphoSpan>?
+    ) {
+        val graph = tagger.parse(sentenceTokens.toTypedArray()) as DependencyStructure
+
+        for (edge in graph.edges) {
+            val headIndex = edge.source.index
+            val dependentIndex = edge.target.index
+            val description = edge.toString()
+            if (morpho == null || !description.contains("DEPREL:")) continue
+
+            // The edge's target index i refers to entry i - 1 of the offsets list.
+            val key = sentenceTokenOffsets[dependentIndex - 1]
+            val previous = morpho[key]
+            morpho[key] = KorapXml2Conllu.MorphoSpan(
+                lemma = previous?.lemma,
+                xpos = previous?.xpos,
+                feats = previous?.feats,
+                head = headIndex.toString(),
+                deprel = description.substringAfter("DEPREL:")
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MarmotBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MarmotBridge.kt
new file mode 100644
index 0000000..b9caa5e
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/MarmotBridge.kt
@@ -0,0 +1,47 @@
+package de.ids_mannheim.korapxmltools
+
+import marmot.morph.MorphTagger
+import marmot.morph.Sentence
+import marmot.morph.Word
+import marmot.util.FileUtils
+import java.util.logging.Logger
+
+/**
+ * [TaggerToolBridge] implementation that delegates to a MarMoT [MorphTagger].
+ *
+ * Constructing an instance loads the tagger from the file named by [model].
+ */
+class MarmotBridge(override val model: String, override val logger: Logger) : TaggerToolBridge() {
+    override val foundry = "marmot"
+    val tagger: MorphTagger
+
+    init {
+        logger.info("Initializing MarMoT with model $model")
+        tagger = FileUtils.loadFromFile(model)
+        //tagger.setMaxLevel(100)
+        logger.info("Model $model loaded")
+    }
+
+    /**
+     * Tags one sentence and stores a new entry per token in [morphoMap].
+     *
+     * @param sentenceTokens the word forms of the sentence
+     * @param sentenceTokenOffsets the [morphoMap] key of each token, parallel to [sentenceTokens]
+     * @param morphoMap receives the result; nothing is stored when this is `null`
+     */
+    @Throws(java.lang.ArrayIndexOutOfBoundsException::class, java.lang.Exception::class)
+    override fun tagSentence(
+        sentenceTokens: MutableList<String>,
+        sentenceTokenOffsets: MutableList<String>,
+        morphoMap: MutableMap<String, KorapXml2Conllu.MorphoSpan>?
+    ) {
+        val words = sentenceTokens.map { Word(it) }
+        val tagsPerToken: List<List<String>> = tagger.tag(Sentence(words))
+        tagsPerToken.forEachIndexed { position, tags ->
+            // tags[0] is cut at the first '|' and used as xpos; tags[1] is used as feats.
+            val taggedWord = KorapXml2Conllu.MorphoSpan(xpos = tags[0].split("|")[0], feats = tags[1])
+            morphoMap?.set(sentenceTokenOffsets[position], taggedWord)
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/ParserToolBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/ParserToolBridge.kt
new file mode 100644
index 0000000..8b1fa8f
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/ParserToolBridge.kt
@@ -0,0 +1,73 @@
+package de.ids_mannheim.korapxmltools
+
+import java.util.logging.Level
+
+/**
+ * Base class for parser bridges: splits a text into sentences and hands each
+ * sentence to the bridge's `tagSentence` implementation.
+ */
+abstract class ParserToolBridge : AnnotationToolBridge {
+
+    /**
+     * Parses [tokens] of [text] sentence by sentence.
+     *
+     * Each token is rendered as a line of ten tab-separated columns (index within
+     * the sentence, word form, `_`, xpos, xpos, feats, and four `_` placeholders).
+     *
+     * @param tokens character spans of the tokens, in text order
+     * @param morpho existing annotations keyed by "from-to" offsets; supplies xpos and
+     *   feats and receives the parse result. When `null`, a new map is created.
+     * @param sentenceSpans character spans of the sentences; when `null`, or once the
+     *   given spans are used up, all remaining tokens form a single sentence
+     * @param text the text the spans refer to
+     * @return [morpho], or the newly created map when [morpho] is `null`
+     */
+    fun parseText(
+        tokens: Array<KorapXml2Conllu.Span>,
+        morpho: MutableMap<String, KorapXml2Conllu.MorphoSpan>?,
+        sentenceSpans: Array<KorapXml2Conllu.Span>?,
+        text: String
+    ): MutableMap<String, KorapXml2Conllu.MorphoSpan> {
+        // Fall back to a fresh map so that a missing tagger result no longer ends in an NPE.
+        val annotations = morpho ?: mutableMapOf()
+        val sentenceTokens = mutableListOf<String>()
+        val sentenceTokenOffsets = mutableListOf<String>()
+        var tokenIndex = 1
+        var sentenceIndex = 0
+        tokens.forEach { span ->
+            // getOrNull: tokens may lie behind the last sentence span.
+            val sentenceEnd = sentenceSpans?.getOrNull(sentenceIndex)?.to ?: Int.MAX_VALUE
+            if (span.from >= sentenceEnd) {
+                tagSentenceSafely(sentenceTokens, sentenceTokenOffsets, annotations)
+                sentenceTokens.clear()
+                sentenceTokenOffsets.clear()
+                sentenceIndex++
+                tokenIndex = 1
+            }
+            val offsets = "${span.from}-${span.to}"
+            val form = text.substring(span.from, span.to)
+            val xpos = annotations[offsets]?.xpos ?: "_"
+            val feats = annotations[offsets]?.feats ?: "_"
+            sentenceTokens.add("$tokenIndex\t$form\t_\t$xpos\t$xpos\t$feats\t_\t_\t_\t_")
+            sentenceTokenOffsets.add(offsets)
+            tokenIndex++
+        }
+        tagSentenceSafely(sentenceTokens, sentenceTokenOffsets, annotations)
+        return annotations
+    }
+
+    /** Passes one sentence to `tagSentence`; skips empty sentences and logs index errors for every sentence. */
+    private fun tagSentenceSafely(
+        sentenceTokens: MutableList<String>,
+        sentenceTokenOffsets: MutableList<String>,
+        annotations: MutableMap<String, KorapXml2Conllu.MorphoSpan>
+    ) {
+        if (sentenceTokens.isEmpty()) return
+        try {
+            tagSentence(sentenceTokens, sentenceTokenOffsets, annotations)
+        } catch (e: ArrayIndexOutOfBoundsException) {
+            // joinToString(" ") prints the tokens; passing the exception logs its stack trace.
+            logger.log(Level.WARNING, "Tagging failed: ${e.message} ${sentenceTokens.joinToString(" ")}", e)
+        }
+    }
+}
\ No newline at end of file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/TaggerToolBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/TaggerToolBridge.kt
new file mode 100644
index 0000000..03e04e8
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/TaggerToolBridge.kt
@@ -0,0 +1,57 @@
+package de.ids_mannheim.korapxmltools
+
+import java.util.logging.Level
+
+/**
+ * Base class for tagger bridges: splits a text into sentences and hands each
+ * sentence to the bridge's `tagSentence` implementation.
+ */
+abstract class TaggerToolBridge : AnnotationToolBridge {
+
+    /**
+     * Tags [tokens] of [text] sentence by sentence.
+     *
+     * @param tokens character spans of the tokens, in text order
+     * @param sentenceSpans character spans of the sentences; when `null`, or once the
+     *   given spans are used up, all remaining tokens form a single sentence
+     * @param text the text the spans refer to
+     * @return a new map that was handed to `tagSentence` for every sentence
+     */
+    fun tagText(
+        tokens: Array<KorapXml2Conllu.Span>, sentenceSpans: Array<KorapXml2Conllu.Span>?, text: String
+    ): MutableMap<String, KorapXml2Conllu.MorphoSpan> {
+        val sentenceTokens = mutableListOf<String>()
+        val sentenceTokenOffsets = mutableListOf<String>()
+        val morphoMap = mutableMapOf<String, KorapXml2Conllu.MorphoSpan>()
+        var sentenceIndex = 0
+        tokens.forEach { span ->
+            // getOrNull: tokens may lie behind the last sentence span.
+            val sentenceEnd = sentenceSpans?.getOrNull(sentenceIndex)?.to ?: Int.MAX_VALUE
+            if (span.from >= sentenceEnd) {
+                tagSentenceSafely(sentenceTokens, sentenceTokenOffsets, morphoMap)
+                sentenceTokens.clear()
+                sentenceTokenOffsets.clear()
+                sentenceIndex++
+            }
+            sentenceTokens.add(text.substring(span.from, span.to))
+            sentenceTokenOffsets.add("${span.from}-${span.to}")
+        }
+        tagSentenceSafely(sentenceTokens, sentenceTokenOffsets, morphoMap)
+        return morphoMap
+    }
+
+    /** Passes one sentence to `tagSentence`; skips empty sentences and logs index errors for every sentence. */
+    private fun tagSentenceSafely(
+        sentenceTokens: MutableList<String>,
+        sentenceTokenOffsets: MutableList<String>,
+        morphoMap: MutableMap<String, KorapXml2Conllu.MorphoSpan>
+    ) {
+        if (sentenceTokens.isEmpty()) return
+        try {
+            tagSentence(sentenceTokens, sentenceTokenOffsets, morphoMap)
+        } catch (e: ArrayIndexOutOfBoundsException) {
+            // joinToString(" ") prints the tokens; passing the exception logs its stack trace.
+            logger.log(Level.WARNING, "Tagging failed: ${e.message} ${sentenceTokens.joinToString(" ")}", e)
+        }
+    }
+}
\ No newline at end of file