Add internal OpenNLP POS tagger

Change-Id: I4e177ac4465f571b5376924736f278ebd019a0ab
diff --git a/app/.idea/copilot/chatSessions/blobs/version b/app/.idea/copilot/chatSessions/blobs/version
new file mode 100644
index 0000000..720d64f
--- /dev/null
+++ b/app/.idea/copilot/chatSessions/blobs/version
Binary files differ
diff --git a/app/build.gradle b/app/build.gradle
index 693a7be..2f74157 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -52,9 +52,12 @@
 
     implementation 'com.github.kupietz:cistern:v1.0.4'
     implementation 'org.maltparser:maltparser:1.9.2'
+    implementation 'org.apache.opennlp:opennlp-tools:2.3.2'
+    implementation 'org.slf4j:slf4j-simple:2.1.0-alpha1'
 }
 
 
+
 application {
     // Define the main class for the application.
     mainClass = 'de.ids_mannheim.korapxmltools.KorapXml2ConlluKt'
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
index 4e81986..ab12d60 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
@@ -18,12 +18,13 @@
 
 class AnnotationToolBridgeFactory {
     companion object {
-        const val taggerFoundries = "marmot"
+        const val taggerFoundries = "marmot|opennlp"
         const val parserFoundries = "malt"
 
         fun getAnnotationToolBridge(foundry: String, model: String, LOGGER: Logger): AnnotationToolBridge? {
             when (foundry) {
                 "marmot" -> return MarmotBridge(model, LOGGER)
+                "opennlp" -> return OpenNlpBridge(model, LOGGER)
                 "malt" -> return MaltParserBridge(model, LOGGER)
                 else -> LOGGER.severe("Unknown tagger $foundry")
             }
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 763ecaf..104572c 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -283,6 +283,7 @@
     }
 
     private fun processZipFile(zipFilePath: String, foundry: String = "base") {
+        LOGGER.info("Processing ${zipFilePath} in thread ${Thread.currentThread().id}")
         if (zipFilePath.hasCorrespondingBaseZip()) {
             val zips = arrayOf(zipFilePath, zipFilePath.correspondingBaseZip()!!)
             Arrays.stream(zips).parallel().forEach { zip ->
@@ -304,6 +305,7 @@
     }
 
     private fun processZipFileSequentially(zipFilePath: String, foundry: String = "base") {
+        LOGGER.info("Processing ${zipFilePath} in thread ${Thread.currentThread().id}")
         if (zipFilePath.hasCorrespondingBaseZip()) {
             val zips = arrayOf(zipFilePath, zipFilePath.correspondingBaseZip()!!)
             Arrays.stream(zips).parallel().forEach { zip ->
@@ -328,7 +330,7 @@
     fun processZipEntry(zipFile: ZipFile, _foundry: String, zipEntry: ZipEntry, passedWaitForMorpho: Boolean) {
         var foundry = _foundry
         var waitForMorpho = passedWaitForMorpho
-        LOGGER.info("Processing ${zipEntry.name} in thread ${Thread.currentThread().id}")
+        LOGGER.finer("Processing ${zipEntry.name} in thread ${Thread.currentThread().id}")
         if (taggerName != null && !taggerToolBridges.containsKey(Thread.currentThread().id)) {
             val tagger = AnnotationToolBridgeFactory.getAnnotationToolBridge(taggerName!!, taggerModel!!, LOGGER) as TaggerToolBridge?
             taggerToolBridges[Thread.currentThread().id] = tagger
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/OpenNlpBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/OpenNlpBridge.kt
new file mode 100644
index 0000000..694074e
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/OpenNlpBridge.kt
@@ -0,0 +1,75 @@
+package de.ids_mannheim.korapxmltools
+
+import opennlp.tools.postag.POSModel
+import opennlp.tools.postag.POSTaggerME
+import java.io.File
+import java.util.*
+import java.util.logging.Logger
+
+
+/**
+ * [TaggerToolBridge] that assigns part-of-speech tags with an OpenNLP [POSTaggerME].
+ *
+ * The [POSModel] is loaded once and cached in the companion object; every instance
+ * wraps that cached model in its own [POSTaggerME]. An instance created later with a
+ * different [model] path therefore still uses the model that was loaded first.
+ *
+ * @property model path of the OpenNLP POS model file
+ * @property logger logger that receives the model loading messages
+ */
+class OpenNlpBridge(override val model: String, override val logger: Logger) : TaggerToolBridge() {
+
+    override val foundry = "opennlp"
+    val tagger: POSTaggerME
+
+    companion object {
+        /** Model cached for all instances; this class writes it only while holding [modelLock]. */
+        var POSmodel : POSModel? = null
+
+        // Dedicated monitor: the constructor's path string is caller-supplied, so two
+        // callers passing distinct String objects would not exclude each other on it.
+        private val modelLock = Any()
+    }
+
+    init {
+        val sharedModel = synchronized(modelLock) {
+            POSmodel ?: run {
+                logger.info("Initializing OpenNLP with model $model")
+                // use {} closes the file stream once the model has been read.
+                val loaded = File(model).inputStream().use { POSModel(it) }
+                logger.info("Model $model loaded")
+                loaded.also { POSmodel = it }
+            }
+        }
+
+        tagger = POSTaggerME(sharedModel)
+    }
+
+    /**
+     * Tags one sentence and stores a MorphoSpan per token in [morphoMap].
+     *
+     * @param sentenceTokens surface forms of the sentence's tokens
+     * @param sentenceTokenOffsets keys under which results are stored; entry i belongs to token i
+     * @param morphoMap receives, per token, the tag as `xpos` and the tagger's probability
+     *   (five decimals, [Locale.ROOT]) as `misc`; nothing is done when it is null
+     */
+    override fun tagSentence(
+        sentenceTokens: MutableList<String>,
+        sentenceTokenOffsets: MutableList<String>,
+        morphoMap: MutableMap<String, KorapXml2Conllu.MorphoSpan>?
+    ) {
+        // Without a target map the tagging result would be discarded anyway.
+        val target = morphoMap ?: return
+
+        val tags = tagger.tag(sentenceTokens.toTypedArray())
+        // probs() reports the probabilities of the sequence tagged last.
+        val probs = tagger.probs()
+        tags.forEachIndexed { index, tag ->
+            target[sentenceTokenOffsets[index]] = KorapXml2Conllu.MorphoSpan(
+                xpos = tag,
+                misc = String.format(locale = Locale.ROOT, "%.5f", probs[index])
+            )
+        }
+    }
+
+}
\ No newline at end of file