Add option --tag-with marmot:<path/to/model>

Change-Id: I04db45f1ba2ebb44a938be6dd34131b448b19c1f
diff --git a/Readme.md b/Readme.md
index fa9d396..9641821 100644
--- a/Readme.md
+++ b/Readme.md
@@ -61,6 +61,20 @@
 ```shell script
 java  -jar app/build/libs/korapxml2conllu.jar -T 10 -A "docker run --rm -i korap/conllu2treetagger -l french" app/src/test/resources/wdf19.zip | conllu2korapxml wdf19.tree_tagger.zip
 ```
+### Tag with integrated MarMoT POS tagger
+
+```shell script
+$ java -jar ./app/build/libs/korapxml2conllu.jar -t marmot:models/de.marmot app/src/test/resources/goe.zip
+
+# foundry = base
+# filename = GOE/AGA/00000/base/tokens.xml
+# text_id = GOE_AGA.00000
+# start_offsets = 0 0 9 12
+# end_offsets = 22 8 11 22
+1       Campagne        _       _       NN      case=nom|number=sg|gender=fem   _       _       _       _
+2       in      _       _       APPR    _       _       _       _       _
+3       Frankreich      _       _       NE      case=dat|number=sg|gender=neut  _       _       _       _
+```
 
 ## Development and License
 
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
index 6c83e08..3354211 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/AnnotationToolBridge.kt
@@ -10,6 +10,7 @@
 import kotlin.jvm.Throws
 
 abstract class AnnotationToolBridge {
+    abstract val model: String
     abstract val logger: Logger
 
     @Throws(java.lang.ArrayIndexOutOfBoundsException::class, java.lang.Exception::class)
@@ -50,18 +51,22 @@
 
 class AnnotationToolBridgeFactory {
     companion object {
-        fun getAnnotationToolBridge(annotateWith: String, LOGGER: Logger): AnnotationToolBridge? {
-            return MarmotBridge(LOGGER)
+        fun getAnnotationToolBridge(taggerName: String, taggerModel: String, LOGGER: Logger): AnnotationToolBridge? {
+            // "marmot" is the only tagger name handled; any other name is logged and yields null.
+            if (taggerName != "marmot") {
+                LOGGER.warning("Unknown tagger $taggerName")
+                return null
+            }
+            return MarmotBridge(taggerModel, LOGGER)
         }
     }
 }
 
-class MarmotBridge(override val logger: Logger) : AnnotationToolBridge() {
+class MarmotBridge(override val model: String, override val logger: Logger) : AnnotationToolBridge() {
 
     val tagger: MorphTagger
 
     init {
-        val model = "/home/kupietz/KorAP/korapxml2conllu/libs/de.marmot"
         logger.info("Initializing MarMoT with model $model")
         tagger = FileUtils.loadFromFile(model)
         //tagger.setMaxLevel(100)
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 33448a7..74e74f8 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -19,6 +19,8 @@
 import java.util.logging.Level
 import java.util.logging.LogManager
 import java.util.logging.Logger
+import java.util.regex.Matcher
+import java.util.regex.Pattern
 import java.util.stream.IntStream
 import java.util.zip.ZipFile
 import javax.xml.parsers.DocumentBuilder
@@ -39,6 +41,8 @@
 class KorapXml2Conllu : Callable<Int> {
     val COMPATIBILITY_MODE = System.getenv("COMPATIBILITY_MODE") != null
 
+    @Spec lateinit var spec : Model.CommandSpec
+
     @Parameters(arity = "1..*", description = ["At least one zip file name"])
     var zipFileNames: Array<String>? = null
 
@@ -117,6 +121,34 @@
     )
     var threads: Int = Runtime.getRuntime().availableProcessors() / 2
 
+    private var taggerName: String? = null
+    private var taggerModel: String? = null
+    /**
+     * Setter for `--tag-with`: splits `marmot:<path/to/model>` into [taggerName] and [taggerModel].
+     * @throws ParameterException if the value is malformed or the model file does not exist
+     */
+    @Option(
+        names = ["--tag-with", "-t"],
+        paramLabel = "TAGGER:MODEL",
+        description = ["Specify a tagger and a model: marmot:<path/to/model>."]
+    )
+    fun setTagWith(tagWith: String) {
+        val matcher: Matcher = Pattern.compile("(marmot):(.+)").matcher(tagWith)
+        if (!matcher.matches()) {
+            throw ParameterException(spec.commandLine(),
+                "Invalid value '$tagWith' for option '--tag-with': " +
+                    "value does not match the expected pattern marmot:<path/to/model>")
+        }
+        val modelPath: String = matcher.group(2)
+        if (!File(modelPath).exists()) {
+            throw ParameterException(spec.commandLine(),
+                "Invalid value for option '--tag-with': model file '$modelPath' does not exist")
+        }
+        taggerName = matcher.group(1)
+        taggerModel = modelPath
+    }
+
+
     override fun call(): Int {
         val handler = ConsoleHandler()
         LogManager.getLogManager().reset()
@@ -156,9 +188,7 @@
         val executor: ExecutorService = Executors.newFixedThreadPool(threads)
 
         if (annotateWith.isNotEmpty()) {
-            if (!annotateWith.contains(".jar")) {
-                annotationWorkerPool = AnnotationWorkerPool(annotateWith, threads, LOGGER)
-            }
+            annotationWorkerPool = AnnotationWorkerPool(annotateWith, threads, LOGGER)
         }
 
         var zips: Array<String> = args
@@ -238,9 +268,9 @@
                     .parallel()
                     .forEach { zipEntry ->
                         LOGGER.info("Processing ${zipEntry.name} in thread ${Thread.currentThread().id}")
-                        if (annotateWith.contains(".jar") && !annotationToolBridges.containsKey(Thread.currentThread().id)) {
+                        if (taggerName != null && !annotationToolBridges.containsKey(Thread.currentThread().id)) {
                             annotationToolBridges[Thread.currentThread().id] =
-                                AnnotationToolBridgeFactory.getAnnotationToolBridge(annotateWith, LOGGER)
+                                AnnotationToolBridgeFactory.getAnnotationToolBridge(taggerName!!, taggerModel!!, LOGGER)
                         }
 
                         try {