Add defaults for tagger and parser models

Change-Id: I7bb99480be15707cce112c78ca9ee596fdeb5bee
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 48dfea5..a9559ba 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -83,9 +83,9 @@
             "  Generate Krill tar from wud24_sample with multiple annotation foundries:",
             "    ./build/bin/korapxmltool -f krill -D . app/src/test/resources/wud24_sample*.zip",
             "",
-            "  Large corpus processing with custom memory and performance settings:",
+            "  Large corpus annotation with custom memory and performance and default model settings:",
             "    KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_MODELS_PATH=/data/models KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
-            "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot:de.marmot -P malt:german.mco wpd25*.zip"
+            "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot -P malt wpd25*.zip"
     ]
 )
 
@@ -293,6 +293,18 @@
     // Store model path resolutions for logging after logger initialization
     private val modelPathResolutions: MutableList<Pair<String, String>> = mutableListOf()
     
+    // Default model file names per tagger/parser, used when -t/-P is given without a :MODEL part
+    private val defaultTaggerModels = mapOf(
+        "marmot" to "de.marmot",
+        "opennlp" to "de-pos-maxent.bin",
+        "corenlp" to "german-fast.tagger"
+    )
+
+    private val defaultParserModels = mapOf( // keyed by parser name; fallback when -P has no :MODEL part
+        "malt" to "german.mco",
+        "corenlp" to "germanSR.ser.gz"
+    )
+
     // Helper function to resolve model path with default search directory
     private fun resolveModelPath(modelPath: String): String? {
         // If absolute path or relative path exists as-is, return it
@@ -323,19 +335,27 @@
     }
     @Option(
         names = ["--tag-with", "-t"],
-        paramLabel = "TAGGER:MODEL",
-        description = ["Specify a tagger and a model: ${taggerFoundries}:<path/to/model>."]
+        paramLabel = "TAGGER[:MODEL]",
+        description = ["Specify a tagger and optionally a model: ${taggerFoundries}[:<path/to/model>].",
+                      "If model is omitted, defaults are: marmot→de.marmot, opennlp→de-pos-maxent.bin, corenlp→german-fast.tagger"]
     )
     fun setTagWith(tagWith: String) {
-        val pattern: Pattern = Pattern.compile("(${taggerFoundries}):(.+)")
+        // The ":<model>" suffix is optional; group(2) is null when it is omitted
+        val pattern: Pattern = Pattern.compile("(${taggerFoundries})(?::(.+))?")
         val matcher: Matcher = pattern.matcher(tagWith)
         if (!matcher.matches()) {
             throw ParameterException(spec.commandLine(),
                 String.format(Locale.ROOT, "Invalid value `%s' for option '--tag-with': "+
-                    "value does not match the expected pattern ${taggerFoundries}:<path/to/model>", tagWith))
+                    "value does not match the expected pattern ${taggerFoundries}[:<path/to/model>]", tagWith))
         } else {
             taggerName = matcher.group(1)
-            val originalModelPath = matcher.group(2)
+            val originalModelPath = matcher.group(2) ?: defaultTaggerModels[taggerName]
+
+            if (originalModelPath == null) {
+                throw ParameterException(spec.commandLine(),
+                    String.format(Locale.ROOT, "No default model available for tagger '%s'", taggerName))
+            }
+
             val resolvedModelPath = resolveModelPath(originalModelPath)
             
             if (resolvedModelPath != null) {
@@ -362,19 +382,27 @@
     private var parserModel: String? = null
     @Option(
         names = ["--parse-with", "-P"],
-        paramLabel = "parser:MODEL",
-        description = ["Specify a parser and a model: ${parserFoundries}:<path/to/model>."]
+        paramLabel = "PARSER[:MODEL]",
+        description = ["Specify a parser and optionally a model: ${parserFoundries}[:<path/to/model>].",
+                      "If model is omitted, defaults are: malt→german.mco, corenlp→germanSR.ser.gz"]
     )
     fun setParseWith(parseWith: String) {
-        val pattern: Pattern = Pattern.compile("(${parserFoundries}):(.+)")
+        // The ":<model>" suffix is optional; group(2) is null when it is omitted
+        val pattern: Pattern = Pattern.compile("(${parserFoundries})(?::(.+))?")
         val matcher: Matcher = pattern.matcher(parseWith)
         if (!matcher.matches()) {
             throw ParameterException(spec.commandLine(),
                 String.format(Locale.ROOT, "Invalid value `%s' for option '--parse-with': "+
-                        "value does not match the expected pattern (${parserFoundries}):<path/to/model>", parseWith))
+                        "value does not match the expected pattern ${parserFoundries}[:<path/to/model>]", parseWith))
         } else {
             parserName = matcher.group(1)
-            val originalModelPath = matcher.group(2)
+            val originalModelPath = matcher.group(2) ?: defaultParserModels[parserName]
+
+            if (originalModelPath == null) {
+                throw ParameterException(spec.commandLine(),
+                    String.format(Locale.ROOT, "No default model available for parser '%s'", parserName))
+            }
+
             val resolvedModelPath = resolveModelPath(originalModelPath)
             
             if (resolvedModelPath != null) {