Introduce env variable KORAPXMLTOOL_MODELS_PATH

Change-Id: I7f4a1d994c7e7b7600df6aac128ecbd995809ab2
diff --git a/Readme.md b/Readme.md
index 2c61e19..85e5d2a 100644
--- a/Readme.md
+++ b/Readme.md
@@ -90,7 +90,7 @@
 Example for large NOW export with progress and exclusions:
 
 ```
-KORAPXMLTOOL_XMX=64g KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
+KORAPXMLTOOL_XMX=64g KORAPXMLTOOL_MODELS_PATH=/data/models KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
      ./build/bin/korapxmltool -l info --threads 100 --zip-parallelism 8 \
      --lemma-only --sequential -f now \
      --exclude-zip-glob 'w?d24.tree_tagger.zip' \
@@ -120,8 +120,15 @@
 
 You need to download the pre-trained MarMoT models from the [MarMoT models repository](http://cistern.cis.lmu.de/marmot/models/CURRENT/).
 
+You can pass a path to the model file (absolute or relative to the current directory), or set the `KORAPXMLTOOL_MODELS_PATH` environment variable to a directory that is searched when the model is not found at the given path:
+
 ```shell script
+# With an explicit path to the model file
 ./build/bin/korapxmltool -f zip -t marmot:models/de.marmot app/src/test/resources/goe.zip
+
+# With KORAPXMLTOOL_MODELS_PATH (searches in /data/models/ if model not found locally)
+export KORAPXMLTOOL_MODELS_PATH=/data/models
+./build/bin/korapxmltool -f zip -t marmot:de.marmot app/src/test/resources/goe.zip
 ```
 
 ### Tagging with integrated OpenNLP POS tagger directly to a new KorAP-XML ZIP file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 491f088..43c3ea4 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -74,7 +74,8 @@
             "    ./build/bin/korapxmltool -f now /vol/corpora/DeReKo/current/KorAP/zip/*24.zip | pv > dach24.txt",
             "",
             "  Tag with external POS tagger:",
-            "    ./build/bin/korapxmltool -f zip -t marmot:models/de.marmot app/src/test/resources/goe.zip",
+            "    ./build/bin/korapxmltool -f zip -t marmot:de.marmot app/src/test/resources/goe.zip",
+            "    # (uses KORAPXMLTOOL_MODELS_PATH if model not found in current directory)",
             "",
             "  Use external spaCy annotation (without dependencies):",
             "    ./build/bin/korapxmltool -T4 -A \"docker run -e SPACY_USE_DEPENDENCIES=False --rm -i korap/conllu2spacy:latest\" -f zip ./app/src/test/resources/goe.zip",
@@ -83,8 +84,8 @@
             "    ./build/bin/korapxmltool -f krill -D out/krill app/src/test/resources/wud24_sample.zip app/src/test/resources/wud24_sample.spacy.zip app/src/test/resources/wud24_sample.marmot-malt.zip",
             "",
             "  Large corpus processing with custom memory and performance settings:",
-            "    KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
-            "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot:models/de.marmot -P maltparser:models/de.malt wpd25*.zip"
+            "    KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_MODELS_PATH=/data/models KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
+            "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot:de.marmot -P maltparser:de.malt wpd25*.zip"
     ]
 )
 
@@ -288,6 +289,38 @@
 
     private var taggerName: String? = null
     private var taggerModel: String? = null
+    
+    // (original, resolved) model path pairs recorded while options are parsed; logged once the logger is configured
+    private val modelPathResolutions: MutableList<Pair<String, String>> = mutableListOf()
+    
+    // Resolves a model path: returns it unchanged if it exists, else its absolute location under KORAPXMLTOOL_MODELS_PATH, else null
+    private fun resolveModelPath(modelPath: String): String? {
+        // Path exists as given (absolute, or relative to the working directory): return it unchanged
+        if (File(modelPath).exists()) {
+            return modelPath
+        }
+        
+        // Otherwise fall back to the directory named by KORAPXMLTOOL_MODELS_PATH (skipped when unset or blank)
+        val defaultModelsPath = System.getenv("KORAPXMLTOOL_MODELS_PATH")
+        if (!defaultModelsPath.isNullOrBlank()) {
+            val resolvedPath = File(defaultModelsPath, modelPath).absolutePath
+            if (File(resolvedPath).exists()) {
+                return resolvedPath
+            }
+            
+            // modelPath had a directory part (its last name component differs from it): retry with the bare file name
+            val fileName = File(modelPath).name
+            if (fileName != modelPath) {
+                val fileNamePath = File(defaultModelsPath, fileName).absolutePath
+                if (File(fileNamePath).exists()) {
+                    return fileNamePath
+                }
+            }
+        }
+        
+        // Not found as given nor under KORAPXMLTOOL_MODELS_PATH; the caller decides how to report it
+        return null
+    }
     @Option(
         names = ["--tag-with", "-t"],
         paramLabel = "TAGGER:MODEL",
@@ -302,11 +335,25 @@
                     "value does not match the expected pattern ${taggerFoundries}:<path/to/model>", tagWith))
         } else {
             taggerName = matcher.group(1)
-            taggerModel = matcher.group(2)
-            if (!File(taggerModel!!).exists()) {
+            val originalModelPath = matcher.group(2)
+            val resolvedModelPath = resolveModelPath(originalModelPath)
+            
+            if (resolvedModelPath != null) {
+                taggerModel = resolvedModelPath
+                if (resolvedModelPath != originalModelPath) {
+                    // Store for logging after logger initialization
+                    modelPathResolutions.add(originalModelPath to resolvedModelPath)
+                }
+            } else {
+                val defaultModelsPath = System.getenv("KORAPXMLTOOL_MODELS_PATH")
+                val searchInfo = if (defaultModelsPath != null) {
+                    " (searched in current directory and KORAPXMLTOOL_MODELS_PATH='$defaultModelsPath')"
+                } else {
+                    " (searched in current directory; set KORAPXMLTOOL_MODELS_PATH environment variable to specify default model search path)"
+                }
                 throw ParameterException(spec.commandLine(),
-                    String.format(Locale.ROOT, "Invalid value for option '--tag-with':"+
-                        "model file '%s' does not exist", taggerModel, taggerModel))
+                    String.format(Locale.ROOT, "Invalid value for option '--tag-with': "+
+                        "model file '%s' does not exist%s", originalModelPath, searchInfo))
             }
         }
     }
@@ -327,11 +374,25 @@
                         "value does not match the expected pattern (${parserFoundries}):<path/to/model>", parseWith))
         } else {
             parserName = matcher.group(1)
-            parserModel = matcher.group(2)
-            if (!File(parserModel!!).exists()) {
+            val originalModelPath = matcher.group(2)
+            val resolvedModelPath = resolveModelPath(originalModelPath)
+            
+            if (resolvedModelPath != null) {
+                parserModel = resolvedModelPath
+                if (resolvedModelPath != originalModelPath) {
+                    // Store for logging after logger initialization
+                    modelPathResolutions.add(originalModelPath to resolvedModelPath)
+                }
+            } else {
+                val defaultModelsPath = System.getenv("KORAPXMLTOOL_MODELS_PATH")
+                val searchInfo = if (defaultModelsPath != null) {
+                    " (searched in current directory and KORAPXMLTOOL_MODELS_PATH='$defaultModelsPath')"
+                } else {
+                    " (searched in current directory; set KORAPXMLTOOL_MODELS_PATH environment variable to specify default model search path)"
+                }
                 throw ParameterException(spec.commandLine(),
-                    String.format(Locale.ROOT, "Invalid value for option '--parse-with':"+
-                            "model file '%s' does not exist", parserModel, parserModel))
+                    String.format(Locale.ROOT, "Invalid value for option '--parse-with': "+
+                            "model file '%s' does not exist%s", originalModelPath, searchInfo))
             }
         }
     }
@@ -355,6 +416,11 @@
         LOGGER.level = level
         handler.level = level  // Handler also needs to be set to the same level
 
+        // Log model path resolutions that occurred during parameter parsing
+        modelPathResolutions.forEach { (original, resolved) ->
+            LOGGER.info("Resolved model path '$original' to '$resolved'")
+        }
+
         if (lemmaOnly) {
             useLemma = true
             if (outputFormat != OutputFormat.WORD2VEC && outputFormat != OutputFormat.NOW) {