Fix foundry folder name for -f zip output with --annotate-with

Change-Id: I55cd12cbf16e9fac4f46540be5cee35ccf6b6dc3
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index a03ede4..4d69378 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -62,6 +62,9 @@
 
     @Spec lateinit var spec : Model.CommandSpec
 
+    // When using --annotate-with, hold the external tool's foundry label (e.g., spacy, stanza)
+    private var externalFoundry: String? = null
+
     @Parameters(arity = "1..*", description = ["At least one zip file name"])
     var zipFileNames: Array<String>? = null
 
@@ -365,44 +368,36 @@
         entryExecutor = Executors.newFixedThreadPool(maxThreads)
 
         if (annotateWith.isNotEmpty()) {
-            // Initialize ZIP output stream BEFORE creating worker pool, if needed
-            if (outputFormat == OutputFormat.KORAPXML) {
-                // Determine output filename
-                val inputZipPath = args[0] // First ZIP file
-                var targetFoundry = "base"
-                when {
-                    annotateWith.contains("spacy") -> targetFoundry = "spacy"
-                    annotateWith.contains("stanza") -> targetFoundry = "stanza"
-                    annotateWith.contains("udpipe") -> targetFoundry = "udpipe"
-                    annotateWith.contains("tree") -> targetFoundry = "tree_tagger"
-                    annotateWith.contains("marmot") -> targetFoundry = "marmot"
-                    annotateWith.contains("opennlp") -> targetFoundry = "opennlp"
-                    annotateWith.contains("corenlp") -> targetFoundry = "corenlp"
-                    else -> targetFoundry = "annotated"
-                }
+            // Detect external foundry label once from annotateWith command
+            externalFoundry = detectFoundryFromAnnotateCmd(annotateWith)
+             // Initialize ZIP output stream BEFORE creating worker pool, if needed
+             if (outputFormat == OutputFormat.KORAPXML) {
+                 // Determine output filename
+                 val inputZipPath = args[0] // First ZIP file
+                val targetFoundry = externalFoundry ?: "annotated"
 
-                val outputMorphoZipFileName = inputZipPath.replace(Regex("\\.zip$"), ".".plus(targetFoundry).plus(".zip"))
-                LOGGER.info("Initializing output ZIP: $outputMorphoZipFileName (from input: $inputZipPath, foundry: $targetFoundry)")
+                 val outputMorphoZipFileName = inputZipPath.replace(Regex("\\.zip$"), ".".plus(targetFoundry).plus(".zip"))
+                 LOGGER.info("Initializing output ZIP: $outputMorphoZipFileName (from input: $inputZipPath, foundry: $targetFoundry)")
 
-                if (File(outputMorphoZipFileName).exists() && !overwrite) {
-                    LOGGER.severe("Output file $outputMorphoZipFileName already exists. Use --overwrite to overwrite.")
-                    exitProcess(1)
-                }
+                 if (File(outputMorphoZipFileName).exists() && !overwrite) {
+                     LOGGER.severe("Output file $outputMorphoZipFileName already exists. Use --overwrite to overwrite.")
+                     exitProcess(1)
+                 }
 
-                // Delete old file if it exists
-                if (File(outputMorphoZipFileName).exists()) {
-                    LOGGER.info("Deleting existing file: $outputMorphoZipFileName")
-                    File(outputMorphoZipFileName).delete()
-                }
+                 // Delete old file if it exists
+                 if (File(outputMorphoZipFileName).exists()) {
+                     LOGGER.info("Deleting existing file: $outputMorphoZipFileName")
+                     File(outputMorphoZipFileName).delete()
+                 }
 
-                dbFactory = DocumentBuilderFactory.newInstance()
-                dBuilder = dbFactory!!.newDocumentBuilder()
-                val fileOutputStream = FileOutputStream(outputMorphoZipFileName)
-                morphoZipOutputStream = ZipArchiveOutputStream(fileOutputStream).apply {
-                    setUseZip64(Zip64Mode.Always)
-                }
-                LOGGER.info("Initialized morphoZipOutputStream for external annotation to: $outputMorphoZipFileName")
-            }
+                 dbFactory = DocumentBuilderFactory.newInstance()
+                 dBuilder = dbFactory!!.newDocumentBuilder()
+                 val fileOutputStream = FileOutputStream(outputMorphoZipFileName)
+                 morphoZipOutputStream = ZipArchiveOutputStream(fileOutputStream).apply {
+                     setUseZip64(Zip64Mode.Always)
+                 }
+                 LOGGER.info("Initialized morphoZipOutputStream for external annotation to: $outputMorphoZipFileName")
+             }
 
             if (outputFormat == OutputFormat.KORAPXML) {
                 // For ZIP output with external annotation, we need a custom handler
@@ -612,17 +607,7 @@
             if (labelParts.isNotEmpty()) {
                 targetFoundry = labelParts.joinToString("-")
             } else if (annotateWith.isNotEmpty()) {
-                // Try to detect foundry from external annotation command
-                when {
-                    annotateWith.contains("spacy") -> targetFoundry = "spacy"
-                    annotateWith.contains("stanza") -> targetFoundry = "stanza"
-                    annotateWith.contains("udpipe") -> targetFoundry = "udpipe"
-                    annotateWith.contains("tree") -> targetFoundry = "tree_tagger"
-                    annotateWith.contains("marmot") -> targetFoundry = "marmot"
-                    annotateWith.contains("opennlp") -> targetFoundry = "opennlp"
-                    annotateWith.contains("corenlp") -> targetFoundry = "corenlp"
-                    else -> targetFoundry = "annotated"
-                }
+                targetFoundry = externalFoundry ?: detectFoundryFromAnnotateCmd(annotateWith)
                 LOGGER.info("Detected foundry '$targetFoundry' from annotation command: $annotateWith")
             }
             dbFactory = DocumentBuilderFactory.newInstance()
@@ -1012,6 +997,20 @@
         }
     }
 
+    private fun detectFoundryFromAnnotateCmd(cmd: String): String { // maps an --annotate-with command line to a foundry label
+        val lower = cmd.lowercase() // locale-invariant; a Turkish default locale would turn "UDPIPE" into "udpıpe" and miss the match
+        return when { // first matching branch wins, so the order below is significant
+            "spacy" in lower -> "spacy"
+            "stanza" in lower -> "stanza"
+            "udpipe" in lower -> "udpipe"
+            "tree" in lower -> "tree_tagger" // any occurrence of "tree" in the command selects this label
+            "marmot" in lower -> "marmot"
+            "opennlp" in lower -> "opennlp"
+            "corenlp" in lower -> "corenlp"
+            else -> "annotated" // fallback label when no known tool name occurs in the command
+        }
+    }
+
     private fun processText(
         docId: String,
         foundry: String,
@@ -1056,13 +1055,18 @@
         if (annotationWorkerPool != null) {
             if (outputFormat == OutputFormat.KORAPXML) {
                 // Store metadata in task, send clean CoNLL-U to external process
-                val entryPath = if (parserName != null)  docId.replace(Regex("[_.]"), "/").plus("/$parserName/").plus("dependency.xml")
+                // Use external foundry label for folder names when using --annotate-with
+                val targetFoundry = externalFoundry
+                    ?: taggerToolBridges[Thread.currentThread().threadId()]?.foundry
+                    ?: (parserName ?: morphoFoundry)
+                val entryPath = if (parserName != null)
+                    docId.replace(Regex("[_.]"), "/") + "/$targetFoundry/dependency.xml"
                 else
-                    docId.replace(Regex("[_.]"), "/").plus("/$morphoFoundry/").plus("morpho.xml")
-                LOGGER.fine("Sending document $docId (${output.length} chars) to annotation worker pool for ZIP output")
-                // Pass metadata via AnnotationTask, NOT in the text itself
-                annotationWorkerPool?.pushToQueue(output.toString(), docId, entryPath + "|" + foundry)
-                docsSentToAnnotation.incrementAndGet()
+                    docId.replace(Regex("[_.]"), "/") + "/$targetFoundry/morpho.xml"
+                 LOGGER.fine("Sending document $docId (${output.length} chars) to annotation worker pool for ZIP output")
+                 // Pass metadata via AnnotationTask, NOT in the text itself
+                annotationWorkerPool?.pushToQueue(output.toString(), docId, entryPath + "|" + targetFoundry)
+                 docsSentToAnnotation.incrementAndGet()
             } else {
                 LOGGER.fine("Sending document $docId (${output.length} chars) to annotation worker pool")
                 annotationWorkerPool?.pushToQueue(output.toString())
diff --git a/app/src/test/resources/goe.spacy.zip b/app/src/test/resources/goe.spacy.zip
index b934e51..faf4d86 100644
--- a/app/src/test/resources/goe.spacy.zip
+++ b/app/src/test/resources/goe.spacy.zip
Binary files differ