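Make sure the foundry is picked up from external annotators (-A)

If the CoNLL-U output of the external annotator contains a
"# foundry = <name>" comment, use <name> instead of the preset foundry
for the annotation paths inside the output ZIP and for the names of the
output ZIP and its log file.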

Resolves #8

Change-Id: I29a284ef24b90c01ada84eee7bfb8fedf7169d7f
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 6a5ff6b..56852ca 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -1228,6 +1228,31 @@
                     morphoZipOutputStream!!.flush()
                     morphoZipOutputStream!!.close()
                     LOGGER.info("Closed output ZIP file after annotation processing")
+
+                    // Rename the ZIP (and its log file) once the foundry is known from the CoNLL-U output.
+                    // The rename is best effort: a failure is logged and the original names are kept.
+                    if (targetZipFileName != null && externalFoundry != null) {
+                        val currentFile = File(targetZipFileName!!)
+                        val baseZipName = File(args[0]).name.replace(Regex("\\.zip$"), "")
+                        val newFileName = File(outputDir, "$baseZipName.$externalFoundry.zip").absolutePath
+                        if (currentFile.absolutePath != newFileName) {
+                            val newFile = File(newFileName)
+                            if (currentFile.renameTo(newFile)) {
+                                LOGGER.info("Renamed output ZIP from ${currentFile.name} to ${newFile.name} based on detected foundry")
+                                // Keep the log file name in sync with the ZIP name
+                                val oldLogFile = File(targetZipFileName!!.replace(Regex("\\.zip$"), ".log"))
+                                val newLogFile = File(newFileName.replace(Regex("\\.zip$"), ".log"))
+                                if (oldLogFile.exists() && oldLogFile.renameTo(newLogFile)) {
+                                    LOGGER.info("Renamed log file from ${oldLogFile.name} to ${newLogFile.name}")
+                                } else if (oldLogFile.exists()) {
+                                    LOGGER.warning("Failed to rename log file from ${oldLogFile.absolutePath} to ${newLogFile.absolutePath}")
+                                }
+                                targetZipFileName = newFileName
+                            } else {
+                                LOGGER.warning("Failed to rename ZIP file from ${currentFile.absolutePath} to $newFileName")
+                            }
+                        }
+                    }
                 } catch (e: Exception) {
                     LOGGER.severe("ERROR closing ZIP file: ${e.message}")
                     e.printStackTrace()
@@ -3643,9 +3668,17 @@
         val sentenceSpans = mutableListOf<Span>()
         var sentenceStartOffset: Int? = null
         var sentenceEndOffset: Int? = null
+        var extractedFoundry: String? = null
 
         for (line in lines) {
             when {
+                line.startsWith("# foundry =") -> {
+                    val foundryStr = line.substring("# foundry =".length).trim()
+                    // The name becomes a ZIP path segment and part of the output file name: accept plain names only
+                    val isPlainName = foundryStr !in setOf("", ".", "..") && foundryStr.none { it == '/' || it == '\\' }
+                    if (isPlainName) extractedFoundry = foundryStr.also { LOGGER.fine("Extracted foundry from CoNLL-U output: $it") }
+                    else if (foundryStr.isNotEmpty()) LOGGER.warning("Ignoring unsafe foundry name from CoNLL-U output: $foundryStr")
+                }
                 line.startsWith("# start_offsets =") -> {
                     val offsetsStr = line.substring("# start_offsets =".length).trim()
                     val allOffsets = offsetsStr.split(Regex("\\s+")).mapNotNull { it.toIntOrNull() }
@@ -3740,10 +3773,20 @@
             }
         }
 
+        // Decide which foundry name the annotations are written under:
+        // the one announced in the CoNLL-U output wins over the preset one
+        val actualFoundry = extractedFoundry
+            ?.also { detected ->
+                LOGGER.info("Using foundry from CoNLL-U output: $detected (was: $foundry)")
+                // Mirror the detected name into the global externalFoundry variable for consistent naming
+                externalFoundry = detected
+            }
+            ?: foundry
+
         try {
             val context = de.ids_mannheim.korapxmltools.formatters.OutputContext(
                 docId = tempDocId,
-                foundry = foundry,
+                foundry = actualFoundry,
                 tokens = tokens[tempDocId],
                 sentences = sentences[tempDocId],
                 text = texts[tempDocId],
@@ -3766,7 +3809,7 @@
                 "docid=\"$docId\""
             )
 
-            val morphoEntryPath = docId.replace(Regex("[_.]"), "/") + "/$foundry/morpho.xml"
+            val morphoEntryPath = docId.replace(Regex("[_.]"), "/") + "/$actualFoundry/morpho.xml"
 
             val morphoZipEntry = ZipArchiveEntry(morphoEntryPath)
             morphoZipEntry.unixMode = ZIP_ENTRY_UNIX_MODE
@@ -3786,7 +3829,7 @@
             try {
                 val context = de.ids_mannheim.korapxmltools.formatters.OutputContext(
                     docId = tempDocId,
-                    foundry = foundry,
+                    foundry = actualFoundry,
                     tokens = tokens[tempDocId],
                     sentences = sentences[tempDocId],
                     text = texts[tempDocId],
@@ -3809,7 +3852,7 @@
                     "docid=\"$docId\""
                 )
 
-                val dependencyEntryPath = docId.replace(Regex("[_.]"), "/") + "/$foundry/dependency.xml"
+                val dependencyEntryPath = docId.replace(Regex("[_.]"), "/") + "/$actualFoundry/dependency.xml"
 
                 val dependencyZipEntry = ZipArchiveEntry(dependencyEntryPath)
                 dependencyZipEntry.unixMode = ZIP_ENTRY_UNIX_MODE
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlFormatterTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlFormatterTest.kt
index 860aa22..1a2dbb6 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlFormatterTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlFormatterTest.kt
@@ -8,6 +8,7 @@
 import java.net.URL
 import kotlin.test.Test
 import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
 import kotlin.test.assertTrue
 
 /**
@@ -99,20 +100,15 @@
             val outputZip = File(outputDir, "wud24_sample.corenlp.zip")
             assertTrue(outputZip.exists(), "Output ZIP should exist at ${outputZip.path}")
 
-            val constituencyFiles = mutableListOf<String>()
-            ProcessBuilder("unzip", "-l", outputZip.path)
-                .redirectOutput(ProcessBuilder.Redirect.PIPE)
-                .start()
-                .inputStream
-                .bufferedReader()
-                .useLines { lines ->
-                    lines.forEach { line ->
-                        if (line.contains("constituency.xml")) {
-                            constituencyFiles.add(line.trim())
-                        }
-                    }
+            // Get all ZIP entries
+            val zipEntries = org.apache.commons.compress.archivers.zip.ZipFile.builder()
+                .setFile(outputZip)
+                .get()
+                .use { zip ->
+                    zip.entries.asSequence().map { it.name }.toList()
                 }
 
+            val constituencyFiles = zipEntries.filter { it.contains("constituency.xml") }
             assertTrue(constituencyFiles.isNotEmpty(), "Should have constituency.xml files in output")
 
             val expectedDocs = listOf(
@@ -126,21 +122,70 @@
                 assertTrue(found, "Should have constituency.xml for $docPath")
             }
 
-            val morphoFiles = mutableListOf<String>()
-            ProcessBuilder("unzip", "-l", outputZip.path)
-                .redirectOutput(ProcessBuilder.Redirect.PIPE)
-                .start()
-                .inputStream
-                .bufferedReader()
-                .useLines { lines ->
-                    lines.forEach { line ->
-                        if (line.contains("/corenlp/morpho.xml")) {
-                            morphoFiles.add(line.trim())
-                        }
-                    }
+            val morphoFiles = zipEntries.filter { it.contains("/corenlp/morpho.xml") }
+            assertTrue(morphoFiles.size >= 3, "Should have morpho.xml files for at least 3 documents")
+
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun externalFoundryDetection() {
+        val baseZip = loadResource("wdd17sample.zip").path
+        val cmcConlluAnnotation = loadResource("wdd17sample.cmc.conllu").path
+        val outputDir = File.createTempFile("external_foundry_test", "").apply {
+            delete()
+            mkdirs()
+        }
+
+        try {
+            val args = arrayOf(
+                "-f",
+                "-t", "zip",
+                "-q",
+                "-D", outputDir.path,
+                "-j", "1",
+                "-A", "cat > /dev/null; cat $cmcConlluAnnotation",
+                baseZip
+            )
+
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "External annotation processing should succeed")
+
+            // Check that output ZIP has "cmc" in filename, not "annotated"
+            val outputZip = File(outputDir, "wdd17sample.cmc.zip")
+            assertTrue(outputZip.exists(), "Output ZIP should exist at ${outputZip.path} with 'cmc' foundry name")
+
+            // Verify internal structure contains cmc folders
+            val zipEntries = org.apache.commons.compress.archivers.zip.ZipFile.builder()
+                .setFile(outputZip)
+                .get()
+                .use { zip ->
+                    zip.entries.asSequence().map { it.name }.toList()
                 }
 
-            assertTrue(morphoFiles.size >= 3, "Should have morpho.xml files for at least 3 documents")
+            val cmcFolders = zipEntries.filter { it.contains("/cmc/") }
+            assertTrue(cmcFolders.isNotEmpty(), "Should have cmc folders in output ZIP structure")
+
+            // Verify no "annotated" folders exist
+            val annotatedFolders = zipEntries.filter { it.contains("/annotated/") }
+            assertTrue(annotatedFolders.isEmpty(), "Should NOT have 'annotated' folders in output ZIP structure")
+
+            // Verify morpho.xml contains CMC annotations (EMOASC, EMOIMG, URL)
+            val morphoXmlPath = "WDD17/B06/45592/cmc/morpho.xml"
+            val morphoXml = org.apache.commons.compress.archivers.zip.ZipFile.builder()
+                .setFile(outputZip)
+                .get()
+                .use { zip ->
+                    val entry = zip.getEntry(morphoXmlPath)
+                    assertNotNull(entry, "Should contain $morphoXmlPath")
+                    zip.getInputStream(entry).bufferedReader(java.nio.charset.StandardCharsets.UTF_8).use { it.readText() }
+                }
+
+            assertTrue(morphoXml.contains("EMOASC"), "morpho.xml should contain EMOASC annotations")
+            assertTrue(morphoXml.contains("EMOIMG"), "morpho.xml should contain EMOIMG annotations")
+            assertTrue(morphoXml.contains("URL"), "morpho.xml should contain URL annotations")
 
         } finally {
             outputDir.deleteRecursively()