Add conllu2korapxml part

Change-Id: Ic74a2e68e0a6c73a8d3e16ee8bf1b787d51219e2
diff --git a/app/build.gradle b/app/build.gradle
index fa09110..ac3cd4f 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -151,9 +151,11 @@
     def targetExec = new File(binDir, "korapxmltool")
     def krillExec = new File(binDir, "korapxml2krill")
     def conlluExec = new File(binDir, "korapxml2conllu")
+    def conllu2korapxmlExec = new File(binDir, "conllu2korapxml")
     outputs.file(targetExec)
     outputs.file(krillExec)
     outputs.file(conlluExec)
+    outputs.file(conllu2korapxmlExec)
 
     doLast {
         def shebang = rootProject.file("korapxmltool.shebang")
@@ -205,6 +207,22 @@
             java.nio.file.Files.copy(targetExec.toPath(), conlluExec.toPath())
             conlluExec.setExecutable(true, false)
         }
+        
+        // Create conllu2korapxml symlink for CoNLL-U to KorAP XML ZIP conversion
+        if (conllu2korapxmlExec.exists()) {
+            conllu2korapxmlExec.delete()
+        }
+        try {
+            java.nio.file.Files.createSymbolicLink(
+                conllu2korapxmlExec.toPath(),
+                java.nio.file.Paths.get("korapxmltool")
+            )
+            println "Created symlink: conllu2korapxml -> korapxmltool"
+        } catch (Exception e) {
+            println "Warning: Could not create conllu2korapxml symlink (${e.message}), copying instead"
+            java.nio.file.Files.copy(targetExec.toPath(), conllu2korapxmlExec.toPath())
+            conllu2korapxmlExec.setExecutable(true, false)
+        }
     }
 }
 
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 149b248..2b391b5 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -71,6 +71,13 @@
             "  Basic conversion to CoNLL-U format:",
             "    ./build/bin/korapxmltool app/src/test/resources/wdf19.tree_tagger.zip | head -10",
             "",
+            "  CoNLL-U to KorAP XML ZIP conversion (auto-detects foundry from comments):",
+            "    ./build/bin/conllu2korapxml file.conllu",
+            "    cat file.conllu | ./build/bin/conllu2korapxml -o output.zip",
+            "    ./build/bin/korapxmltool -t zip -F custom file.conllu",
+            "    # Note: Foundry auto-detected from '# foundry = <name>' comment; override with -F",
+            "    # Note: Output path auto-inferred (file.conllu → file.zip) or specify with -o",
+            "",
             "  Word2Vec style output:",
             "    ./build/bin/korapxmltool -t w2v app/src/test/resources/wud24_sample.zip",
             "",
@@ -107,7 +114,7 @@
     private var targetZipFileName: String? = null
     // Locale is now globally forced to ROOT at startup (see main())
 
-    @Parameters(arity = "1..*", description = ["At least one zip file name"])
+    @Parameters(arity = "0..*", description = ["Input files: KorAP-XML ZIP files or CoNLL-U files (.conllu). If omitted, reads from stdin (requires -o for output path)."])
     var zipFileNames: Array<String>? = null
 
     @Option(
@@ -263,6 +270,20 @@
     var outputDir: String = "."
 
     @Option(
+        names = ["-o", "--output"],
+        paramLabel = "FILE",
+        description = ["Output file path (for CoNLL-U to ZIP conversion). Required when reading from stdin."]
+    )
+    var outputFile: String? = null
+
+    @Option(
+        names = ["-F", "--foundry"],
+        paramLabel = "FOUNDRY",
+        description = ["Override foundry name for CoNLL-U input (default: auto-detect from '# foundry = <name>' comment)"]
+    )
+    var foundryOverride: String? = null
+
+    @Option(
         names = ["--mem-stats-interval"],
         paramLabel = "N",
         description = ["Log memory and cache statistics every N processed documents (0 disables; default: 0)"]
@@ -504,6 +525,73 @@
             })
         }
 
+        // CoNLL-U to KorAP XML ZIP conversion mode
+        val isConlluInput = zipFileNames == null || zipFileNames!!.isEmpty() || 
+                           zipFileNames!!.any { it.endsWith(".conllu") }
+        
+        if (isConlluInput) {
+            // Validate: CoNLL-U mode requires -t zip (default or explicit)
+            if (outputFormat != OutputFormat.KORAP_XML) {
+                throw ParameterException(spec.commandLine(), 
+                    "CoNLL-U input requires output format 'zip' (use -t zip or invoke as 'conllu2korapxml')")
+            }
+
+            when {
+                // Case 1: stdin input (no files specified)
+                zipFileNames == null || zipFileNames!!.isEmpty() -> {
+                    if (outputFile == null) {
+                        throw ParameterException(spec.commandLine(),
+                            "Reading from stdin requires -o/--output to specify output file path")
+                    }
+                    val finalOutputPath = if (outputDir != ".") {
+                        File(outputDir, File(outputFile!!).name).path
+                    } else {
+                        outputFile!!
+                    }
+                    LOGGER.info("Converting CoNLL-U from stdin to: $finalOutputPath")
+                    convertConlluToZip(System.`in`, finalOutputPath)
+                    return 0
+                }
+                
+                // Case 2: CoNLL-U file(s) specified
+                zipFileNames!!.all { it.endsWith(".conllu") } -> {
+                    zipFileNames!!.forEach { conlluFile ->
+                        val outputPath = when {
+                            outputFile != null -> {
+                                // Explicit -o specified: use outputDir if specified
+                                if (outputDir != ".") {
+                                    File(outputDir, File(outputFile!!).name).path
+                                } else {
+                                    outputFile!!
+                                }
+                            }
+                            else -> {
+                                // Auto-infer from input filename
+                                val baseName = File(conlluFile).name.replace(Regex("\\.conllu$"), ".zip")
+                                if (outputDir != ".") {
+                                    File(outputDir, baseName).path
+                                } else {
+                                    conlluFile.replace(Regex("\\.conllu$"), ".zip")
+                                }
+                            }
+                        }
+                        LOGGER.info("Converting CoNLL-U file: $conlluFile → $outputPath")
+                        FileInputStream(conlluFile).use { inputStream ->
+                            convertConlluToZip(inputStream, outputPath)
+                        }
+                    }
+                    return 0
+                }
+                
+                // Case 3: Mixed input (some .conllu, some .zip) - not supported
+                else -> {
+                    throw ParameterException(spec.commandLine(),
+                        "Cannot mix CoNLL-U (.conllu) and ZIP files in the same invocation")
+                }
+            }
+        }
+
+        // Normal ZIP processing mode
         LOGGER.info("Processing zip files: " + zipFileNames!!.joinToString(", "))
 
         korapxml2conllu(zipFileNames!!)
@@ -3265,6 +3353,298 @@
         sentences.remove(tempDocId)
     }
 
+    /**
+     * Convert CoNLL-U input to KorAP XML ZIP format
+     * Supports:
+     * - Auto-detection of foundry from "# foundry = <name>" comment
+     * - Manual foundry override via -F option
+     * - Multi-document input (split on "# text_id" changes)
+     * - Combined foundries (e.g., "marmot-malt" → marmot/morpho.xml + malt/dependency.xml)
+     * - Text ID to path conversion (WUD24_I0083.95367 → WUD24/I0083/95367)
+     */
+    private fun convertConlluToZip(inputStream: InputStream, outputPath: String) {
+        LOGGER.info("Converting CoNLL-U to KorAP XML ZIP: $outputPath")
+
+        // Initialize DocumentBuilder for XML generation
+        if (dBuilder == null) {
+            dbFactory = DocumentBuilderFactory.newInstance()
+            dBuilder = dbFactory!!.newDocumentBuilder()
+        }
+
+        // Parse text_id to derive directory path: WUD24_I0083.95367 → WUD24/I0083/95367
+        fun textIdToPath(textId: String): String {
+            val parts = textId.split('_', limit = 2)
+            if (parts.size < 2) return textId.replace('.', '/')
+            val corpus = parts[0]
+            val remainder = parts[1].replace('.', '/')
+            return "$corpus/$remainder"
+        }
+
+        // Read all input and split into documents
+        data class ConlluDocument(
+            val textId: String,
+            val foundry: String,
+            val lines: List<String>
+        )
+
+        val documents = mutableListOf<ConlluDocument>()
+        val reader = BufferedReader(InputStreamReader(inputStream, StandardCharsets.UTF_8))
+        var currentTextId: String? = null
+        var currentFoundry: String? = null
+        var currentLines = mutableListOf<String>()
+
+        reader.forEachLine { line ->
+            when {
+                line.startsWith("# text_id = ") -> {
+                    // Save previous document if exists
+                    if (currentTextId != null && currentFoundry != null && currentLines.isNotEmpty()) {
+                        documents.add(ConlluDocument(currentTextId!!, currentFoundry!!, currentLines.toList()))
+                        currentLines = mutableListOf()
+                    }
+                    currentTextId = line.substring("# text_id = ".length).trim()
+                }
+                line.startsWith("# foundry = ") -> {
+                    val detectedFoundry = line.substring("# foundry = ".length).trim()
+                    currentFoundry = foundryOverride ?: detectedFoundry
+                }
+                else -> {
+                    currentLines.add(line)
+                }
+            }
+        }
+
+        // Add final document
+        if (currentTextId != null && currentFoundry != null && currentLines.isNotEmpty()) {
+            documents.add(ConlluDocument(currentTextId!!, currentFoundry!!, currentLines.toList()))
+        }
+
+        if (documents.isEmpty()) {
+            LOGGER.severe("No documents found in CoNLL-U input (missing '# text_id' and '# foundry' comments)")
+            throw IllegalArgumentException("Invalid CoNLL-U format: missing required comments '# text_id' and '# foundry'")
+        }
+
+        LOGGER.info("Found ${documents.size} document(s) in CoNLL-U input")
+
+        // Create output ZIP
+        val outputFile = File(outputPath)
+        if (outputFile.exists() && !overwrite) {
+            LOGGER.severe("Output file already exists: $outputPath (use -f to overwrite)")
+            throw IOException("Output file already exists: $outputPath")
+        }
+
+        val zipOutputStream = ZipArchiveOutputStream(BufferedOutputStream(FileOutputStream(outputFile)))
+        zipOutputStream.setUseZip64(Zip64Mode.AsNeeded)
+
+        try {
+            // Process each document
+            documents.forEach { doc ->
+                LOGGER.fine("Processing document: ${doc.textId}, foundry: ${doc.foundry}")
+
+                // Parse CoNLL-U content
+                val morphoSpans = mutableMapOf<String, MorphoSpan>()
+                var currentStartOffsets: List<Int>? = null
+                var currentEndOffsets: List<Int>? = null
+                var tokenIndexInSentence = 0
+                val sentenceSpans = mutableListOf<Span>()
+                var sentenceStartOffset: Int? = null
+                var sentenceEndOffset: Int? = null
+
+                for (line in doc.lines) {
+                    when {
+                        line.startsWith("# start_offsets =") -> {
+                            val offsetsStr = line.substring("# start_offsets =".length).trim()
+                            val allOffsets = offsetsStr.split(Regex("\\s+")).mapNotNull { it.toIntOrNull() }
+                            if (allOffsets.isEmpty()) {
+                                LOGGER.severe("Missing start_offsets for text ${doc.textId}")
+                                throw IllegalArgumentException("CoNLL-U format error: missing start_offsets for text ${doc.textId}")
+                            }
+                            sentenceStartOffset = allOffsets.firstOrNull()
+                            currentStartOffsets = if (allOffsets.size > 1) allOffsets.drop(1) else allOffsets
+                            tokenIndexInSentence = 0
+                        }
+                        line.startsWith("# end_offsets =") -> {
+                            val offsetsStr = line.substring("# end_offsets =".length).trim()
+                            val allOffsets = offsetsStr.split(Regex("\\s+")).mapNotNull { it.toIntOrNull() }
+                            if (allOffsets.isEmpty()) {
+                                LOGGER.severe("Missing end_offsets for text ${doc.textId}")
+                                throw IllegalArgumentException("CoNLL-U format error: missing end_offsets for text ${doc.textId}")
+                            }
+                            sentenceEndOffset = allOffsets.firstOrNull()
+                            currentEndOffsets = if (allOffsets.size > 1) allOffsets.drop(1) else emptyList()
+                        }
+                        line.isEmpty() -> {
+                            // Sentence boundary
+                            if (sentenceStartOffset != null && sentenceEndOffset != null) {
+                                sentenceSpans.add(Span(sentenceStartOffset!!, sentenceEndOffset!!))
+                            }
+                            sentenceStartOffset = null
+                            sentenceEndOffset = null
+                            currentStartOffsets = null
+                            currentEndOffsets = null
+                            tokenIndexInSentence = 0
+                        }
+                        !line.startsWith("#") -> {
+                            val fields = line.split("\t")
+                            if (fields.size < 10) continue
+
+                            val lemma = if (fields.size > 2) fields[2] else "_"
+                            val upos = if (fields.size > 3) fields[3] else "_"
+                            val xpos = if (fields.size > 4) fields[4] else "_"
+                            val feats = if (fields.size > 5) fields[5] else "_"
+                            val head = if (fields.size > 6) fields[6] else "_"
+                            val deprel = if (fields.size > 7) fields[7] else "_"
+                            val deps = if (fields.size > 8) fields[8] else "_"
+                            val misc = if (fields.size > 9) fields[9] else "_"
+
+                            if (currentStartOffsets == null || currentEndOffsets == null) {
+                                LOGGER.severe("Token found before offset comments in text ${doc.textId}")
+                                throw IllegalArgumentException("CoNLL-U format error: tokens found before offset comments in text ${doc.textId}")
+                            }
+
+                            if (tokenIndexInSentence < currentStartOffsets.size &&
+                                tokenIndexInSentence < currentEndOffsets.size) {
+
+                                val spanFrom = currentStartOffsets[tokenIndexInSentence]
+                                val spanTo = currentEndOffsets[tokenIndexInSentence]
+                                val spanKey = "$spanFrom-$spanTo"
+
+                                morphoSpans[spanKey] = MorphoSpan(lemma, upos, xpos, feats, head, deprel, deps, misc)
+                                tokenIndexInSentence++
+                            }
+                        }
+                    }
+                }
+
+                // Capture final sentence if not ended with empty line
+                if (sentenceStartOffset != null && sentenceEndOffset != null) {
+                    sentenceSpans.add(Span(sentenceStartOffset!!, sentenceEndOffset!!))
+                }
+
+                if (morphoSpans.isEmpty()) {
+                    LOGGER.warning("No morpho spans found for text ${doc.textId}, skipping")
+                    return@forEach
+                }
+
+                // Determine which layers to generate based on foundry and content
+                val hasDependencies = morphoSpans.values.any { span ->
+                    span.head != null && span.head != "_" && span.deprel != null && span.deprel != "_"
+                }
+
+                // Get foundry names for each layer (handles combined foundries like "marmot-malt")
+                val morphoFoundry = getFoundryForLayer(doc.foundry, "morpho")
+                val dependencyFoundry = if (hasDependencies) getFoundryForLayer(doc.foundry, "dependency") else null
+
+                // Store data in temp maps for XML generation
+                val tempDocId = "_temp_conllu_${doc.textId}"
+                morpho[tempDocId] = morphoSpans
+                if (sentenceSpans.isNotEmpty()) {
+                    sentences[tempDocId] = sentenceSpans.toTypedArray()
+                } else if (morphoSpans.isNotEmpty()) {
+                    // Fallback: create single sentence spanning all tokens
+                    val minOffset = morphoSpans.keys.minOfOrNull { it.split("-")[0].toInt() } ?: 0
+                    val maxOffset = morphoSpans.keys.maxOfOrNull { it.split("-")[1].toInt() } ?: 0
+                    sentences[tempDocId] = arrayOf(Span(minOffset, maxOffset))
+                }
+
+                // Generate morpho.xml
+                try {
+                    val basePath = textIdToPath(doc.textId)
+                    val morphoPath = "$basePath/$morphoFoundry/morpho.xml"
+
+                    val context = de.ids_mannheim.korapxmltools.formatters.OutputContext(
+                        docId = tempDocId,
+                        foundry = morphoFoundry,
+                        tokens = getTokenSpansFromMorho(morphoSpans),
+                        sentences = sentences[tempDocId],
+                        text = null,
+                        morpho = morpho[tempDocId],
+                        metadata = null,
+                        extraFeatures = null,
+                        fileName = null,
+                        useLemma = useLemma,
+                        extractMetadataRegex = extractMetadataRegex,
+                        extractAttributesRegex = extractAttributesRegex,
+                        columns = columns,
+                        constituencyTrees = null,
+                        includeOffsetsInMisc = false,
+                        compatibilityMode = COMPATIBILITY_MODE,
+                        tokenSeparator = tokenSeparator
+                    )
+
+                    val morphoXmlOutput = KorapXmlFormatter.formatMorpho(context, dBuilder!!)
+                    val fixedMorphoXml = morphoXmlOutput.toString().replace(
+                        "docid=\"$tempDocId\"",
+                        "docid=\"${doc.textId}\""
+                    )
+
+                    val morphoZipEntry = ZipArchiveEntry(morphoPath)
+                    morphoZipEntry.unixMode = ZIP_ENTRY_UNIX_MODE
+                    zipOutputStream.putArchiveEntry(morphoZipEntry)
+                    zipOutputStream.write(fixedMorphoXml.toByteArray())
+                    zipOutputStream.closeArchiveEntry()
+
+                    LOGGER.fine("Wrote $morphoPath (${fixedMorphoXml.length} bytes)")
+                } catch (e: Exception) {
+                    LOGGER.severe("ERROR generating morpho.xml for ${doc.textId}: ${e.message}")
+                    throw e
+                }
+
+                // Generate dependency.xml if dependencies present
+                if (hasDependencies && dependencyFoundry != null) {
+                    try {
+                        val basePath = textIdToPath(doc.textId)
+                        val dependencyPath = "$basePath/$dependencyFoundry/dependency.xml"
+
+                        val context = de.ids_mannheim.korapxmltools.formatters.OutputContext(
+                            docId = tempDocId,
+                            foundry = dependencyFoundry,
+                            tokens = getTokenSpansFromMorho(morphoSpans),
+                            sentences = sentences[tempDocId],
+                            text = null,
+                            morpho = morpho[tempDocId],
+                            metadata = null,
+                            extraFeatures = null,
+                            fileName = null,
+                            useLemma = useLemma,
+                            extractMetadataRegex = extractMetadataRegex,
+                            extractAttributesRegex = extractAttributesRegex,
+                            columns = columns,
+                            constituencyTrees = null,
+                            includeOffsetsInMisc = false,
+                            compatibilityMode = COMPATIBILITY_MODE,
+                            tokenSeparator = tokenSeparator
+                        )
+
+                        val dependencyXmlOutput = KorapXmlFormatter.formatDependency(context, dBuilder!!)
+                        val fixedDependencyXml = dependencyXmlOutput.toString().replace(
+                            "docid=\"$tempDocId\"",
+                            "docid=\"${doc.textId}\""
+                        )
+
+                        val dependencyZipEntry = ZipArchiveEntry(dependencyPath)
+                        dependencyZipEntry.unixMode = ZIP_ENTRY_UNIX_MODE
+                        zipOutputStream.putArchiveEntry(dependencyZipEntry)
+                        zipOutputStream.write(fixedDependencyXml.toByteArray())
+                        zipOutputStream.closeArchiveEntry()
+
+                        LOGGER.fine("Wrote $dependencyPath (${fixedDependencyXml.length} bytes)")
+                    } catch (e: Exception) {
+                        LOGGER.severe("ERROR generating dependency.xml for ${doc.textId}: ${e.message}")
+                        throw e
+                    }
+                }
+
+                // Cleanup temp data
+                morpho.remove(tempDocId)
+                sentences.remove(tempDocId)
+            }
+
+            LOGGER.info("Successfully wrote ${documents.size} document(s) to $outputPath")
+        } finally {
+            zipOutputStream.close()
+        }
+    }
+
     // Collect structural spans from structure.xml for krill format
     private fun collectKrillStructureSpans(docId: String, spans: NodeList) {
         // Skip if already output (thread-safe check with ConcurrentHashMap.KeySet)
@@ -4468,6 +4848,35 @@
             System.err.println("korapxml2conllu compatibility mode: using conllu format")
             newArgs.toTypedArray()
         }
+        "conllu2korapxml" -> {
+            // Set zip output format for conllu2korapxml (CoNLL-U → KorAP XML ZIP)
+            val newArgs = mutableListOf<String>()
+            
+            // Always set zip output format
+            if (!args.contains("-t") && !args.contains("--to")) {
+                newArgs.add("-t")
+                newArgs.add("zip")
+            }
+            
+            var i = 0
+            while (i < args.size) {
+                val arg = args[i]
+                if (arg == "-t" || arg == "--to") {
+                    // If format is already specified, override with zip
+                    newArgs.add(arg)
+                    if (i + 1 < args.size) {
+                        i++
+                        newArgs.add("zip")
+                    }
+                } else {
+                    newArgs.add(arg)
+                }
+                i++
+            }
+            
+            System.err.println("conllu2korapxml mode: converting CoNLL-U to KorAP XML ZIP")
+            newArgs.toTypedArray()
+        }
         else -> args
     }
     
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt
new file mode 100644
index 0000000..c95e6ee
--- /dev/null
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt
@@ -0,0 +1,388 @@
+package de.ids_mannheim.korapxmltools
+
+import org.junit.After
+import org.junit.Before
+import java.io.ByteArrayOutputStream
+import java.io.File
+import java.io.PrintStream
+import java.net.URL
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFalse
+import kotlin.test.assertTrue
+
+/**
+ * Tests for CoNLL-U to KorAP XML ZIP conversion functionality
+ */
+class ConlluConversionTest {
+    private val outContent = ByteArrayOutputStream(10000000)
+    private val errContent = ByteArrayOutputStream()
+    private val originalOut: PrintStream = System.out
+    private val originalErr: PrintStream = System.err
+
+    @Before
+    fun setUpStreams() {
+        System.setOut(PrintStream(outContent))
+        System.setErr(PrintStream(errContent))
+    }
+
+    @After
+    fun restoreStreams() {
+        System.setOut(originalOut)
+        System.setErr(originalErr)
+    }
+
+    private fun loadResource(path: String): URL {
+        val resource = Thread.currentThread().contextClassLoader.getResource(path)
+        requireNotNull(resource) { "Resource $path not found" }
+        return resource
+    }
+
+    private fun createTempDir(prefix: String): File {
+        return File.createTempFile(prefix, "").apply {
+            delete()
+            mkdirs()
+        }
+    }
+
+    @Test
+    fun canConvertBasicConlluToZip() {
+        val outputDir = createTempDir("conllu_basic")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "CoNLL-U conversion should succeed")
+            assertTrue(outputZip.exists(), "Output ZIP should be created")
+            assertTrue(outputZip.length() > 0, "Output ZIP should not be empty")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun conlluZipContainsMorphoXml() {
+        val outputDir = createTempDir("conllu_morpho")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            debug(args)
+            
+            val zipEntries = extractZipFileList(outputZip)
+            assertTrue(zipEntries.any { it.contains("spacy/morpho.xml") }, 
+                "ZIP should contain morpho.xml files")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun conlluZipContainsDependencyXml() {
+        val outputDir = createTempDir("conllu_dependency")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            debug(args)
+            
+            val zipEntries = extractZipFileList(outputZip)
+            assertTrue(zipEntries.any { it.contains("spacy/dependency.xml") }, 
+                "ZIP should contain dependency.xml files when dependencies present")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun canAutoInferOutputFilename() {
+        val outputDir = createTempDir("conllu_autoinfer")
+        try {
+            // Copy test file to temp dir
+            val inputFile = File(outputDir, "test.conllu")
+            File(loadResource("wud24_sample.spacy.conllu").path).copyTo(inputFile)
+            
+            val args = arrayOf(
+                "-t", "zip",
+                "-D", outputDir.path,
+                inputFile.path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "CoNLL-U conversion should succeed")
+            
+            val outputZip = File(outputDir, "test.zip")
+            assertTrue(outputZip.exists(), "Output ZIP should be auto-inferred as test.zip")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun respectsOutputDirOption() {
+        val outputDir = createTempDir("conllu_output_dir")
+        try {
+            val inputFile = File(outputDir, "input.conllu")
+            File(loadResource("wud24_sample.spacy.conllu").path).copyTo(inputFile)
+            
+            val args = arrayOf(
+                "-t", "zip",
+                "-D", outputDir.path,
+                inputFile.path
+            )
+            debug(args)
+            
+            val outputZip = File(outputDir, "input.zip")
+            assertTrue(outputZip.exists(), "Output should be in specified directory")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun canHandleCombinedFoundries() {
+        val outputDir = createTempDir("conllu_combined")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.marmot-malt.conllu").path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "Combined foundry conversion should succeed")
+            
+            val zipEntries = extractZipFileList(outputZip)
+            assertTrue(zipEntries.any { it.contains("marmot/morpho.xml") }, 
+                "ZIP should contain marmot morpho.xml")
+            assertTrue(zipEntries.any { it.contains("malt/dependency.xml") }, 
+                "ZIP should contain malt dependency.xml")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun canOverrideFoundryName() {
+        val outputDir = createTempDir("conllu_override")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-F", "custom",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "Foundry override conversion should succeed")
+            
+            val zipEntries = extractZipFileList(outputZip)
+            assertTrue(zipEntries.any { it.contains("custom/morpho.xml") }, 
+                "ZIP should contain custom foundry morpho.xml")
+            assertFalse(zipEntries.any { it.contains("spacy/morpho.xml") }, 
+                "ZIP should not contain original spacy foundry")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun canConvertFromStdin() {
+        val outputDir = createTempDir("conllu_stdin")
+        try {
+            val outputZip = File(outputDir, "stdin_output.zip")
+            val inputFile = File(loadResource("wud24_sample.spacy.conllu").path)
+            
+            // Use KorapXmlTool directly with redirected stdin
+            val inputStream = inputFile.inputStream()
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path
+            )
+            
+            val originalIn = System.`in`
+            try {
+                System.setIn(inputStream)
+                val exitCode = debug(args)
+                assertEquals(0, exitCode, "Stdin conversion should succeed")
+                assertTrue(outputZip.exists(), "Output ZIP should be created from stdin")
+            } finally {
+                System.setIn(originalIn)
+                inputStream.close()
+            }
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun validatesRequiredTextId() {
+        val outputDir = createTempDir("conllu_validation")
+        try {
+            // Create invalid CoNLL-U without text_id
+            val invalidConllu = File(outputDir, "invalid.conllu")
+            invalidConllu.writeText("""
+                # foundry = test
+                # start_offsets = 0 5
+                # end_offsets = 4 10
+                1	Test	test	NOUN	NN	_	0	ROOT	_	_
+                
+            """.trimIndent())
+            
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                invalidConllu.path
+            )
+            val exitCode = debug(args)
+            assertTrue(exitCode != 0, "Should fail without text_id")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun handlesMultipleDocuments() {
+        val outputDir = createTempDir("conllu_multidoc")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode, "Multi-document conversion should succeed")
+            
+            val zipEntries = extractZipFileList(outputZip)
+            // The sample file has 3 documents: WUD24_I0083.95367, WUD24_K0086.98010, WUD24_Z0087.65594
+            assertTrue(zipEntries.any { it.contains("WUD24/I0083/95367") }, 
+                "Should contain first document")
+            assertTrue(zipEntries.any { it.contains("WUD24/K0086/98010") }, 
+                "Should contain second document")
+            assertTrue(zipEntries.any { it.contains("WUD24/Z0087/65594") }, 
+                "Should contain third document")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun createsValidKorapXmlStructure() {
+        val outputDir = createTempDir("conllu_xml_validation")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            val exitCode = debug(args)
+            assertEquals(0, exitCode)
+            
+            // Check ZIP contains expected files
+            val zipEntries = extractZipFileList(outputZip)
+            assertTrue(zipEntries.any { it.contains("WUD24/I0083/95367/spacy/morpho.xml") },
+                "ZIP should contain morpho.xml")
+            assertTrue(zipEntries.any { it.contains("WUD24/I0083/95367/spacy/dependency.xml") },
+                "ZIP should contain dependency.xml")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun morphoXmlContainsLexicalFeatures() {
+        val outputDir = createTempDir("conllu_morpho_features")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            debug(args)
+            
+            val morphoXml = extractFileFromZip(outputZip, "WUD24/I0083/95367/spacy/morpho.xml")
+            assertTrue(morphoXml.length > 100, "Morpho XML should have substantial content")
+            assertTrue(morphoXml.contains("lemma") && morphoXml.contains("upos"),
+                "Morpho XML should contain lemma and upos fields")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun dependencyXmlContainsDependencyRelations() {
+        val outputDir = createTempDir("conllu_dep_relations")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            debug(args)
+            
+            val depXml = extractFileFromZip(outputZip, "WUD24/I0083/95367/spacy/dependency.xml")
+            assertTrue(depXml.length > 100, "Dependency XML should have substantial content")
+            assertTrue(depXml.contains("deprel") || depXml.contains("label"),
+                "Dependency XML should contain dependency relations")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun xmlContainsCorrectDocumentId() {
+        val outputDir = createTempDir("conllu_docid")
+        try {
+            val outputZip = File(outputDir, "output.zip")
+            val args = arrayOf(
+                "-t", "zip",
+                "-o", outputZip.path,
+                loadResource("wud24_sample.spacy.conllu").path
+            )
+            debug(args)
+            
+            val morphoXml = extractFileFromZip(outputZip, "WUD24/I0083/95367/spacy/morpho.xml")
+            assertTrue(morphoXml.contains("WUD24_I0083.95367"),
+                "XML should contain correct document ID")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    private fun extractZipFileList(zipFile: File): List<String> {
+        val process = ProcessBuilder("unzip", "-l", zipFile.path)
+            .redirectOutput(ProcessBuilder.Redirect.PIPE)
+            .start()
+        val output = process.inputStream.bufferedReader().use { it.readText() }
+        process.waitFor()
+        return output.lines()
+    }
+
+    private fun extractFileFromZip(zipFile: File, filePath: String): String {
+        val process = ProcessBuilder("unzip", "-p", zipFile.path, filePath)
+            .redirectOutput(ProcessBuilder.Redirect.PIPE)
+            .redirectError(ProcessBuilder.Redirect.PIPE)
+            .start()
+        val content = process.inputStream.bufferedReader().use { it.readText() }
+        val exitCode = process.waitFor()
+        if (exitCode != 0) {
+            val error = process.errorStream.bufferedReader().use { it.readText() }
+            throw RuntimeException("Failed to extract $filePath from $zipFile: $error")
+        }
+        return content
+    }
+}
diff --git a/app/src/test/resources/wud24_sample.spacy.zip b/app/src/test/resources/wud24_sample.spacy.zip
index d65720a..8e88c44 100644
--- a/app/src/test/resources/wud24_sample.spacy.zip
+++ b/app/src/test/resources/wud24_sample.spacy.zip
Binary files differ