Fix filename comment in korapxml2conllu to name the foundry file
Change-Id: Ib3657a7550285a50e11d524722e4aa4e133fddd2
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 49a9dd0..5b30088 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -2670,6 +2670,14 @@
// Formatter-based output methods using modular formatters
private fun formatConlluOutput(foundry: String, docId: String): StringBuilder {
+ // For CoNLL-U output, show foundry-specific file path instead of base tokens.xml
+ val foundryFileName = if (foundry == "base") {
+ fnames[docId] // Keep base/tokens.xml for base foundry
+ } else {
+ fnames[docId]?.replace("/base/tokens.xml", "/$foundry/morpho.xml")
+ ?: "$foundry/morpho.xml"
+ }
+
val context = de.ids_mannheim.korapxmltools.formatters.OutputContext(
docId = docId,
foundry = foundry,
@@ -2679,7 +2687,7 @@
morpho = morpho[docId],
metadata = metadata[docId],
extraFeatures = extraFeatures[docId],
- fileName = fnames[docId],
+ fileName = foundryFileName,
useLemma = useLemma,
extractMetadataRegex = extractMetadataRegex,
extractAttributesRegex = extractAttributesRegex,
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt
index c95e6ee..abe725a 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/ConlluConversionTest.kt
@@ -9,6 +9,7 @@
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFalse
+import kotlin.test.assertNotNull
import kotlin.test.assertTrue
/**
@@ -385,4 +386,40 @@
}
return content
}
+
+    @Test
+    fun testFoundrySpecificFilenameInConlluOutput() {
+        // Regression test: the "# filename" comment in CoNLL-U output used to show
+        // base/tokens.xml instead of the foundry-specific file (e.g. spacy/morpho.xml).
+        val spacyZip = loadResource("wud24_sample.spacy.zip")
+        val args = arrayOf("-t", "conllu", spacyZip.path)
+
+        debug(args)
+
+        val output = outContent.toString()
+        val lines = output.lines() // handles \n and \r\n, so no stray '\r' defeats matches() below
+
+        // Check that foundry is correctly identified
+        val foundryLine = lines.find { it.startsWith("# foundry = ") }
+        assertNotNull(foundryLine, "Should have foundry comment line")
+        assertEquals("# foundry = spacy", foundryLine)
+
+        // Check that filename points to spacy/morpho.xml, not base/tokens.xml.
+        // assertNotNull returns its argument as a non-null value, so no `!!` is needed below.
+        val filenameLine = assertNotNull(lines.find { it.startsWith("# filename = ") }, "Should have filename comment line")
+        assertTrue(
+            filenameLine.contains("/spacy/morpho.xml"),
+            "Filename should point to spacy/morpho.xml, but was: $filenameLine"
+        )
+        assertFalse(
+            filenameLine.contains("/base/tokens.xml"),
+            "Filename should not point to base/tokens.xml, but was: $filenameLine"
+        )
+
+        // Verify the specific expected pattern for spacy foundry (whole line must match)
+        assertTrue(
+            filenameLine.matches(Regex("# filename = .*/spacy/morpho\\.xml")),
+            "Filename should match pattern '*/spacy/morpho.xml', but was: $filenameLine"
+        )
+    }
}