Add tests for K2K_PUBLISHER_STRING,K2K_TRANSLATOR_TEXT

Change-Id: Ia3f4da38dcb59700f00058761c04b62d8b2848e5
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
index 76b9b92..47ad97c 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
@@ -9,6 +9,7 @@
 import java.net.URL
 import kotlin.test.Test
 import kotlin.test.assertEquals
+import kotlin.test.assertFalse
 import kotlin.test.assertTrue
 
 /**
@@ -470,4 +471,201 @@
             extractDir.deleteRecursively()
         }
     }
+
+    @Test
+    fun testPublisherAsStringEnvVar() {
+        val baseZip = loadResource("wdf19.zip").path
+        
+        // First test WITHOUT the environment variable (default: type:attachement)
+        val defaultTar = ensureKrillTar("wdf19_default_publisher", "wdf19.krill.tar") { outputDir ->
+            arrayOf(
+                "-t", "krill",
+                "-D", outputDir.path,
+                baseZip
+            )
+        }
+
+        val defaultJsons = readKrillJson(defaultTar)
+        assertTrue(defaultJsons.isNotEmpty(), "Should have generated Krill JSON files")
+        
+        // Check that publisher is type:attachement by default
+        var foundPublisher = false
+        defaultJsons.values.forEach { json ->
+            val publisherMatch = Regex(""""\s*key"\s*:\s*"publisher".*?"type"\s*:\s*"type:([^"]+)"""").find(json)
+            if (publisherMatch != null) {
+                foundPublisher = true
+                val typeValue = publisherMatch.groupValues[1]
+                assertEquals("attachement", typeValue, "Publisher should be type:attachement by default")
+            }
+        }
+        assertTrue(foundPublisher, "Should find publisher field in at least one document")
+
+        // Now test WITH K2K_PUBLISHER_STRING environment variable
+        val outputDir = File.createTempFile("publisher_string_test", "").apply {
+            delete()
+            mkdirs()
+        }
+
+        try {
+            val process = ProcessBuilder(
+                "java", "-jar", "app/build/libs/korapxmltool.jar",
+                "-t", "krill",
+                "-D", outputDir.path,
+                baseZip
+            )
+                .directory(File("/home/kupietz/KorAP/korapxmltool"))
+                .apply {
+                    environment()["K2K_PUBLISHER_STRING"] = "1"
+                }
+                .redirectErrorStream(true)
+                .start()
+
+            val exitCode = process.waitFor()
+            assertEquals(0, exitCode, "Krill conversion with K2K_PUBLISHER_STRING should succeed")
+
+            val tar = File(outputDir, "wdf19.krill.tar")
+            assertTrue(tar.exists(), "Expected wdf19.krill.tar")
+
+            val envJsons = readKrillJson(tar)
+            assertTrue(envJsons.isNotEmpty(), "Should have generated Krill JSON files with env var")
+
+            // Check that publisher is now type:string
+            var foundPublisherWithEnv = false
+            envJsons.values.forEach { json ->
+                val publisherMatch = Regex(""""\s*key"\s*:\s*"publisher".*?"type"\s*:\s*"type:([^"]+)"""").find(json)
+                if (publisherMatch != null) {
+                    foundPublisherWithEnv = true
+                    val typeValue = publisherMatch.groupValues[1]
+                    assertEquals("string", typeValue, "Publisher should be type:string when K2K_PUBLISHER_STRING is set")
+                }
+            }
+            assertTrue(foundPublisherWithEnv, "Should find publisher field with env var set")
+        } finally {
+            outputDir.deleteRecursively()
+        }
+    }
+
+    @Test
+    fun testTranslatorAsTextEnvVar() {
+        // Create a temporary test ZIP with translator field
+        val testDir = File.createTempFile("translator_test_corpus", "").apply {
+            delete()
+            mkdirs()
+        }
+
+        try {
+            // Create a simple test corpus with translator metadata
+            val textDir = File(testDir, "TEST/TEST.00001")
+            textDir.mkdirs()
+
+            File(textDir, "header.xml").writeText("""
+                <?xml version="1.0" encoding="UTF-8"?>
+                <idsHeader type="text" TEIform="teiHeader">
+                    <fileDesc>
+                        <titleStmt>
+                            <t.title>Test Document with Translator</t.title>
+                            <t.author>Test Author</t.author>
+                            <translator>Test Translator</translator>
+                        </titleStmt>
+                    </fileDesc>
+                </idsHeader>
+            """.trimIndent())
+
+            File(textDir, "data.xml").writeText("""
+                <?xml version="1.0" encoding="UTF-8"?>
+                <raw_text><text>Test content.</text></raw_text>
+            """.trimIndent())
+
+            // Create ZIP
+            val zipFile = File(testDir, "test_translator.zip")
+            val zipProcess = ProcessBuilder("zip", "-r", zipFile.path, "TEST")
+                .directory(testDir)
+                .redirectErrorStream(true)
+                .start()
+            assertEquals(0, zipProcess.waitFor(), "ZIP creation should succeed")
+
+            // Test WITHOUT K2K_TRANSLATOR_TEXT (default: type:attachement)
+            val defaultOutputDir = File.createTempFile("translator_default", "").apply {
+                delete()
+                mkdirs()
+            }
+
+            try {
+                val defaultProcess = ProcessBuilder(
+                    "java", "-jar", "app/build/libs/korapxmltool.jar",
+                    "-t", "krill",
+                    "-D", defaultOutputDir.path,
+                    zipFile.path
+                )
+                    .directory(File("/home/kupietz/KorAP/korapxmltool"))
+                    .redirectErrorStream(true)
+                    .start()
+
+                assertEquals(0, defaultProcess.waitFor(), "Default conversion should succeed")
+
+                val defaultTar = File(defaultOutputDir, "test_translator.krill.tar")
+                assertTrue(defaultTar.exists(), "Expected test_translator.krill.tar")
+
+                val defaultJsons = readKrillJson(defaultTar)
+                assertTrue(defaultJsons.isNotEmpty(), "Should have generated JSON")
+
+                var foundTranslator = false
+                defaultJsons.values.forEach { json ->
+                    val translatorMatch = Regex(""""\s*key"\s*:\s*"translator".*?"type"\s*:\s*"type:([^"]+)"""").find(json)
+                    if (translatorMatch != null) {
+                        foundTranslator = true
+                        val typeValue = translatorMatch.groupValues[1]
+                        assertEquals("attachement", typeValue, "Translator should be type:attachement by default")
+                    }
+                }
+                assertTrue(foundTranslator, "Should find translator field in generated JSON")
+            } finally {
+                defaultOutputDir.deleteRecursively()
+            }
+
+            // Test WITH K2K_TRANSLATOR_TEXT (type:text)
+            val envOutputDir = File.createTempFile("translator_text", "").apply {
+                delete()
+                mkdirs()
+            }
+
+            try {
+                val envProcess = ProcessBuilder(
+                    "java", "-jar", "app/build/libs/korapxmltool.jar",
+                    "-t", "krill",
+                    "-D", envOutputDir.path,
+                    zipFile.path
+                )
+                    .directory(File("/home/kupietz/KorAP/korapxmltool"))
+                    .apply {
+                        environment()["K2K_TRANSLATOR_TEXT"] = "1"
+                    }
+                    .redirectErrorStream(true)
+                    .start()
+
+                assertEquals(0, envProcess.waitFor(), "Conversion with K2K_TRANSLATOR_TEXT should succeed")
+
+                val envTar = File(envOutputDir, "test_translator.krill.tar")
+                assertTrue(envTar.exists(), "Expected test_translator.krill.tar with env var")
+
+                val envJsons = readKrillJson(envTar)
+                assertTrue(envJsons.isNotEmpty(), "Should have generated JSON with env var")
+
+                var foundTranslatorWithEnv = false
+                envJsons.values.forEach { json ->
+                    val translatorMatch = Regex(""""\s*key"\s*:\s*"translator".*?"type"\s*:\s*"type:([^"]+)"""").find(json)
+                    if (translatorMatch != null) {
+                        foundTranslatorWithEnv = true
+                        val typeValue = translatorMatch.groupValues[1]
+                        assertEquals("text", typeValue, "Translator should be type:text when K2K_TRANSLATOR_TEXT is set")
+                    }
+                }
+                assertTrue(foundTranslatorWithEnv, "Should find translator field with env var set")
+            } finally {
+                envOutputDir.deleteRecursively()
+            }
+        } finally {
+            testDir.deleteRecursively()
+        }
+    }
 }