Fix missing header metadata
Change-Id: I1185b86c2e5cbb59d9c397fa055a30343bb5d63e
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 5d6292f..671e70f 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -1446,7 +1446,7 @@
try {
val zipFile = ApacheZipFile(File(zipPath))
val entries = zipFile.entries.toList()
- .filter { !it.isDirectory && it.name.matches(Regex(".*(data|tokens|structure|morpho|dependency|sentences|constituency)\\.xml$")) }
+ .filter { !it.isDirectory && it.name.matches(Regex(".*(data|tokens|structure|morpho|dependency|sentences|constituency|header)\\.xml$")) }
val entriesByTextId = entries.groupBy { getTextIdFromPath(it.name) }
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
index e678fd7..28f1db5 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KrillJsonGeneratorTest.kt
@@ -396,4 +396,41 @@
}
}
}
+
+    /**
+     * Checks that Krill JSON generated from `wud24_sample.zip` exposes document metadata.
+     * Every generated document must contain a `"fields"` key and at least one of the
+     * metadata keys listed in the test body.
+     */
+    @Test
+    fun testMetadataPresence() {
+        val baseZip = loadResource("wud24_sample.zip").path
+
+        val generatedTar = ensureKrillTar("metadata_test") { outputDir ->
+            arrayOf(
+                "-t", "krill",
+                "-D", outputDir.path,
+                baseZip
+            )
+        }
+
+        val kotlinJsons = readKrillJson(generatedTar)
+        assertTrue(kotlinJsons.isNotEmpty(), "Should have generated Krill JSON files")
+
+        // Quoted key names that should come from header.xml; built once for all texts.
+        val metadataFields = listOf(
+            "\"title\"", "\"author\"", "\"pubPlace\"", "\"publisher\"",
+            "\"availability\"", "\"textType\"", "\"textDomain\""
+        )
+
+        kotlinJsons.forEach { (textId, json) ->
+            // Containment check for the quoted key that introduces the metadata section.
+            assertTrue(json.contains("\"fields\""), "Text $textId should have fields metadata")
+
+            // One matching key is enough; `any` stops at the first hit.
+            val hasMetadata = metadataFields.any { field -> json.contains(field) }
+            assertTrue(hasMetadata, "Text $textId should have at least some metadata fields from header.xml")
+        }
+    }
+
}