Process zip entries in order and sequentially
Change-Id: I287837a2dfa18b967ea3d068aa968d198a16f8f8
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index f3a95e4..5886116 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -237,7 +237,11 @@
) {
try {
ZipFile(zipFilePath).use { zipFile ->
- zipFile.stream().parallel().forEach { zipEntry ->
+ zipFile.stream().filter({ !it.name.contains("header.xml") })
+ //.sorted({ o1, o2 -> o1.name.compareTo(o2.name) })
+ .forEachOrdered { zipEntry ->
+ LOGGER.info("Processing ${zipEntry.name} in thread ${Thread.currentThread().id}")
+
try {
if (zipEntry.name.matches(Regex(".*(data|tokens|structure|morpho)\\.xml$"))) {
val inputStream: InputStream = zipFile.getInputStream(zipEntry)
@@ -247,13 +251,13 @@
dBuilder.parse(InputSource(InputStreamReader(inputStream, "UTF-8")))
} catch (e: SAXParseException) {
LOGGER.warning("Error parsing file: " + zipEntry.name + " " + e.message)
- return@forEach
+ return@forEachOrdered
}
doc.documentElement.normalize()
val docId: String = doc.documentElement.getAttribute("docid")
if (siglePattern != null && !Regex(siglePattern!!).containsMatchIn(docId)) {
- return@forEach
+ return@forEachOrdered
}
// LOGGER.info("Processing file: " + zipEntry.getName())
val fileName = zipEntry.name.replace(Regex(".*?/([^/]+\\.xml)$"), "$1")