Process zip files sequentially if only one thread is used
Change-Id: I4b6cd0c80d7d7e60ea8f2720fc80091e0550b1a4
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 11b2061..5bf3fc5 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -25,6 +25,7 @@
import java.util.zip.ZipFile
import javax.xml.parsers.DocumentBuilder
import javax.xml.parsers.DocumentBuilderFactory
+import kotlin.math.max
import kotlin.math.min
import kotlin.system.exitProcess
@@ -207,29 +208,19 @@
}
}
waitForMorpho = zips.size > 1
- Arrays.stream(zips).forEach { zipFilePath ->
- executor.submit {
- processZipFile(
- (zipFilePath ?: "").toString(),
- getFoundryFromZipFileNames(zips)
- )
+
+ if (maxThreads > 1) {
+ LOGGER.info("Processing zip files in parallel with $maxThreads threads")
+ Arrays.stream(zips).parallel().forEach { zipFilePath ->
+ processZipFile((zipFilePath ?: "").toString(), getFoundryFromZipFileNames(zips))
+ }
+ } else {
+ LOGGER.info("Processing zip files sequentially")
+ Arrays.stream(zips).forEachOrdered { zipFilePath ->
+ processZipFileSequentially((zipFilePath ?: "").toString(), getFoundryFromZipFileNames(zips))
}
}
- executor.shutdown()
- while (!executor.isTerminated) {
- // Wait for all tasks to finish
- }
- texts.keys.sorted().parallelStream().forEach { docId ->
- if (!tokens.containsKey(docId)) {
- tokens[docId] = getTokenSpansFromMorho(morpho[docId]!!)
- }
- processText(
- docId,
- getFoundryFromZipFileName(fnames[docId]!!),
- true
- )
- }
if (annotationWorkerPool != null) {
LOGGER.info("closing worker pool")
annotationWorkerPool?.close()
@@ -263,19 +254,22 @@
return "base"
}
- private fun processZipFile(
- zipFilePath: String,
- foundry: String = "base",
-
- ) {
- ZipFile(zipFilePath).use { zipFile ->
- zipFile.stream().filter({ extractMetadataRegex.isNotEmpty() || !it.name.contains("header.xml") })
- //.sorted({ o1, o2 -> o1.name.compareTo(o2.name) })
- .parallel()
- .forEach { zipEntry ->
- processZipEntry(zipFile, foundry, zipEntry)
- }
- }
+ private fun processZipFile(zipFilePath: String, foundry: String = "base") {
+ ZipFile(zipFilePath).use { zipFile ->
+ zipFile.stream().filter({ extractMetadataRegex.isNotEmpty() || !it.name.contains("header.xml") })
+ .parallel().forEach { zipEntry ->
+ processZipEntry(zipFile, foundry, zipEntry)
+ }
+ }
+ }
+ private fun processZipFileSequentially(zipFilePath: String, foundry: String = "base") {
+ ZipFile(zipFilePath).use { zipFile ->
+ zipFile.stream().filter({ extractMetadataRegex.isNotEmpty() || !it.name.contains("header.xml") })
+ //.sorted({ o1, o2 -> o1.name.compareTo(o2.name) })
+ .forEachOrdered() { zipEntry ->
+ processZipEntry(zipFile, foundry, zipEntry)
+ }
+ }
}
fun processZipEntry(zipFile: ZipFile, foundry: String, zipEntry: java.util.zip.ZipEntry) {