Fix progressbar for w2v/now output with -o and without -q
Resolves #15
Change-Id: Iaa558b157b52660d062c8fb555ea283ef79f4401
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 9a789a7..d9113b1 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -1348,6 +1348,36 @@
}
}
+ // For text output formats with file output, initialize progress bar based on total zip size
+ // This avoids the overhead of pre-scanning all zips to count documents
+ if (outputFile != null && !quiet && (outputFormat == OutputFormat.CONLLU ||
+ outputFormat == OutputFormat.WORD2VEC ||
+ outputFormat == OutputFormat.NOW)) {
+ val totalBytes = zipSizes.values.sum()
+ if (totalBytes > 0) {
+ val taskName = when (outputFormat) {
+ OutputFormat.CONLLU -> "Converting to CoNLL-U"
+ OutputFormat.WORD2VEC -> "Extracting Word2Vec"
+ OutputFormat.NOW -> "Extracting NOW format"
+ else -> "Processing"
+ }
+
+ // Initialize progress bar with total MB (convert bytes to MB, keep as double for precision)
+ // We'll update it as each zip is completed using processedZipBytes
+ val totalMB = totalBytes / (1024.0 * 1024.0)
+ progressBar = ProgressBarBuilder()
+ .setTaskName(taskName)
+ .setInitialMax((totalMB * 100).toLong()) // Multiply by 100 to preserve 2 decimal places
+ .setStyle(ProgressBarStyle.COLORFUL_UNICODE_BAR)
+ .setUpdateIntervalMillis(500)
+ .showSpeed()
+ .setUnit(" MB", 100) // Divide by 100 when displaying
+ .build()
+
+ LOGGER.info("Initialized progress tracking for ${zips.size} zip(s), total size: ${humanBytes(totalBytes)}")
+ }
+ }
+
if (maxThreads > 1) {
val foundry = getFoundryFromZipFileNames(zips)
val parallelism = maxThreads.coerceAtLeast(1)
@@ -2102,9 +2132,18 @@
val etaStr = if (etaSeconds >= 0) formatDuration(etaSeconds) else "unknown"
LOGGER.info(
"Finished zip ${if (ord>0) ord else "?"}/$totalZips: ${zipFilePath} " +
- "(${humanBytes(size)}). Progress: ${String.format(Locale.ROOT, "%.1f", pct)}%%, " +
+ "(${humanBytes(size)}). Progress: ${String.format(Locale.ROOT, "%.1f", pct)}%, " +
"ETA ${etaStr} at ${humanSpeed}"
)
+
+ // Update progress bar for text output formats (size-based progress in MB)
+ if (!quiet && progressBar != null &&
+ (outputFormat == OutputFormat.CONLLU ||
+ outputFormat == OutputFormat.WORD2VEC ||
+ outputFormat == OutputFormat.NOW)) {
+ val doneMB = done / (1024.0 * 1024.0)
+ progressBar?.stepTo((doneMB * 100).toLong()) // Multiply by 100 to match initialization
+ }
} catch (e: Exception) {
LOGGER.fine("Failed to log zip progress for $zipFilePath: ${e.message}")
}
@@ -2155,15 +2194,18 @@
documentCount = entries.count { it.name.contains("tokens.xml") }
}
- // Update total document count and start timer if this is the first ZIP with external annotation
- // Initialize progress bar either for external annotation (-A) or internal tagging (-t)
- if ((annotationWorkerPool != null || taggerName != null) && documentCount > 0) {
- val newTotal = totalDocsInInput.addAndGet(documentCount)
+ // Update total document count and start timer for external annotation or internal tagging
+ // (Text output formats use size-based progress initialized upfront)
+ val shouldShowProgress = (annotationWorkerPool != null || taggerName != null)
+
+ if (shouldShowProgress && documentCount > 0) {
+ // Only for annotation/tagging scenarios
if (annotationStartTime.get() == 0L) {
+ val newTotal = totalDocsInInput.addAndGet(documentCount)
annotationStartTime.set(System.currentTimeMillis())
LOGGER.info("Starting annotation of $newTotal document(s)")
if (!quiet) {
- // Initialize progress bar for external annotation with ZIP output
+ // Initialize progress bar for annotation
progressBar = ProgressBarBuilder()
.setTaskName(targetZipFileName ?: "Annotating")
.setInitialMax(newTotal.toLong())
@@ -2174,7 +2216,7 @@
}
} else if (!quiet) {
// Increase the total as we discover more documents in later zips
- progressBar?.maxHint(newTotal.toLong())
+ progressBar?.maxHint(totalDocsInInput.addAndGet(documentCount).toLong())
}
}
@@ -2926,6 +2968,8 @@
synchronized(System.out) {
writeOutput(output.toString())
}
+ // Note: For text output formats, progress is now tracked by zip size in logZipProgress,
+ // not by individual documents, so we don't step the progress bar here
// Release internal char[] early
output.setLength(0)
} else {