Add progress bar and ETA logging for external annotation
Change-Id: Ie5efe67ad5ec976b5332a54be037db9ee0ffe06a
diff --git a/app/build.gradle b/app/build.gradle
index 701a18a..f5acf7a 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -45,6 +45,7 @@
implementation 'org.slf4j:slf4j-simple:2.0.17'
implementation 'org.apache.ant:ant:1.10.15'
implementation 'org.apache.commons:commons-compress:1.28.0'
+ implementation 'me.tongfei:progressbar:0.10.1'
}
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index eb9687f..15a654a 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -31,6 +31,11 @@
import java.util.zip.ZipEntry
import java.util.zip.ZipFile
+import me.tongfei.progressbar.ProgressBar
+import me.tongfei.progressbar.ProgressBarBuilder
+import me.tongfei.progressbar.ProgressBarStyle
+import java.time.LocalDateTime
+import java.time.format.DateTimeFormatter
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream
import javax.xml.parsers.DocumentBuilder
import javax.xml.parsers.DocumentBuilderFactory
@@ -319,6 +324,9 @@
private val processedDocs = java.util.concurrent.atomic.AtomicInteger(0)
private val docsSentToAnnotation = java.util.concurrent.atomic.AtomicInteger(0)
private val docsWrittenToZip = java.util.concurrent.atomic.AtomicInteger(0)
+ private val totalDocsInInput = java.util.concurrent.atomic.AtomicInteger(0) // Track total documents for progress
+ private val annotationStartTime = java.util.concurrent.atomic.AtomicLong(0) // Track when annotation started
+ private var progressBar: ProgressBar? = null
var taggerToolBridges: ConcurrentHashMap<Long, TaggerToolBridge?> = ConcurrentHashMap()
var parserToolBridges: ConcurrentHashMap<Long, ParserToolBridge?> = ConcurrentHashMap()
@@ -485,6 +493,9 @@
}
}
+ // Close progress bar
+ progressBar?.close()
+
// Check if all documents were written
val sent = docsSentToAnnotation.get()
val written = docsWrittenToZip.get()
@@ -716,14 +727,37 @@
private fun processZipEntriesWithPool(zipFile: ZipFile, foundry: String, waitForMorpho: Boolean) {
// Collect entries first to avoid lazy evaluation surprises, filter header.xml unless metadata extraction is requested
val entries: MutableList<ZipEntry> = ArrayList()
+ var documentCount = 0
val enumEntries = zipFile.entries()
while (enumEntries.hasMoreElements()) {
val e = enumEntries.nextElement()
if (extractMetadataRegex.isEmpty() && e.name.contains("header.xml")) continue
entries.add(e)
+ // Count data.xml files as documents for progress tracking
+ if (e.name.contains("data.xml")) {
+ documentCount++
+ }
}
if (entries.isEmpty()) return
+ // Update total document count and start timer if this is the first ZIP with external annotation
+ if (annotationWorkerPool != null && documentCount > 0) {
+ val newTotal = totalDocsInInput.addAndGet(documentCount)
+ if (annotationStartTime.get() == 0L) {
+ annotationStartTime.set(System.currentTimeMillis())
+ LOGGER.info("Starting annotation of $newTotal document(s)")
+
+ // Initialize progress bar for external annotation with ZIP output
+ progressBar = ProgressBarBuilder()
+ .setTaskName("Annotating")
+ .setInitialMax(newTotal.toLong())
+ .setStyle(ProgressBarStyle.ASCII)
+ .setUpdateIntervalMillis(500) // Update every 500ms
+ .showSpeed()
+ .build()
+ }
+ }
+
// If only one thread requested, do sequential to avoid pool overhead
if (maxThreads <= 1) {
entries.forEach { entry -> processZipEntry(zipFile, foundry, entry, waitForMorpho) }
@@ -1733,7 +1767,32 @@
morphoZipOutputStream!!.closeArchiveEntry()
}
LOGGER.fine("Successfully wrote morpho.xml for $docId")
- docsWrittenToZip.incrementAndGet()
+ val written = docsWrittenToZip.incrementAndGet()
+
+ // Update progress bar
+ progressBar?.step()
+
+ // Show progress with ETA at INFO level
+ if (annotationWorkerPool != null && totalDocsInInput.get() > 0) {
+ val total = totalDocsInInput.get()
+ val percent = (written * 100.0) / total
+ val elapsed = (System.currentTimeMillis() - annotationStartTime.get()) / 1000.0
+ val docsPerSec = if (elapsed > 0) written / elapsed else 0.0
+ val remaining = total - written
+ val etaSec = if (docsPerSec > 0) remaining / docsPerSec else 0.0
+
+ // Calculate estimated finish time
+ val finishTime = LocalDateTime.now().plusSeconds(etaSec.toLong())
+ val timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss")
+
+ if (written % 10 == 0 || written == total) {
+ val etaMin = (etaSec / 60).toInt()
+ val etaSec2 = (etaSec % 60).toInt()
+ LOGGER.info(String.format(Locale.ROOT,
+ "Progress: %d/%d (%.1f%%), %.1f docs/s, ETA %02d:%02d, finish ~%s",
+ written, total, percent, docsPerSec, etaMin, etaSec2, finishTime.format(timeFormatter)))
+ }
+ }
} catch (e: Exception) {
LOGGER.severe("ERROR generating/writing morpho.xml: ${e.message}")
e.printStackTrace()