Fix task scheduler imbalance for krill output
Change-Id: I4d59235b3d5602d67ed325f1f5335b967468852c
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 83bc07e..a919d47 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -1081,6 +1081,13 @@
return TextIdSortKey(prefix, monthRank, mid, tailNumber, textId)
}
+ private fun compareQueuedTasksForScheduling(left: PrioritizedTask?, right: PrioritizedTask?): Int {
+ if (left == null && right == null) return 0
+ if (left == null) return 1
+ if (right == null) return -1
+ return left.compareTo(right)
+ }
+
val texts: ConcurrentHashMap<String, NonBmpString> = ConcurrentHashMap()
val sentences: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
val tokens: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
@@ -2495,10 +2502,13 @@
// Throttle submission if this foundry is getting too far ahead
while (workStealingSchedulerActive && foundrySubmissionComplete.isNotEmpty()) {
// Find the minimum NEXT text ID across all foundries (either in queue or submitted)
- val minNextTextId = foundryTaskQueues.entries
+ val minQueuedTask = foundryTaskQueues.entries
.filter { it.value.isNotEmpty() }
- .minOfOrNull { it.value.peek()?.textId ?: "~~~~~" }
- ?: foundryWatermarks.values.minOrNull()
+ .minWithOrNull { left, right ->
+ compareQueuedTasksForScheduling(left.value.peek(), right.value.peek())
+ }?.value?.peek()
+ val minNextTextId = minQueuedTask?.textId
+ ?: foundryWatermarks.values.minWithOrNull(this::compareTextIds)
?: textId
// Calculate position difference
@@ -2607,9 +2617,8 @@
val foundryToProcess = synchronized(foundryTaskQueues) {
foundryTaskQueues.entries
.filter { entry -> entry.value.isNotEmpty() }
- .minByOrNull { entry ->
- // Use the NEXT text ID in queue (peek), not the completed watermark
- entry.value.peek()?.textId ?: "~~~~~"
+ .minWithOrNull { left, right ->
+ compareQueuedTasksForScheduling(left.value.peek(), right.value.peek())
}?.key
}
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
index ad43606..6b15dec 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
@@ -105,9 +105,42 @@
assertEquals(expected, sorted, "Mixed month IDs should follow calendar order")
}
+ @Test
+ fun queueHeadSchedulerUsesMonthAwareTextOrder() {
+ val tool = KorapXmlTool()
+ val left = prioritizedTask(tool, "spacy", "DNB17_DEZ.81042")
+ val right = prioritizedTask(tool, "base", "DNB17_OKT.12345")
+
+ val scheduledFirst = tool.compareQueuedTasksForScheduling(left, right)
+ assertTrue(
+ scheduledFirst > 0,
+ "Queue scheduling must use compareTextIds semantics rather than raw string order"
+ )
+ }
+
private fun KorapXmlTool.compareTextIds(a: String, b: String): Int {
val m = KorapXmlTool::class.java.getDeclaredMethod("compareTextIds", String::class.java, String::class.java)
m.isAccessible = true
return m.invoke(this, a, b) as Int
}
+
+ private fun KorapXmlTool.compareQueuedTasksForScheduling(left: Any, right: Any): Int {
+ val taskClass = Class.forName("de.ids_mannheim.korapxmltools.KorapXmlTool\$PrioritizedTask")
+ val m = KorapXmlTool::class.java.getDeclaredMethod("compareQueuedTasksForScheduling", taskClass, taskClass)
+ m.isAccessible = true
+ return m.invoke(this, left, right) as Int
+ }
+
+ private fun prioritizedTask(tool: KorapXmlTool, foundry: String, textId: String): Any {
+ val taskClass = Class.forName("de.ids_mannheim.korapxmltools.KorapXmlTool\$PrioritizedTask")
+ val ctor = taskClass.getDeclaredConstructor(
+ KorapXmlTool::class.java,
+ String::class.java,
+ String::class.java,
+ Runnable::class.java,
+ Long::class.javaPrimitiveType
+ )
+ ctor.isAccessible = true
+ return ctor.newInstance(tool, foundry, textId, Runnable { }, 0L)
+ }
}