Use archive-order plain streaming mode also for w2v output

Change-Id: I42a03c401c16b17922277024cb1663f655a692f6
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 498a763..1b973f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 - Plain NOW export now opens ZIP input with `java.util.zip.ZipFile` in streaming mode instead of Apache Commons `ZipFile`, removing the multi-minute startup delay on very large archives with huge entry counts and allowing extraction to begin almost immediately
 - Plain NOW export startup and progress diagnostics now log ZIP open time and first-output timing more explicitly, making it easier to distinguish ZIP indexing overhead from actual extraction work
+- Plain Word2Vec export now uses the same archive-order streaming ZIP path as plain NOW output, including the faster `java.util.zip.ZipFile` opener for large archives with many entries
 
 ## [v3.3.0] - 2026-03-26
 
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 7051227..d929e85 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -972,12 +972,19 @@
             outputFormat == OutputFormat.WORD2VEC ||
             outputFormat == OutputFormat.NOW)
 
-    internal fun canStreamNowEntriesImmediately(): Boolean =
-        outputFormat == OutputFormat.NOW &&
+    internal fun canUseArchiveOrderTextStreaming(): Boolean =
+        (outputFormat == OutputFormat.NOW || outputFormat == OutputFormat.WORD2VEC) &&
             annotationWorkerPool == null &&
             taggerName == null &&
             parserName == null
 
+    private fun textStreamingModeLabel(): String =
+        when (outputFormat) {
+            OutputFormat.NOW -> "NOW"
+            OutputFormat.WORD2VEC -> "Word2Vec"
+            else -> outputFormat.name
+        }
+
     internal fun canUseStaxTextParsing(): Boolean =
         outputFormat == OutputFormat.CONLLU ||
             outputFormat == OutputFormat.WORD2VEC ||
@@ -1353,13 +1360,13 @@
                 Thread(r, "KrillWorker-${Thread.currentThread().threadId()}")
             }
             LOGGER.info("Initialized work-stealing scheduler with $maxThreads worker threads for Krill output")
-        } else if (canStreamNowEntriesImmediately()) {
+        } else if (canUseArchiveOrderTextStreaming()) {
             entryExecutor = java.util.concurrent.Executors.newFixedThreadPool(maxThreads) { r ->
-                Thread(r, "NowEntryWorker-${Thread.currentThread().threadId()}")
+                Thread(r, "TextStreamWorker-${Thread.currentThread().threadId()}")
             }
             val textParserMode = if (shouldParseDataXmlWithStax()) "StAX" else "DOM"
             LOGGER.info(
-                "Initialized NOW streaming mode: archive-order entries, parallel entry processing, " +
+                "Initialized ${textStreamingModeLabel()} streaming mode: archive-order entries, parallel entry processing, " +
                     "no text-ID scheduling, data.xml via $textParserMode"
             )
         } else {
@@ -1620,8 +1627,8 @@
         if (maxThreads > 1) {
             val foundry = getFoundryFromZipFileNames(zips)
             val parallelism = maxThreads.coerceAtLeast(1)
-            if (canStreamNowEntriesImmediately()) {
-                LOGGER.info("Processing zips in NOW streaming mode; zip parallelism=$parallelism; entry order=archive")
+            if (canUseArchiveOrderTextStreaming()) {
+                LOGGER.info("Processing zips in ${textStreamingModeLabel()} streaming mode; zip parallelism=$parallelism; entry order=archive")
             } else {
                 LOGGER.info("Processing zips with ordered queue; parallelism=$parallelism; entries ${if (sequentialInZip) "sequential" else "parallel"}")
             }
@@ -2222,7 +2229,7 @@
     private fun compareTextIds(a: String, b: String): Int =
         monthAwareSortKey(a).compareTo(monthAwareSortKey(b))
 
-    private fun shouldUseUnicodeExtraFieldsOnOpen(): Boolean = !canStreamNowEntriesImmediately()
+    private fun shouldUseUnicodeExtraFieldsOnOpen(): Boolean = !canUseArchiveOrderTextStreaming()
 
     private interface ReadableZipEntry {
         val name: String
@@ -2291,7 +2298,7 @@
         return "base"
     }
 
-    private fun useJavaZipForNow(): Boolean = canStreamNowEntriesImmediately()
+    private fun useJavaZipForTextStreaming(): Boolean = canUseArchiveOrderTextStreaming()
 
     private fun processZipFile(zipFilePath: String, foundry: String = "base") {
         val ord = zipOrdinals[zipFilePath] ?: 0
@@ -2415,7 +2422,7 @@
         LOGGER.fine("About to process ZIP entries: hasCorrespondingBaseZip=${zipFilePath.hasCorrespondingBaseZip()}")
         if (zipFilePath.hasCorrespondingBaseZip()) {
             val baseZip = zipFilePath.correspondingBaseZip()!!
-            val relatedZips = if (canStreamNowEntriesImmediately() && !lemmaOnly) {
+            val relatedZips = if (canUseArchiveOrderTextStreaming() && !lemmaOnly) {
                 arrayOf(baseZip, zipFilePath)
             } else {
                 arrayOf(zipFilePath, baseZip)
@@ -2428,9 +2435,9 @@
                 } else {
                     foundry  // Keep original foundry for non-krill formats
                 }
-                if (useJavaZipForNow()) {
+                if (useJavaZipForTextStreaming()) {
                     openJavaZipFile(zip).use { zipFile ->
-                        LOGGER.info("Using NOW streaming mode for $zip: archive-order entries, no text-ID sorting")
+                        LOGGER.info("Using ${textStreamingModeLabel()} streaming mode for $zip: archive-order entries, no text-ID sorting")
                         processZipEntriesStreaming(zipFile, zip, zipFoundry, true)
                     }
                 } else {
@@ -2443,9 +2450,9 @@
             LOGGER.fine("Opening ZipFile for processing: $zipFilePath")
             try {
                 // If no corresponding base ZIP exists, this IS the base ZIP
-                if (useJavaZipForNow()) {
+                if (useJavaZipForTextStreaming()) {
                     openJavaZipFile(zipFilePath).use { zipFile ->
-                        LOGGER.info("Using NOW streaming mode for $zipFilePath: archive-order entries, no text-ID sorting")
+                        LOGGER.info("Using ${textStreamingModeLabel()} streaming mode for $zipFilePath: archive-order entries, no text-ID sorting")
                         processZipEntriesStreaming(zipFile, zipFilePath, foundry, false)
                     }
                 } else {
@@ -2482,7 +2489,7 @@
         if (zipFilePath.hasCorrespondingBaseZip()) {
             // Process the two related zips strictly sequentially to limit memory growth
             val baseZip = zipFilePath.correspondingBaseZip()!!
-            val zips = if (canStreamNowEntriesImmediately() && !lemmaOnly) {
+            val zips = if (canUseArchiveOrderTextStreaming() && !lemmaOnly) {
                 arrayOf(baseZip, zipFilePath)
             } else {
                 arrayOf(zipFilePath, baseZip)
@@ -2494,9 +2501,9 @@
                 } else {
                     foundry  // Keep original foundry for non-krill formats
                 }
-                if (useJavaZipForNow()) {
+                if (useJavaZipForTextStreaming()) {
                     openJavaZipFile(zip).use { zipFile ->
-                        LOGGER.info("Using NOW streaming mode for $zip: archive-order entries, no text-ID sorting")
+                        LOGGER.info("Using ${textStreamingModeLabel()} streaming mode for $zip: archive-order entries, no text-ID sorting")
                         processZipEntriesStreaming(zipFile, zip, zipFoundry, true)
                     }
                 } else {
@@ -2512,9 +2519,9 @@
                 }
             }
         } else {
-            if (useJavaZipForNow()) {
+            if (useJavaZipForTextStreaming()) {
                 openJavaZipFile(zipFilePath).use { zipFile ->
-                    LOGGER.info("Using NOW streaming mode for $zipFilePath: archive-order entries, no text-ID sorting")
+                    LOGGER.info("Using ${textStreamingModeLabel()} streaming mode for $zipFilePath: archive-order entries, no text-ID sorting")
                     processZipEntriesStreaming(zipFile, zipFilePath, foundry, false)
                 }
             } else {
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
index 6668220..61c4ebe 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/GeneralFeaturesTest.kt
@@ -137,11 +137,11 @@
     }
 
     @Test
-    fun plainNowOutputCanStreamWithoutSorting() {
+    fun plainNowOutputCanUseArchiveOrderStreaming() {
         val tool = KorapXmlTool()
         tool.outputFormat = OutputFormat.NOW
 
-        assertTrue(tool.canStreamNowEntriesImmediately())
+        assertTrue(tool.canUseArchiveOrderTextStreaming())
         assertTrue(tool.canUseStaxTextParsing())
         assertTrue(!tool.shouldParseDataXmlWithStax())
         tool.useStaxTextParser = true
@@ -149,11 +149,20 @@
     }
 
     @Test
-    fun nonNowOutputKeepsOrderedPipeline() {
+    fun plainWord2VecOutputCanUseArchiveOrderStreaming() {
+        val tool = KorapXmlTool()
+        tool.outputFormat = OutputFormat.WORD2VEC
+
+        assertTrue(tool.canUseArchiveOrderTextStreaming())
+        assertTrue(tool.canUseStaxTextParsing())
+    }
+
+    @Test
+    fun conlluOutputKeepsOrderedPipeline() {
         val tool = KorapXmlTool()
         tool.outputFormat = OutputFormat.CONLLU
 
-        assertTrue(!tool.canStreamNowEntriesImmediately())
+        assertTrue(!tool.canUseArchiveOrderTextStreaming())
         assertTrue(tool.canUseStaxTextParsing())
     }