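Overhaul and standardize command line options

Breaking changes to the CLI:
- -f/--output-format is now -t/--to
- -t/--tag-with is now -T/--tag-with
- -T/--threads is now -j/--jobs (--threads is kept as an alias)
- -o/--overwrite is now -f/--force
- -w/--word2vec is removed; use -t w2v instead
- --zip-parallelism is removed; zip concurrency follows -j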

Change-Id: I976da707c29b0bc9aac241398f834ef7198d0482
diff --git a/Readme.md b/Readme.md
index 3fce1e8..cc7ae27 100644
--- a/Readme.md
+++ b/Readme.md
@@ -84,15 +84,14 @@
 
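-- `--lemma-only`: For `-f w2v` and `-f now`, skip loading `data.xml` and output only lemmas from `morpho.xml`. This reduces memory and speeds up throughput.
+- `--lemma-only`: For `-t w2v` and `-t now`, skip loading `data.xml` and output only lemmas from `morpho.xml`. This reduces memory and speeds up throughput.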
 - `--sequential`: Process entries inside each zip sequentially (zips can still run in parallel). Recommended for `w2v`/`now` to keep locality and lower memory.
-- `--zip-parallelism N`: Limit how many zips are processed concurrently (defaults to `--threads`). Helps avoid disk thrash and native inflater pressure.
 - `--exclude-zip-glob GLOB` (repeatable): Skip zip basenames that match the glob (e.g., `--exclude-zip-glob 'w?d24.tree_tagger.zip'`).
 
 Example for large NOW export with progress and exclusions:
 
 ```
 KORAPXMLTOOL_XMX=64g KORAPXMLTOOL_MODELS_PATH=/data/models KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
-     ./build/bin/korapxmltool -l info --threads 100 --zip-parallelism 8 \
-     --lemma-only --sequential -f now \
+     ./build/bin/korapxmltool -l info -j 100 \
+     --lemma-only --sequential -t now \
      --exclude-zip-glob 'w?d24.tree_tagger.zip' \
      /vol/corpora/DeReKo/current/KorAP/zip/*24.tree_tagger.zip | pv > dach2024.lemma.txt
 ```
@@ -124,14 +123,14 @@
 
 ```shell script
 # With full path
-./build/bin/korapxmltool -f zip -t marmot:models/de.marmot app/src/test/resources/goe.zip
+./build/bin/korapxmltool -t zip -T marmot:models/de.marmot app/src/test/resources/goe.zip
 
 # With KORAPXMLTOOL_MODELS_PATH (searches in /data/models/ if model not found locally)
 export KORAPXMLTOOL_MODELS_PATH=/data/models
-./build/bin/korapxmltool -f zip -t marmot:de.marmot app/src/test/resources/goe.zip
+./build/bin/korapxmltool -t zip -T marmot:de.marmot app/src/test/resources/goe.zip
 
-# Without setting KORAPXMLTOOL_MODELS_PATH (uses default ../lib/models from executable)
-./build/bin/korapxmltool -f zip -t marmot:de.marmot app/src/test/resources/goe.zip
+# Without setting KORAPXMLTOOL_MODELS_PATH (searches current directory only)
+./build/bin/korapxmltool -t zip -T marmot:models/de.marmot app/src/test/resources/goe.zip
 ```
 
 ### Tagging with integrated OpenNLP POS tagger directly to a new KorAP-XML ZIP file
@@ -183,12 +182,12 @@
 Note that parsers take POS tagged input.
 
 ```shell script
-./build/bin/korapxmltool -f zip -T2 -P malt:german.mco goe.tree_tagger.zip
+./build/bin/korapxmltool -t zip -j2 -P malt:german.mco goe.tree_tagger.zip
 ```
 
 ### Tag with MarMoT and parse with Maltparser in one run directly to a new KorAP-XML ZIP file
 ```shell script
-./build/bin/korapxmltool -f zip -t marmot:models/de.marmot -P malt:german.mco goe.zip
+./build/bin/korapxmltool -t zip -T marmot:models/de.marmot -P malt:german.mco goe.zip
 ```
 
 ## Development and License
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 6c06c7f..6e325c7 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -72,27 +72,27 @@
             "    ./build/bin/korapxmltool app/src/test/resources/wdf19.tree_tagger.zip | head -10",
             "",
             "  Word2Vec style output:",
-            "    ./build/bin/korapxmltool -f w2v app/src/test/resources/wud24_sample.zip",
+            "    ./build/bin/korapxmltool -t w2v app/src/test/resources/wud24_sample.zip",
             "",
             "  Extract metadata and convert:",
-            "    ./build/bin/korapxmltool -m '<textSigle>([^<]+)' -m '<creatDate>([^<]+)' --word2vec t/data/wdf19.zip",
+            "    ./build/bin/korapxmltool -m '<textSigle>([^<]+)' -m '<creatDate>([^<]+)' -t w2v t/data/wdf19.zip",
             "",
             "  NOW corpus export:",
-            "    ./build/bin/korapxmltool -f now /vol/corpora/DeReKo/current/KorAP/zip/*24.zip | pv > dach24.txt",
+            "    ./build/bin/korapxmltool -t now /vol/corpora/DeReKo/current/KorAP/zip/*24.zip | pv > dach24.txt",
             "",
             "  Tag with integrated MarMot POS tagger, and parse with internal Malt parser:",
-            "    ./build/bin/korapxmltool -f zip -t marmot:de.marmot -P malt:german.mco app/src/test/resources/goe.zip",
-            "    # (uses KORAPXMLTOOL_MODELS_PATH if model not found in current directory; defaults to ../lib/models)",
+            "    ./build/bin/korapxmltool -t zip -T marmot:de.marmot -P malt:german.mco app/src/test/resources/goe.zip",
+            "    # (uses KORAPXMLTOOL_MODELS_PATH if model not found in current directory)",
             "",
             "  Use external spaCy annotation (without dependencies):",
-            "    ./build/bin/korapxmltool -T4 -A \"docker run -e SPACY_USE_DEPENDENCIES=False --rm -i korap/conllu2spacy:latest\" -f zip ./app/src/test/resources/goe.zip",
+            "    ./build/bin/korapxmltool -j4 -A \"docker run -e SPACY_USE_DEPENDENCIES=False --rm -i korap/conllu2spacy:latest\" -t zip ./app/src/test/resources/goe.zip",
             "",
             "  Generate Krill tar from wud24_sample with multiple annotation foundries:",
-            "    ./build/bin/korapxmltool -f krill -D . app/src/test/resources/wud24_sample*.zip",
+            "    ./build/bin/korapxmltool -t krill -D . app/src/test/resources/wud24_sample*.zip",
             "",
             "  Large corpus annotation with custom memory and performance and default model settings:",
             "    KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_MODELS_PATH=/data/models KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
-            "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot -P malt wpd25*.zip"
+            "        ./build/bin/korapxmltool -j 100 -t zip -T marmot -P malt wpd25*.zip"
     ]
 )
 
@@ -111,7 +111,7 @@
     var zipFileNames: Array<String>? = null
 
     @Option(
-        names = ["-f", "--output-format"],
+        names = ["-t", "--to"],
         description = ["Output format: ${ConlluOutputFormat.NAME}, ${Word2VecOutputFormat.NAME}, ${KorapXmlOutputFormat.NAME}, ${NowOutputFormat.NAME}, ${KrillOutputFormat.NAME}",
             "conllu: CoNLL-U format",
             "korapxml, xml, zip: KorAP-XML format zip",
@@ -169,16 +169,6 @@
     )
     var columns: Int = 10
 
-    @Option(
-        names = ["--word2vec", "-w"],
-        description = ["Print text in LM training format: tokens separated by space, sentences separated by newline",
-            "Deprecated: use -f word2vec"]
-    )
-    fun setWord2Vec(word2vec: Boolean) {
-        if (word2vec) {
-            outputFormat = OutputFormat.WORD2VEC
-        }
-    }
 
     @Option(
         names = ["--exclude-zip-glob"],
@@ -237,7 +227,7 @@
     var quiet: Boolean = false
 
     @Option(
-        names = ["--threads", "-T"],
+        names = ["-j", "--jobs", "--threads"],
         paramLabel = "THREADS",
         description = ["Maximum number of threads to use. Default: ${"$"}{DEFAULT-VALUE}"]
     )
@@ -250,12 +240,6 @@
         System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", threads.toString())
     }
 
-    @Option(
-        names = ["--zip-parallelism"],
-        paramLabel = "N",
-        description = ["Maximum number of zip files to process concurrently. Defaults to --threads."]
-    )
-    var zipParallelism: Int? = null
 
     @Option(
         names = ["--sequential"],
@@ -266,7 +250,7 @@
     var sequentialInZip: Boolean = false
 
     @Option(
-        names = ["--overwrite", "-o"],
+        names = ["-f", "--force"],
         description = ["Overwrite existing files"]
     )
     var overwrite: Boolean = false
@@ -295,7 +279,7 @@
         names = ["--lemma-only"],
         description = [
             "Do not load texts from data.xml and output only lemmas (requires morpho.xml).",
-            "Only valid with -f word2vec or -f now; implies --lemma."
+            "Only valid with -t word2vec or -t now; implies --lemma."
         ]
     )
     var lemmaOnly: Boolean = false
@@ -347,7 +331,7 @@
         return null
     }
     @Option(
-        names = ["--tag-with", "-t"],
+        names = ["-T", "--tag-with"],
         paramLabel = "TAGGER[:MODEL]",
         description = ["Specify a tagger and optionally a model: ${taggerFoundries}[:<path/to/model>].",
                       "If model is omitted, defaults are: marmot→de.marmot, opennlp→de-pos-maxent.bin, corenlp→german-fast.tagger"]
@@ -394,7 +378,7 @@
     private var parserName: String? = null
     private var parserModel: String? = null
     @Option(
-        names = ["--parse-with", "-P"],
+        names = ["-P", "--parse-with"],
         paramLabel = "PARSER[:MODEL]",
         description = ["Specify a parser and optionally a model: ${parserFoundries}[:<path/to/model>].",
                       "If model is omitted, defaults are: malt→german.mco, corenlp→germanSR.ser.gz"]
@@ -465,7 +449,7 @@
         if (lemmaOnly) {
             useLemma = true
             if (outputFormat != OutputFormat.WORD2VEC && outputFormat != OutputFormat.NOW) {
-                throw ParameterException(spec.commandLine(), "--lemma-only is supported only with -f word2vec or -f now")
+                throw ParameterException(spec.commandLine(), "--lemma-only is supported only with -t word2vec or -t now")
             }
         }
 
@@ -811,7 +795,7 @@
             LOGGER.info("Initializing krill TAR output: $krillOutputFileName")
 
             if (File(krillOutputFileName!!).exists() && !overwrite) {
-                LOGGER.severe("Output file $krillOutputFileName already exists. Use --overwrite to overwrite.")
+                LOGGER.severe("Output file $krillOutputFileName already exists. Use --force to overwrite.")
                 exitProcess(1)
             }
 
@@ -892,7 +876,7 @@
 
                 // Check for existing output file BEFORE redirecting logging, so user sees the message
                 if (File(outputMorphoZipFileName).exists() && !overwrite) {
-                    val errorMsg = "Output file $outputMorphoZipFileName already exists. Use --overwrite to overwrite."
+                    val errorMsg = "Output file $outputMorphoZipFileName already exists. Use --force to overwrite."
                     System.err.println("ERROR: $errorMsg")
                     LOGGER.severe(errorMsg)
                     exitProcess(1)
@@ -987,13 +971,13 @@
 
         if (sequentialInZip) {
             if (outputFormat != OutputFormat.WORD2VEC && outputFormat != OutputFormat.NOW) {
-                throw ParameterException(spec.commandLine(), "--sequential is supported only with -f word2vec or -f now")
+                throw ParameterException(spec.commandLine(), "--sequential is supported only with -t word2vec or -t now")
             }
         }
 
         if (maxThreads > 1) {
             val foundry = getFoundryFromZipFileNames(zips)
-            val parallelism = (zipParallelism ?: maxThreads).coerceAtLeast(1)
+            val parallelism = maxThreads.coerceAtLeast(1)
             LOGGER.info("Processing zips with ordered queue; parallelism=$parallelism; entries ${if (sequentialInZip) "sequential" else "parallel"}")
             processZipsWithQueue(zips, foundry, parallelism)
         } else {
@@ -1350,7 +1334,7 @@
 
             // Check for existing output file BEFORE redirecting logging, so user sees the message
             if (File(outputMorphoZipFileName).exists() && !overwrite) {
-                val errorMsg = "Output file $outputMorphoZipFileName already exists. Use --overwrite to overwrite."
+                val errorMsg = "Output file $outputMorphoZipFileName already exists. Use --force to overwrite."
                 System.err.println("ERROR: $errorMsg")
                 LOGGER.severe(errorMsg)
                 exitProcess(1)
@@ -4206,7 +4190,7 @@
         zipInventory.clear()
 
         // Scan ZIPs in parallel for faster startup
-        val scanParallelism = (zipParallelism ?: maxThreads).coerceAtLeast(1)
+        val scanParallelism = maxThreads.coerceAtLeast(1)
         val executor = java.util.concurrent.Executors.newFixedThreadPool(scanParallelism)
 
         try {
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
index bfbc622..ea1860f 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
@@ -178,18 +178,16 @@
 
     @Test
-    fun deprecatedW2vOptionWorks() {
+    fun removedW2vOptionIsRejected() {
+        // The legacy -w shortcut was removed in this overhaul; the parser must now reject it.
         val args = arrayOf("-w", loadResource("wdf19.zip").path)
-        debug(args)
-        assertContains(
-            outContent.toString(),
-            "\nje ne suis pas du tout d'accord !\n"
-        )
-        assertFalse { outContent.toString().contains("WDF19_A0000.13865") }
+        val exitCode = debug(args)
+        // A non-zero exit code signals that the removed option is no longer accepted.
+        assertTrue(exitCode != 0, "Old -w option should no longer work in v3.0")
     }
 
     @Test
     fun w2vOptionWorks() {
-        val args = arrayOf("-f", "w2v", loadResource("wdf19.zip").path)
+        val args = arrayOf("-t", "w2v", loadResource("wdf19.zip").path)
         debug(args)
         assertContains(
             outContent.toString(),
@@ -200,7 +198,7 @@
 
     @Test
     fun nowOptionWorks() {
-        val args = arrayOf("-f", "now", loadResource("wdf19.zip").path)
+        val args = arrayOf("-t", "now", loadResource("wdf19.zip").path)
         debug(args)
         val output = outContent.toString()
         // Check that output starts with @@<text-sigle>
@@ -218,7 +216,7 @@
 
     @Test
     fun canConvertXMLwithInvalidComments() {
-        val args = arrayOf("-w", zca20scrambled)
+        val args = arrayOf("-t", "w2v", zca20scrambled)
         debug(args)
         assertContains(
             outContent.toString(),
@@ -250,7 +248,7 @@
 
     @Test
     fun canExtractMetadata() {
-        val args = arrayOf("--word2vec", "-m" ,"<textSigle>([^<]+)", "-m", "<creatDate>([^<]+)", loadResource("wdf19.zip").path)
+        val args = arrayOf("-t", "w2v", "-m" ,"<textSigle>([^<]+)", "-m", "<creatDate>([^<]+)", loadResource("wdf19.zip").path)
         debug(args)
         assertContains(
             outContent.toString(),
@@ -260,7 +258,7 @@
 
     @Test
     fun canHandleNonBmpText() {
-        val args = arrayOf("--word2vec", wdd17)
+        val args = arrayOf("-t", "w2v", wdd17)
         debug(args)
         assertContains(
             outContent.toString(),
@@ -317,7 +315,7 @@
         val tmpSourceFileName = tmpSourceFile.absolutePath
         File(sourceFile).copyTo(File(tmpSourceFileName), true)
         val outputDir = File(tmpSourceFileName).parentFile.absolutePath
-        val args = arrayOf("-D", outputDir, "-o", "-f", "zip", tmpSourceFileName)
+        val args = arrayOf("-D", outputDir, "-f", "-t", "zip", tmpSourceFileName)
         debug(args)
 
         val resultFile = tmpSourceFileName.toString().replace(".zip", ".base.zip")
@@ -333,7 +331,7 @@
         val resultFile = tmpSourceFileName.toString().replace(".zip", ".base.zip")
         File(resultFile).createNewFile()
         val outputDir = File(tmpSourceFileName).parentFile.absolutePath
-        val args = arrayOf("-D", outputDir, "-o", "-f", "zip", tmpSourceFileName)
+        val args = arrayOf("-D", outputDir, "-f", "-t", "zip", tmpSourceFileName)
         debug(args)
         assert(File(resultFile).exists())
         assert(File(resultFile).length() > 0)
@@ -341,7 +339,7 @@
 
     @Test
     fun canWord2VecLemma() {
-        val args = arrayOf("--lemma", "-f", "w2v", loadResource("goe.tree_tagger.zip").path)
+        val args = arrayOf("--lemma", "-t", "w2v", loadResource("goe.tree_tagger.zip").path)
         debug(args)
         val out = outContent.toString()
         // Expect lemma sequence containing "mein Ankunft" (surface would include inflected form elsewhere)
@@ -350,7 +348,7 @@
 
     @Test
     fun canNowLemma() {
-        val args = arrayOf("--lemma", "-f", "now", loadResource("goe.tree_tagger.zip").path)
+        val args = arrayOf("--lemma", "-t", "now", loadResource("goe.tree_tagger.zip").path)
         debug(args)
         val out = outContent.toString()
         assertContains(out, "@@")
@@ -360,7 +358,7 @@
 
     @Test
     fun lemmaOnlyWord2VecWorks() {
-        val args = arrayOf("--lemma-only", "-f", "w2v", loadResource("goe.tree_tagger.zip").path)
+        val args = arrayOf("--lemma-only", "-t", "w2v", loadResource("goe.tree_tagger.zip").path)
         debug(args)
         val out = outContent.toString()
         // Should produce some lemma tokens without requiring data.xml
@@ -369,7 +367,7 @@
 
     @Test
     fun lemmaOnlyNowWorks() {
-        val args = arrayOf("--lemma-only", "-f", "now", loadResource("goe.tree_tagger.zip").path)
+        val args = arrayOf("--lemma-only", "-t", "now", loadResource("goe.tree_tagger.zip").path)
         debug(args)
         val out = outContent.toString()
         assertContains(out, "@@")
@@ -393,7 +391,7 @@
         val rc = debug(args)
         // Non-zero is expected; and error message should be present
         assertTrue(rc != 0)
-        assertContains(errContent.toString(), "--sequential is supported only with -f word2vec or -f now")
+        assertContains(errContent.toString(), "--sequential is supported only with -t word2vec or -t now")
     }
 
     @Test
@@ -475,7 +473,7 @@
 
         val generatedTar = ensureKrillTar("wud24_full_foundries") { outputDir ->
             arrayOf(
-                "-f", "krill",
+                "-t", "krill",
                 "-l", "info",
                 "-D", outputDir.path,
                 baseZip,
@@ -617,7 +615,7 @@
         val spacyZip = loadResource("wud24_sample.spacy.zip").path
 
         val generatedTar = ensureKrillTar("wud24_base_spacy") { outputDir ->
-            arrayOf("-f", "krill", "-D", outputDir.path, baseZip, spacyZip)
+            arrayOf("-t", "krill", "-D", outputDir.path, baseZip, spacyZip)
         }
         assertTrue(generatedTar.exists())
 
@@ -659,7 +657,7 @@
         val spacyZip = loadResource("wud24_sample.spacy.zip").path
 
         val generatedTar = ensureKrillTar("wud24_base_spacy") { outputDir ->
-            arrayOf("-f", "krill", "-D", outputDir.path, baseZip, spacyZip)
+            arrayOf("-t", "krill", "-D", outputDir.path, baseZip, spacyZip)
         }
         assertTrue(generatedTar.exists())
 
@@ -707,7 +705,7 @@
         val treeTaggerZip = loadResource("wud24_sample.tree_tagger.zip").path
 
         val generatedTar = ensureKrillTar("wud24_full_foundries") { outputDir ->
-            arrayOf("-f", "krill", "-D", outputDir.path, baseZip, spacyZip, marmotZip, opennlpZip, treeTaggerZip)
+            arrayOf("-t", "krill", "-D", outputDir.path, baseZip, spacyZip, marmotZip, opennlpZip, treeTaggerZip)
         }
         assertTrue(generatedTar.exists())
 
@@ -749,7 +747,7 @@
         val spacyZip = loadResource("wud24_sample.spacy.zip").path
 
         val defaultTar = ensureKrillTar("wud24_default_corenlp") { outputDir ->
-            arrayOf("-f", "krill", "-D", outputDir.path, baseZip, spacyZip, wud24Corenlp)
+            arrayOf("-t", "krill", "-D", outputDir.path, baseZip, spacyZip, wud24Corenlp)
         }
         assertTrue(defaultTar.exists(), "Default krill tar should exist")
 
@@ -765,7 +763,7 @@
         )
 
         val flagTar = ensureKrillTar("wud24_default_corenlp_nwt") { outputDir ->
-            arrayOf("-f", "krill", "--non-word-tokens", "-D", outputDir.path, baseZip, spacyZip, wud24Corenlp)
+            arrayOf("-t", "krill", "--non-word-tokens", "-D", outputDir.path, baseZip, spacyZip, wud24Corenlp)
         }
         assertTrue(flagTar.exists(), "Krill tar should exist when --non-word-tokens is set")
 
@@ -794,7 +792,7 @@
 
         val kotlinTar = ensureKrillTar("wud24_reference_default") { outputDir ->
             arrayOf(
-                "-f", "krill",
+                "-t", "krill",
                 "-D", outputDir.path,
                 baseZip,
                 spacyZip,
@@ -837,7 +835,7 @@
 
         val kotlinTar = ensureKrillTar("wud24_reference_nwt") { outputDir ->
             arrayOf(
-                "-f", "krill",
+                "-t", "krill",
                 "--non-word-tokens",
                 "-D", outputDir.path,
                 baseZip,
@@ -898,7 +896,7 @@
         try {
             // Run CoreNLP with both tagger and parser
             val args = arrayOf(
-                "-f", "zip",
-                "-o",
+                "-t", "zip",
+                "-f",
                 "-D", outputDir.path,
-                "-t", "corenlp:${taggerModel.path}",
+                "-T", "corenlp:${taggerModel.path}",