Rename KORAPXMLTOOL_XMX_MB to KORAPXMLTOOL_XMX; accept g/m suffixes

Change-Id: I4a5228d9c6025dc178267987828b358b682c9002
diff --git a/Readme.md b/Readme.md
index 2d9060a..2c61e19 100644
--- a/Readme.md
+++ b/Readme.md
@@ -90,7 +90,7 @@
 Example for large NOW export with progress and exclusions:
 
 ```
-KORAPXMLTOOL_XMX_MB=65536 KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
+KORAPXMLTOOL_XMX=64g KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
      ./build/bin/korapxmltool -l info --threads 100 --zip-parallelism 8 \
      --lemma-only --sequential -f now \
      --exclude-zip-glob 'w?d24.tree_tagger.zip' \
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 8b74787..491f088 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -83,7 +83,7 @@
             "    ./build/bin/korapxmltool -f krill -D out/krill app/src/test/resources/wud24_sample.zip app/src/test/resources/wud24_sample.spacy.zip app/src/test/resources/wud24_sample.marmot-malt.zip",
             "",
             "  Large corpus processing with custom memory and performance settings:",
-            "    KORAPXMLTOOL_XMX_MB=512000 KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
+            "    KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
             "        ./build/bin/korapxmltool --threads 100 -f zip -t marmot:models/de.marmot -P maltparser:models/de.malt wpd25*.zip"
     ]
 )
diff --git a/korapxmltool.shebang b/korapxmltool.shebang
index 6f8befc..a6d561d 100644
--- a/korapxmltool.shebang
+++ b/korapxmltool.shebang
@@ -6,7 +6,8 @@
 # (Run `zip -A korapxmltool` if your unzip/java complains about prepended bytes.)
 #
 # Override memory:
-#   export KORAPXMLTOOL_XMX_MB=8192          # sets -Xmx8192m
+#   export KORAPXMLTOOL_XMX=20g              # sets -Xmx20g
+#   export KORAPXMLTOOL_XMX=8192m            # sets -Xmx8192m
 #   export KORAPXMLTOOL_JAVA_OPTS="... -Xmx4g"  # full custom opts
 # Otherwise we pick ~75% of detected memory (cgroup aware), clamped to [1024m, 65536m].
 
@@ -54,9 +55,26 @@
 fi
 
 if ! $has_xmx; then
-  if [[ -n ${KORAPXMLTOOL_XMX_MB:-} && ${KORAPXMLTOOL_XMX_MB:-} =~ ^[0-9]+$ ]]; then
-    xmx_mb=${KORAPXMLTOOL_XMX_MB}
-  else
+  if [[ -n ${KORAPXMLTOOL_XMX:-} ]]; then
+    # Handle KORAPXMLTOOL_XMX with units (g/G for GB, m/M for MB, or just number for MB)
+    if [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+[gG]$ ]]; then
+      # Convert GB to MB
+      xmx_gb=${KORAPXMLTOOL_XMX%[gG]}
+      xmx_mb=$((xmx_gb * 1024))
+    elif [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+[mM]$ ]]; then
+      # Extract MB value
+      xmx_mb=${KORAPXMLTOOL_XMX%[mM]}
+    elif [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+$ ]]; then
+      # Treat plain number as MB for backward compatibility
+      xmx_mb=${KORAPXMLTOOL_XMX}
+    else
+      echo "Warning: Invalid KORAPXMLTOOL_XMX format '${KORAPXMLTOOL_XMX}'. Use formats like '20g', '8192m', or '8192'." >&2
+      xmx_mb=""
+    fi
+  fi
+  
+  # If no valid XMX was provided or parsing failed, use auto-detection
+  if [[ -z ${xmx_mb:-} ]]; then
     mem_mb=$(detect_mem_limit_mb)
     xmx_mb=$(( mem_mb * 75 / 100 ))
     (( xmx_mb < 1024 )) && xmx_mb=1024