Rename KORAPXMLTOOL_XMX_MB to KORAPXMLTOOL_XMX and accept g/m unit suffixes
Change-Id: I4a5228d9c6025dc178267987828b358b682c9002
diff --git a/Readme.md b/Readme.md
index 2d9060a..2c61e19 100644
--- a/Readme.md
+++ b/Readme.md
@@ -90,7 +90,7 @@
Example for large NOW export with progress and exclusions:
```
-KORAPXMLTOOL_XMX_MB=65536 KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
+KORAPXMLTOOL_XMX=64g KORAPXMLTOOL_JAVA_OPTS="-XX:+UseG1GC -Djdk.util.zip.disableMemoryMapping=true -Djdk.util.zip.reuseInflater=true" \
./build/bin/korapxmltool -l info --threads 100 --zip-parallelism 8 \
--lemma-only --sequential -f now \
--exclude-zip-glob 'w?d24.tree_tagger.zip' \
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 8b74787..491f088 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -83,7 +83,7 @@
" ./build/bin/korapxmltool -f krill -D out/krill app/src/test/resources/wud24_sample.zip app/src/test/resources/wud24_sample.spacy.zip app/src/test/resources/wud24_sample.marmot-malt.zip",
"",
" Large corpus processing with custom memory and performance settings:",
- " KORAPXMLTOOL_XMX_MB=512000 KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
+ " KORAPXMLTOOL_XMX=500g KORAPXMLTOOL_JAVA_OPTS=\"-XX:+UseG1GC\" \\",
" ./build/bin/korapxmltool --threads 100 -f zip -t marmot:models/de.marmot -P maltparser:models/de.malt wpd25*.zip"
]
)
diff --git a/korapxmltool.shebang b/korapxmltool.shebang
index 6f8befc..a6d561d 100644
--- a/korapxmltool.shebang
+++ b/korapxmltool.shebang
@@ -6,7 +6,8 @@
# (Run `zip -A korapxmltool` if your unzip/java complains about prepended bytes.)
#
# Override memory:
-# export KORAPXMLTOOL_XMX_MB=8192 # sets -Xmx8192m
+# export KORAPXMLTOOL_XMX=20g # sets -Xmx20g
+# export KORAPXMLTOOL_XMX=8192m # sets -Xmx8192m
# export KORAPXMLTOOL_JAVA_OPTS="... -Xmx4g" # full custom opts
# Otherwise we pick ~75% of detected memory (cgroup aware), clamped to [1024m, 65536m].
@@ -54,9 +55,26 @@
fi
if ! $has_xmx; then
- if [[ -n ${KORAPXMLTOOL_XMX_MB:-} && ${KORAPXMLTOOL_XMX_MB:-} =~ ^[0-9]+$ ]]; then
- xmx_mb=${KORAPXMLTOOL_XMX_MB}
- else
+ if [[ -n ${KORAPXMLTOOL_XMX:-} ]]; then
+ # Parse KORAPXMLTOOL_XMX: a g/G suffix means gigabytes, an m/M suffix means megabytes, and a bare number is taken as megabytes
+ if [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+[gG]$ ]]; then
+ # Convert GB to MB
+ xmx_gb=${KORAPXMLTOOL_XMX%[gG]}
+ xmx_mb=$((xmx_gb * 1024))
+ elif [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+[mM]$ ]]; then
+ # Extract MB value
+ xmx_mb=${KORAPXMLTOOL_XMX%[mM]}
+ elif [[ ${KORAPXMLTOOL_XMX} =~ ^[0-9]+$ ]]; then
+ # Treat a plain number as MB, matching the digits-only value format that KORAPXMLTOOL_XMX_MB accepted
+ xmx_mb=${KORAPXMLTOOL_XMX}
+ else
+ echo "Warning: Invalid KORAPXMLTOOL_XMX format '${KORAPXMLTOOL_XMX}'. Use formats like '20g', '8192m', or '8192'." >&2
+ xmx_mb=""
+ fi
+ fi
+
+ # If no valid XMX was provided or parsing failed, use auto-detection
+ if [[ -z ${xmx_mb:-} ]]; then
mem_mb=$(detect_mem_limit_mb)
xmx_mb=$(( mem_mb * 75 / 100 ))
(( xmx_mb < 1024 )) && xmx_mb=1024