Make groovy scripts available as executables

Change-Id: I2d3316769de8453e34656c32d66574c3f6085804
diff --git a/Readme.md b/Readme.md
index 10e45c5..dc4d6e3 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,6 +1,6 @@
 # totalngrams
 
-Package for effectively processing frequency lists from very large corpora in tab separated value format, 
+Package for efficiently processing token lists from very large corpora in tab-separated value format,
 by making full use of multicore-processors.
 
 An older version of `totalngrams` was used for Koplenig et al. (2022).
@@ -42,7 +42,26 @@
   -V, --version          Print version information and exit.
 
 ```
+#### Example usage
+```bash
+FOLDS=16
+BASE="."
 
+for l in "-l"; do #  "-l"
+  for n in $(seq 1 2 3); do # seq FIRST INCREMENT LAST, i.e. n = 1 and 3
+    for f in $(seq 1 $FOLDS); do
+      totalngrams\
+        --pad \
+        -P 79 \
+        -n $n \
+        -f $f \
+        -F $FOLDS \
+        $l -o "$BASE/paddedlemmaposfreq/$n-gram-token$l-freqs.$f.tsv.xz" $BASE/conllu/*.conllu.gz
+    done
+  done
+done
+
+```
 ## Scripts
 
 The package also contains some groovy scripts for handling *pseudonymization* tasks, i.e. replacing
@@ -58,12 +77,10 @@
 ./src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy -h
 ```
 
-```bash
-java -Dgroovy.grape.enable=false -cp target/totalngrams-2.1.0.jar\
- org.ids_mannheim.GeneratePseudonymKey -c 0 1-gram-token-l-freqs.*.tsv.xz | xz -T0 > token_key.tsv.xz
+or, using the generated launcher script instead of calling the Groovy file directly:
 
-java -Dgroovy.grape.enable=false -cp target/totalngrams-2.1.0.jar\
- org.ids_mannheim.GeneratePseudonymKey -c 1 1-gram-token-l-freqs.*.tsv.xz
+```bash
+generate_pseudonym_key -c 0 1-gram-token-l-freqs.*.tsv.xz | xz -T0 > token_key.tsv.xz
 ```
 
 ### Pseudonymize
@@ -71,7 +88,7 @@
 #### Example usage
 
 ```
-java -Dgroovy.grape.enable=false -cp totalngrams-2.1.0.jar org.ids_mannheim.Pseudonymize
+pseudonymize -d /tmp -k token_key.tsv.xz -k lemma_key.tsv.xz *-gram-token-l-freqs.*.tsv.xz
 ```
 
 ### FilterKeys
@@ -79,21 +96,22 @@
 #### Example usage
 
 ```
-java -Xmx160000m -Dgroovy.grape.enable=false -cp totalngrams-2.1.0.jar org.ids_mannheim.FilterKeys\
- -k token_keys.tsv.xz -k lemma_keys.tsv.xz 1-gram-token-l-freqs.*.tsv.xz
+filter_keys -k token_keys.tsv.xz -k lemma_keys.tsv.xz 1-gram-token-l-freqs.*.tsv.xz
 ```
 
 # Installation
 
 ### Prerequisites
 
-* Java Development Kit (JDK) >= 18
+* [Java Development Kit (JDK) >= 18](https://www.oracle.com/de/java/technologies/downloads/)
 * [Apache Maven](https://maven.apache.org/)
 
+### Install
 ```bash
 git clone "https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/totalngrams"
 cd totalngrams
 mvn install
+export PATH="$(pwd)/target/appassembler/bin:$PATH"
 ```
 
 # References
diff --git a/pom.xml b/pom.xml
index 5f0531a..6a16bc1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,7 +159,7 @@
                 </executions>
                 <configuration>
                     <binFolder>bin</binFolder>
-                    <extraJvmArguments>-Xmx512000m</extraJvmArguments>
+                    <extraJvmArguments>-XX:MaxRAMPercentage=85 -Dgroovy.grape.enable=false</extraJvmArguments>
                     <binFileExtensions>
                         <unix/>
                     </binFileExtensions>
@@ -168,6 +168,18 @@
                             <mainClass>org.ids_mannheim.TotalNGrams</mainClass>
                             <id>totalngrams</id>
                         </program>
+                        <program>
+                            <mainClass>org.ids_mannheim.GeneratePseudonymKey</mainClass>
+                            <id>generate_pseudonym_key</id>
+                        </program>
+                        <program>
+                            <mainClass>org.ids_mannheim.Pseudonymize</mainClass>
+                            <id>pseudonymize</id>
+                        </program>
+                        <program>
+                            <mainClass>org.ids_mannheim.FilterKeys</mainClass>
+                            <id>filter_keys</id>
+                        </program>
                     </programs>
                 </configuration>
             </plugin>
diff --git a/src/main/groovy/org/ids_mannheim/FilterKeys.groovy b/src/main/groovy/org/ids_mannheim/FilterKeys.groovy
index d561b02..1852f2d 100755
--- a/src/main/groovy/org/ids_mannheim/FilterKeys.groovy
+++ b/src/main/groovy/org/ids_mannheim/FilterKeys.groovy
@@ -22,11 +22,11 @@
 
 class FilterKeys {
 
-    static tag = "FilterKeys"
+    static tag = (System.getProperty("app.name") ? System.getProperty("app.name") : new File(this.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()).getName())
     static final int maxSpecialKeys = 1000
 
     static interface pseudonymizeArgs {
-        @Option(shortName = 'k', description = "pseudonymization key, use multiple times for multiple keys")
+        @Option(shortName = 'k', description = "pseudonymization key(s), use multiple times for multiple keys\n(e.g. `-k tokenkey.tsv -k lemmakey.tsv`)")
         String[] keys()
 
         @Option(shortName = 'h', description = "print this help message")
diff --git a/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy b/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy
index 3a0d415..f691511 100755
--- a/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy
+++ b/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy
@@ -20,7 +20,7 @@
 import java.util.logging.Logger
 
 class GeneratePseudonymKey {
-    static tag = "GeneratePseudonymKey"
+    static tag = (System.getProperty("app.name") ? System.getProperty("app.name") : new File(this.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()).getName())
 
     static interface GeneratePseudonymKeyArgs {
         @Option(shortName = 'c', defaultValue = "0", description = 'generate pseudonyms for column n')
@@ -32,7 +32,7 @@
         @Option(shortName = 'h')
         boolean help()
 
-        @Unparsed(description = "tsv formatted frequency lists to be pseudonymized")
+        @Unparsed(description = "tsv-formatted frequency lists, on the basis of which pseudonym keys are calculated")
         String[] files()
     }
 
diff --git a/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy b/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy
index f1c1196..b7aa909 100755
--- a/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy
+++ b/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy
@@ -21,20 +21,19 @@
 import java.util.logging.Logger
 
 class Pseudonymize {
-
-    static tag = "Pseudonymize"
+    static tag = (System.getProperty("app.name") ? System.getProperty("app.name") : new File(this.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()).getName())
 
     static interface pseudonymizeArgs {
-        @Option(shortName = 'k')
+        @Option(shortName = 'k', description = "pseudonymization key(s), use multiple times for multiple keys\n(e.g. `-k tokenkey.tsv -k lemmakey.tsv`)")
         String[] keys()
 
-        @Option(shortName = 'd', defaultValue = "./")
+        @Option(shortName = 'd', defaultValue = ".", description = "where to write the results")
         String destPath()
 
         @Option(shortName = 'h')
         boolean help()
 
-        @Unparsed()
+        @Unparsed(description = "tsv-formatted frequency lists to be pseudonymized")
         List files()
     }