Incorporate pseudonymization scripts into Maven project

Also set the minimum Java version to 17

Change-Id: Ieb8c9c0cd64214111cb7ef11775ea0272f46054c
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 97bde2c..4cc443d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 include:
   - template: Security/Dependency-Scanning.gitlab-ci.yml
 
-image: maven:3.6-jdk-11
+image: maven:3.8-eclipse-temurin-17  # pinned: the build needs JDK 17 and 'latest' can change underneath it
 
 build-and-test:
   stage: build
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04917f9..73282e3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## [2.1.0]
+- added script `GeneratePseudonymKey.groovy` to compute pseudonyms
+- added script `Pseudonymize.groovy` to pseudonymize tokens (and lemmas)
+
 ## [2.0] - 2021-10-07
 - for `.*\\.(freq|tsv)(\\.gz)?` input files automatically cumulate frequencies
 - -N option added to sort keys with same frequency numerically
diff --git a/config.groovy b/config.groovy
new file mode 100644
index 0000000..1471ad0
--- /dev/null
+++ b/config.groovy
@@ -0,0 +1 @@
+configuration.pluginFactory = org.codehaus.groovy.control.ParserPluginFactory.antlr4()
diff --git a/pom.xml b/pom.xml
index 74ceb7b..4a20c45 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,8 +10,12 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <maven.compiler.release>11</maven.compiler.release>
-    </properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <!--  Use the release flag only if you are using Java 9+  -->
+        <!--  <maven.compiler.release>8</maven.compiler.release>  -->
+        <!-- verbose is useful for debugging purposes -->
+        <maven.compiler.verbose>true</maven.compiler.verbose>     </properties>
 
     <build>
         <resources>
@@ -28,14 +32,16 @@
                 </includes>
             </resource>
         </resources>
+
         <plugins>
+
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
                 <version>3.1.1</version>
                 <configuration>
                     <failOnError>false</failOnError>
-                    <source>${maven.compiler.release}</source>
+                    <source>${maven.compiler.target}</source>
                     <javadocExecutable>${java.home}/bin/javadoc
                     </javadocExecutable>
                 </configuration>
@@ -70,12 +76,11 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.8.1</version>
+                <version>3.10.1</version>
                 <configuration>
                     <!-- or whatever version you use -->
-                    <release>${maven.compiler.release}</release>
-                    <source>${maven.compiler.release}</source>
-                    <target>${maven.compiler.release}</target>
+                    <source>${maven.compiler.source}</source>
+                    <target>${maven.compiler.target}</target>
                     <showDeprecation>true</showDeprecation>
                     <annotationProcessorPaths>
                         <path>
@@ -84,10 +89,6 @@
                             <version>4.2.0</version>
                         </path>
                     </annotationProcessorPaths>
-                    <compilerArgs>
-                        <arg>-Aproject=${project.groupId}/${project.artifactId}
-                        </arg>
-                    </compilerArgs>
                 </configuration>
                 <executions>
                     <!-- Replacing default-compile as it is treated specially by maven -->
@@ -117,8 +118,9 @@
                     </execution>
                 </executions>
             </plugin>
-            <plugin>
-                <artifactId>maven-assembly-plugin</artifactId>
+
+             <plugin>
+            <artifactId>maven-assembly-plugin</artifactId>
                 <executions>
                     <execution>
                         <phase>package</phase>
@@ -172,11 +174,85 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>
-                <!-- JUnit 5 requires Surefire version 2.22.0 or higher -->
                 <version>2.22.2</version>
+                <configuration>
+                    <includes>
+                        <include>**/*Test.java</include>
+                    </includes>
+                </configuration>
             </plugin>
         </plugins>
     </build>
+
+    <profiles>
+        <profile>
+            <id>activate-this-only-outside-intellij</id>
+            <activation>
+                <property>
+                    <name>!idea.version</name>
+                </property>
+            </activation>
+
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-compiler-plugin</artifactId>
+                        <version>3.10.1</version>
+                        <configuration>
+                            <compilerId>groovy-eclipse-compiler</compilerId>
+                            <compilerArguments>
+                                <configScript>${project.basedir}/config.groovy</configScript>
+                            </compilerArguments>
+                        </configuration>
+                        <dependencies>
+                            <dependency>
+                                <groupId>org.codehaus.groovy</groupId>
+                                <artifactId>groovy-eclipse-compiler</artifactId>
+                                <version>3.8.0</version>
+                            </dependency>
+                            <dependency>
+                                <groupId>org.codehaus.groovy</groupId>
+                                <artifactId>groovy-eclipse-batch</artifactId>
+                                <version>4.0.6-02</version>
+                            </dependency>
+                            <dependency>
+                                <groupId>org.apache.ivy</groupId>
+                                <artifactId>ivy</artifactId>
+                                <version>2.5.1</version>
+                            </dependency>
+                        </dependencies>
+                    </plugin>
+                </plugins>
+            </build>
+            <dependencies>
+                <dependency>
+                    <groupId>org.codehaus.groovy</groupId>
+                    <artifactId>groovy-eclipse-compiler</artifactId>
+                    <version>3.8.0</version>
+                </dependency>
+                <dependency>
+                    <groupId>org.codehaus.groovy</groupId>
+                    <artifactId>groovy-eclipse-batch</artifactId>
+                    <version>4.0.6-02</version>
+                </dependency>
+            </dependencies>
+
+            <repositories>
+                <repository>
+                    <id>groovy-libs-release-local</id>
+                    <url>https://groovy.jfrog.io/artifactory/plugins-release-local</url>
+                </repository>
+            </repositories>
+
+            <pluginRepositories>
+                <pluginRepository>
+                    <id>groovy-plugins-release-local</id>
+                    <url>https://groovy.jfrog.io/artifactory/plugins-release-local</url>
+                </pluginRepository>
+            </pluginRepositories>
+        </profile>
+    </profiles>
+
     <dependencies>
         <dependency>
             <groupId>info.picocli</groupId>
@@ -229,5 +305,43 @@
             <artifactId>xz</artifactId>
             <version>1.9</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.groovy</groupId>
+            <artifactId>groovy</artifactId>
+            <version>4.0.6</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/org.codehaus.groovy/groovy-eclipse-batch -->
+
+        <dependency>
+            <groupId>org.codehaus.gpars</groupId>
+            <artifactId>gpars</artifactId>
+            <version>1.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.ivy</groupId>
+            <artifactId>ivy</artifactId>
+            <version>2.5.1</version>
+            <scope>compile</scope>
+            <optional>true</optional>
+        </dependency>
+        <dependency>
+            <groupId>info.picocli</groupId>
+            <artifactId>picocli-groovy</artifactId>
+            <version>4.6.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.groovy</groupId>
+            <artifactId>groovy-cli-picocli</artifactId>
+            <version>4.0.6</version>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>1.2</version>
+        </dependency>
     </dependencies>
+
+
 </project>
\ No newline at end of file
diff --git a/scripts/generate_pseudonym_key.groovy b/scripts/generate_pseudonym_key.groovy
deleted file mode 100755
index afc2435..0000000
--- a/scripts/generate_pseudonym_key.groovy
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/env groovy
-@Grab('info.picocli:picocli-groovy:4.6.3')
-@Grab('org.tukaani:xz:1.9')
-@Grab('org.codehaus.gpars:gpars:1.2.1')
-@Grab('org.apache.commons:commons-compress:1.22')
-
-import groovy.cli.Option
-import groovy.cli.Unparsed
-import groovy.cli.commons.CliBuilder
-import groovyx.gpars.GParsPool
-import org.apache.commons.compress.compressors.CompressorStreamFactory
-
-
-import java.util.concurrent.ConcurrentHashMap
-import java.util.concurrent.atomic.LongAdder
-import java.util.logging.Logger
-
-def tag = "generate_pseudonym_key"
-
-interface GeneratePseudonymKeyArgs {
-    @Option(shortName = 'c', defaultValue = '0', description = 'generate pseudonyms for column n')
-    int column()
-
-    @Option(shortName = 's', defaultValue = "«END»,«START»", convert = { it.split(",")}, description = "comma separated special keys (will get pseudonyms -n..-1)")
-    String[] specialKeys()
-
-    @Option(shortName = 'h')
-    boolean help()
-
-    @Unparsed
-    List files()
-}
-
-CliBuilder cli = new CliBuilder(usage: "${tag} [options] file [files]")
-def options = cli.parseFromSpec(GeneratePseudonymKeyArgs, args)
-
-if (options.help() || !options.files() || options.files()[0].startsWith('-')) {
-    cli.usage()
-    System.exit(-1)
-}
-
-def freqList = new ConcurrentHashMap<String, LongAdder>(10000000, 0.75, options.files().size())
-System.setProperty("java.util.logging.SimpleFormatter.format", '[%1$tF %1$tT]:%4$s: %5$s%n')
-Logger log = Logger.getLogger("org.ids_mannheim.${tag}")
-log.info("Generating pseudonym key for column ${options.column()} in ${options.files()}")
-
-GParsPool.withPool {
-    options.files().eachParallel(fname -> {
-        log.info("Reading ${fname}")
-        def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
-        input.splitEachLine("\t") { fields ->
-            freqList.computeIfAbsent(fields[options.column()], k -> new LongAdder()).add(Long.parseLong(fields[fields.size() - 1]))
-        }
-        log.info("Done reading ${fname}")
-    })
-}
-
-log.info("Sorting and writing...")
-
-def j = -options.specialKeys().size()
-options.specialKeys().each {
-    freqList.remove(it)
-    println("${it}\t${j++}")
-}
-
-def i = 0
-freqList.entrySet()
-        .parallelStream()
-        .sorted({ a, b -> def ret = b.value <=> a.value; if (ret == 0) a.key <=> b.key else ret })
-        .forEachOrdered({ e -> println "${e.key}\t${i++}" })
diff --git a/scripts/pseudonymize.groovy b/scripts/pseudonymize.groovy
deleted file mode 100755
index 70447fd..0000000
--- a/scripts/pseudonymize.groovy
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/bin/env groovy
-@Grab('info.picocli:picocli-groovy:4.6.3')
-@Grab('org.tukaani:xz:1.9') // should be imported even if not used directly
-@Grab('org.codehaus.gpars:gpars:1.2.1')
-@Grab('org.apache.commons:commons-compress:1.22')
-import groovy.cli.Option
-import groovy.cli.Unparsed
-import groovy.cli.commons.CliBuilder
-import groovyx.gpars.GParsPool
-import org.apache.commons.compress.compressors.CompressorStreamFactory
-
-
-import java.util.concurrent.ConcurrentHashMap
-import java.util.logging.Logger
-
-def tag = "pseudonymize"
-
-interface pseudonymizeArgs {
-    @Option(shortName = 'k')
-    String[] keys()
-
-    @Option(shortName = 'd')
-    String destPath()
-
-    @Option(shortName = 'h', defaultValue = '0')
-    boolean help()
-
-    @Unparsed
-    List files()
-}
-
-def compressorOutputStream(fname) {
-    def compresserTypes = ["gz" : CompressorStreamFactory.GZIP, "xz" : CompressorStreamFactory.XZ, "bzip": CompressorStreamFactory.BZIP2]
-    String extension = fname.substring(fname.lastIndexOf(".") + 1)
-    def type = compresserTypes[extension]
-    if (type)
-        return new PrintStream(new BufferedOutputStream(new CompressorStreamFactory().createCompressorOutputStream(type, new BufferedOutputStream(new FileOutputStream(fname)))))
-    else
-        return new PrintStream(new BufferedOutputStream(new FileOutputStream(fname)))
-}
-
-static def readKey(fname, log) {
-    def myMap = new ConcurrentHashMap<String, Integer>(100000000,0.75,1)
-    log.info("Reading key ${fname} ...")
-    def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
-    input.splitEachLine("\t") { fields -> myMap.put(fields[0], Integer.valueOf(fields[fields.size() - 1])) }
-    log.info("Done reading key ${fname}.")
-    return myMap
-}
-
-CliBuilder cli = new CliBuilder(usage: "${tag} [options] file [files]")
-def options = cli.parseFromSpec(pseudonymizeArgs, args)
-
-if (options.help() || !options.files() || options.files()[0].startsWith('-')) {
-    cli.usage()
-    System.exit(-1)
-}
-
-System.setProperty("java.util.logging.SimpleFormatter.format", '[%1$tF %1$tT]:%4$s: %5$s%n')
-Logger log = Logger.getLogger("")
-log.info("Pseudonymizing ${options.keys().size()} columns")
-def keyMaps = []
-
-options.keys().each { fname -> keyMaps << readKey(fname, log) }
-
-GParsPool.withPool {
-    options.files().eachParallel(fname -> {
-        def outName = options.destPath() + "/" + new File(fname).getName()
-        def outFile = new File(outName)
-        if (outFile.exists()) {
-            log.warning("${outName} already exists - skipping")
-        } else {
-            log.info("Pseudonymizing ${fname} to ${outName}")
-            def output_stream = compressorOutputStream(outName)
-            def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
-
-            input.splitEachLine("\t") { fields ->
-                def output_string = ""
-                for (int i in 0..(fields.size() - 2)) {
-                    if (i % 3 < keyMaps.size()) {
-                        def val = keyMaps[i % 3][fields[i]]
-                        if (val == null)
-                            log.severe("`${fields[i]}' not found in dictionary")
-                        output_stream.print("${val}\t")
-                    } else {
-                        output_stream.print("${fields[i]}\t")
-                    }
-                }
-                output_stream.println(output_string + fields[fields.size() - 1])
-            }
-            log.info("Done pseudonymizing ${fname} to ${outName}")
-            output_stream.close()
-        }
-    })
-}
\ No newline at end of file
diff --git a/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy b/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy
new file mode 100755
index 0000000..4c5397c
--- /dev/null
+++ b/src/main/groovy/org/ids_mannheim/GeneratePseudonymKey.groovy
@@ -0,0 +1,80 @@
+#!/bin/env groovy
+package org.ids_mannheim
+
+import groovy.cli.Option
+import groovy.cli.Unparsed
+@GrabConfig(systemClassLoader = true)
+@Grab('info.picocli:picocli-groovy:4.6.3')
+@Grab('org.tukaani:xz:1.9')
+@Grab('org.codehaus.gpars:gpars:1.2.1')
+@Grab('org.apache.commons:commons-compress:1.22')
+
+
+
+import groovy.cli.picocli.CliBuilder
+import groovyx.gpars.GParsPool
+import org.apache.commons.compress.compressors.CompressorStreamFactory
+
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.atomic.LongAdder
+import java.util.logging.Logger
+
+/** Command-line tool that prints a pseudonym key for one column of tab-separated frequency files. */
+class GeneratePseudonymKey {
+    static tag = "GeneratePseudonymKey"
+
+    /** Command-line options; an instance is obtained from CliBuilder.parseFromSpec in main. */
+    static interface GeneratePseudonymKeyArgs {
+        @Option(shortName = 'c', defaultValue = "0", description = 'generate pseudonyms for column n')
+        int column()
+        @Option(shortName = 's', defaultValue = "«END»,«START»", description = "comma separated special keys (will get pseudonyms -n..-1)")
+        String specialKeys()
+        @Option(shortName = 'h')
+        boolean help()
+        @Unparsed
+        String[] files()
+    }
+
+    /** Sums the last field of every input line per value of the selected column and prints "value TAB pseudonym" lines:
+     *  the special keys get -n..-1, all other values 0, 1, ... by descending sum (equal sums in ascending key order). */
+    static void main(String[] args) {
+        CliBuilder cli = new CliBuilder(name: "${tag}", width: 120, footer: "Examples:\n" +
+                "generate_pseudonym_key.groovy -c 0 1-gram-token-l-freqs.*.tsv.xz | xz -T0 > token_key.tsv.xz\n" +
+                "generate_pseudonym_key.groovy -c 1 1-gram-token-l-freqs.*.tsv.xz | xz -T0 > lemma_key.tsv.xz\n")
+        def options = cli.parseFromSpec(GeneratePseudonymKeyArgs, args)
+        if (options.help() || !options.files() || options.files()[0].startsWith('-')) {
+            cli.usage()
+            System.exit(-1)
+        }
+        def specialKeys = options.specialKeys().split(",")
+        int column = options.column() // read once instead of querying the option proxy for every input line
+        def freqList = new ConcurrentHashMap<String, LongAdder>(10000000, 0.75, options.files().size())
+        System.setProperty("java.util.logging.SimpleFormatter.format", '[%1$tF %1$tT]:%4$s: %5$s%n')
+        Logger log = Logger.getLogger("org.ids_mannheim.${tag}")
+        log.info("Generating pseudonym key for column ${column} in ${options.files()}")
+        // The files are processed with GPars eachParallel; the shared counts live in a ConcurrentHashMap of LongAdders.
+        GParsPool.withPool {
+            options.files().eachParallel(fname -> {
+                log.info("Reading ${fname}")
+                def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
+                input.splitEachLine("\t") { fields ->
+                    freqList.computeIfAbsent(fields[column], k -> new LongAdder()).add(Long.parseLong(fields[fields.size() - 1]))
+                }
+                log.info("Done reading ${fname}")
+            })
+        }
+        log.info("Sorting and writing...")
+        // The special keys are removed from the counted values and printed first with the pseudonyms -n..-1.
+        def j = -specialKeys.size()
+        specialKeys.each {
+            freqList.remove(it)
+            println("${it}\t${j++}")
+        }
+        def i = 0
+        // LongAdder is not Comparable: compare the sums explicitly rather than via Groovy's generic Number coercion.
+        freqList.entrySet()
+                .parallelStream()
+                .sorted({ a, b -> def ret = b.value.sum() <=> a.value.sum(); if (ret == 0) a.key <=> b.key else ret })
+                .forEachOrdered({ e -> println "${e.key}\t${i++}" })
+    }
+}
diff --git a/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy b/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy
new file mode 100755
index 0000000..f1c1196
--- /dev/null
+++ b/src/main/groovy/org/ids_mannheim/Pseudonymize.groovy
@@ -0,0 +1,114 @@
+#!/bin/env groovy
+
+package org.ids_mannheim
+
+// @GrabConfig(systemProperties = "groovy.grape.enable=false")
+@GrabConfig(systemClassLoader=true)
+@Grab('org.tukaani:xz:1.9') // should be imported even if not used directly
+@Grab('org.codehaus.gpars:gpars:1.2.1')
+@Grab('org.apache.commons:commons-compress:1.22')
+@Grab('info.picocli:picocli:4.6.3')
+
+import groovyx.gpars.GParsPool
+
+import groovy.cli.Option
+import groovy.cli.Unparsed
+import groovy.cli.picocli.CliBuilder
+
+import org.apache.commons.compress.compressors.CompressorStreamFactory
+
+import java.util.concurrent.ConcurrentHashMap
+import java.util.logging.Logger
+
+/** Replaces columns of tab-separated files by the integer pseudonyms found in key files. */
+class Pseudonymize {
+    static tag = "Pseudonymize"
+
+    /** Command-line options; an instance is obtained from CliBuilder.parseFromSpec in main. */
+    static interface pseudonymizeArgs {
+        @Option(shortName = 'k')
+        String[] keys()
+        @Option(shortName = 'd', defaultValue = "./")
+        String destPath()
+        @Option(shortName = 'h')
+        boolean help()
+        @Unparsed()
+        List files()
+    }
+
+    /** Opens fname as a PrintStream, compressed according to the text after its last '.' (plain for unknown extensions).
+     *  The usual bzip2 suffix "bz2" is recognised in addition to the previously supported "bzip". */
+    static def compressorOutputStream(fname) {
+        def compresserTypes = ["gz": CompressorStreamFactory.GZIP, "xz": CompressorStreamFactory.XZ, "bz2": CompressorStreamFactory.BZIP2, "bzip": CompressorStreamFactory.BZIP2]
+        String extension = fname.substring(fname.lastIndexOf(".") + 1)
+        def type = compresserTypes[extension]
+        if (type)
+            return new PrintStream(new BufferedOutputStream(new CompressorStreamFactory().createCompressorOutputStream(type, new BufferedOutputStream(new FileOutputStream(fname)))))
+        else
+            return new PrintStream(new BufferedOutputStream(new FileOutputStream(fname)))
+    }
+
+    /** Reads a key file (opened through CompressorStreamFactory) into a map: first field of a line -> last field as Integer. */
+    static def readKey(fname, log) {
+        def myMap = new ConcurrentHashMap<String, Integer>(100000000,0.75,1)
+        log.info("Reading key ${fname} ...")
+        def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
+        input.splitEachLine("\t") { fields -> myMap.put(fields[0], Integer.valueOf(fields[fields.size() - 1])) }
+        log.info("Done reading key ${fname}.")
+        return myMap
+    }
+
+    /** Parses the command line and calls pseudonymizeFiles; on -h or without files it prints the usage and exits with -1. */
+    static void main(String[] args) {
+        System.setProperty("java.util.logging.SimpleFormatter.format", '[%1$tF %1$tT]:%4$s: %5$s%n')
+        Logger log = Logger.getLogger("")
+        CliBuilder cli = new CliBuilder(name: tag, width: 100)
+        def options = cli.parseFromSpec(pseudonymizeArgs, args)
+
+        if (options.help() || !options.files() || options.files()[0].startsWith('-')) {
+            cli.usage()
+            System.exit(-1)
+        }
+        pseudonymizeFiles(options.files(), options.keys(), options.destPath(), log)
+    }
+
+    /** Writes a pseudonymized copy of every input file to destPath (same file name); existing outputs are skipped.
+     *  In each line, column i is replaced through key map i % 3 if such a map was given, otherwise it is copied; the last
+     *  column is always copied. A value missing from its key map is logged as severe and written as the text "null". */
+    static void pseudonymizeFiles(files, keys, destPath, Logger log) {
+        log.info("Pseudonymizing ${keys.size()} columns")
+        def keyMaps = []
+        keys.each { fname -> keyMaps << readKey(fname, log) }
+        GParsPool.withPool {
+            files.eachParallel(fname -> {
+                def outName = destPath + "/" + new File(fname).getName()
+                def outFile = new File(outName)
+                if (outFile.exists()) {
+                    log.warning("${outName} already exists - skipping")
+                } else {
+                    log.info("Pseudonymizing ${fname} to ${outName}")
+                    // Open the input first: an unreadable input must not leave an empty output that later runs skip.
+                    def input = new CompressorStreamFactory().createCompressorInputStream(new BufferedInputStream(new FileInputStream(fname)))
+                    // withCloseable closes the output stream even when processing a line throws.
+                    compressorOutputStream(outName).withCloseable { output_stream ->
+                        input.splitEachLine("\t") { fields ->
+                            // Exclusive range: the inclusive 0..(size - 2) ran backwards (0, -1) for one-column lines.
+                            for (int i in 0..<(fields.size() - 1)) {
+                                if (i % 3 < keyMaps.size()) {
+                                    def val = keyMaps[i % 3][fields[i]]
+                                    if (val == null)
+                                        log.severe("`${fields[i]}' not found in dictionary")
+                                    output_stream.print("${val}\t")
+                                } else {
+                                    output_stream.print("${fields[i]}\t")
+                                }
+                            }
+                            output_stream.println(fields[fields.size() - 1])
+                        }
+                    }
+                    log.info("Done pseudonymizing ${fname} to ${outName}")
+                }
+            })
+        }
+    }
+}
diff --git a/src/test/java/org/ids_mannheim/PseudonymizeTest.java b/src/test/java/org/ids_mannheim/PseudonymizeTest.java
new file mode 100644
index 0000000..b7aaef1
--- /dev/null
+++ b/src/test/java/org/ids_mannheim/PseudonymizeTest.java
@@ -0,0 +1,67 @@
+package org.ids_mannheim;
+
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.nio.file.Path;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.logging.Logger;
+
+/** Tests GeneratePseudonymKey and Pseudonymize against the sample files in src/test/resources. */
+public class PseudonymizeTest {
+    private static final ByteArrayOutputStream outContent = new ByteArrayOutputStream();
+    private static final ByteArrayOutputStream errContent = new ByteArrayOutputStream();
+    private static final PrintStream originalOut = System.out;
+    private static final PrintStream originalErr = System.err;
+    @TempDir
+    static Path sharedTempDir;
+
+    @BeforeAll // capture stdout/stderr so the text printed by the tools can be asserted on
+    public static void setUpStreams() {
+        System.setOut(new PrintStream(outContent));
+        System.setErr(new PrintStream(errContent));
+    }
+
+    @AfterAll
+    public static void restoreStreams() {
+        System.setOut(originalOut);
+        System.setErr(originalErr);
+    }
+
+    @BeforeAll // NOTE(review): presumably keeps the @Grab annotations of the Groovy classes from resolving dependencies - confirm
+    public static void disableGrapeGrab() {
+        System.setProperty("groovy.grape.enable", "false");
+    }
+
+    @Test
+    void generateKeyTest() {
+        GeneratePseudonymKey.main(new String[]{"src/test/resources/token_lemma_pos_freqs.tsv.xz"});
+        String printed = outContent.toString(); // decode the captured output once for all three checks
+        assertTrue(printed.contains("\ndurchaus\t934\n"));
+        assertTrue(printed.contains("\nlängst\t936\n"));
+        assertFalse(printed.contains("\t937\n"));
+    }
+
+    @Test
+    void readKeyTest() {
+        ConcurrentHashMap<?, ?> map = (ConcurrentHashMap<?, ?>) Pseudonymize.readKey("src/test/resources/token.keys.gz", Logger.getLogger(""));
+        assertEquals(936, map.get("längst"));
+    }
+
+    @Test
+    void pseudonymizeFilesTest() {
+        Logger log = Logger.getLogger("");
+        String fname = sharedTempDir + File.separator + "token_lemma_pos_freqs.tsv.xz";
+        new File(fname).delete();
+        Pseudonymize.pseudonymizeFiles(new String[]{"src/test/resources/token_lemma_pos_freqs.tsv.xz"},
+                new String[]{"src/test/resources/token.keys.gz", "src/test/resources/lemma.keys.gz"}, sharedTempDir.toAbsolutePath().toString(), log);
+        ConcurrentHashMap<?, ?> pseudonymized = (ConcurrentHashMap<?, ?>) Pseudonymize.readKey(fname, log);
+        assertEquals(223997, pseudonymized.get("936"));
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/lemma.keys.gz b/src/test/resources/lemma.keys.gz
new file mode 100644
index 0000000..3b366b4
--- /dev/null
+++ b/src/test/resources/lemma.keys.gz
Binary files differ
diff --git a/src/test/resources/token.keys.gz b/src/test/resources/token.keys.gz
new file mode 100644
index 0000000..66c0ea5
--- /dev/null
+++ b/src/test/resources/token.keys.gz
Binary files differ
diff --git a/src/test/resources/token_lemma_pos_freqs.tsv.xz b/src/test/resources/token_lemma_pos_freqs.tsv.xz
new file mode 100644
index 0000000..52ec718
--- /dev/null
+++ b/src/test/resources/token_lemma_pos_freqs.tsv.xz
Binary files differ