Fix Genderstern and omission word token breaks after hyphens

Resolves #115

Change-Id: Iacf5667b508050a6dfd09ca9938f449d05582a95
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a41661..ba70c7b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+
+## 2.2.6 [unreleased]
+
+* Fixed genderstern and omission asterisk breaking after hyphens (issue #115)
+
 ## 2.2.5
 
 * adds more ossrh sync data to maven pom
diff --git a/Readme.md b/Readme.md
index 8d419cc..ada404c 100644
--- a/Readme.md
+++ b/Readme.md
@@ -26,7 +26,7 @@
 
 ## Installation
 ```shell script
-mvn clean install
+mvn clean package
 ```
 #### Note
 Because of the large table of abbreviations, the conversion from the jflex source to java,
diff --git a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
index 5579b30..d22a7dc 100644
--- a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
+++ b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
@@ -635,7 +635,7 @@
 
 // normal stuff
 // dashed words
-{WORD}({DASH}{NEWLINE}*{WORD})+                                { return currentToken();}
+{WORD}({DASH}{NEWLINE}*({WORD}|{OMISSIONWORD}))+                 { return currentToken();}
 {WORD}{DASH}                                                   { return currentToken();}
 {TWITTER_HANDLE}                                               { return currentToken(); }
 {TWITTER_HASHTAG}                                              { return currentToken(); }
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
index ab11412..f1eda03 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
@@ -333,6 +333,19 @@
         assertEquals(tokens.length, 4);
     }
 
+    // Regression test for https://github.com/KorAP/KorAP-Tokenizer/issues/115: a gender star after a hyphenated word must not split the token.
+    @Test
+    public void testTokenizerGendersternAfterHyphen () {
+        DerekoDfaTokenizer_de tok = new DerekoDfaTokenizer_de();
+        String[] tokens = tok.tokenize("Die Serb*innen wie die Kosovo-Albaner*innen");
+        assertEquals("Die", tokens[0]);
+        assertEquals("Serb*innen", tokens[1]); // gender star without a preceding hyphen
+        assertEquals("wie", tokens[2]);
+        assertEquals("die", tokens[3]);
+        assertEquals("Kosovo-Albaner*innen", tokens[4]); // hyphenated word plus gender star is expected as one token
+        assertEquals(5, tokens.length); // exactly five tokens, i.e. no extra split anywhere in the sentence
+    }
+
     @Test
     // Probably interpreted as HOST
     public void testTokenizerFileExtension1 () {