Add ver.di (German trade union) to abbreviations
Thanks @notesjor
Change-Id: Iede4c5715fd1ac698533aef98b8ff627e0ba0b05
diff --git a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
index 342c668..5582821 100644
--- a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
+++ b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
@@ -783,6 +783,7 @@
{PLUSAMPERSAND} {return currentToken();}
{SEABBR}\. {return currentToken();}
{SEABBRHYPH}\. {return currentToken();}
+ver\.di {return currentToken();}
{PRAGMA} {return currentToken();}
{FNAME} {return currentToken();}
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
index 779bfdd..e50e2e1 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
@@ -1179,5 +1179,22 @@
assertEquals("Kosovo-Albaner", tokens[0]);
assertEquals(1, tokens.length);
}
+
+ // Regression test: "ver.di" (German trade union) must remain a single token despite its internal period, while the sentence-final "." is still split off.
+ @Test
+ public void testVerdiAbbreviation() {
+ DerekoDfaTokenizer_de tok = new DerekoDfaTokenizer_de();
+
+ String[] tokens = tok.tokenize("Die Gewerkschaft ver.di fordert mehr Lohn.");
+ assertEquals("Die", tokens[0]);
+ assertEquals("Gewerkschaft", tokens[1]);
+ assertEquals("ver.di", tokens[2]);
+ assertEquals("fordert", tokens[3]);
+ assertEquals("mehr", tokens[4]);
+ assertEquals("Lohn", tokens[5]);
+ assertEquals(".", tokens[6]);
+ assertEquals(7, tokens.length);
+ }
}
+