Introduce hyphenated abbreviations in German tokenizer
Change-Id: I12e01e4931b486d85202107ab82077aa16ee4aad
diff --git a/src/de/tokenizer.xfst b/src/de/tokenizer.xfst
index 9670f9a..ce4bec3 100644
--- a/src/de/tokenizer.xfst
+++ b/src/de/tokenizer.xfst
@@ -56,13 +56,15 @@
! of the IDS.
define Abbr [ @txt"de/abbrv.txt" | Letter ] %.;
+define HypAbbr [ Abbr ( %- Abbr )+ | {Ba.-Wü.}];
+
define Streetname Word {str} %.;
source all/allpost.xfst
echo - Compile Real Token
-define RealToken [Punct|Emdash|Abbr|Streetname|Word|SNS|AcronymDep|Ord|Num|Years|Times|XMLEntities|Omission];
+define RealToken [Punct|Emdash|HypAbbr|Streetname|Word|SNS|AcronymDep|Ord|Num|Years|Times|XMLEntities|Omission];
echo - Introduce Token splitter