Reorder longest-match replace rules in the tokenizer and update models
Change-Id: I0e7b13233b6237e7a1d99c07e2ea4e43a121ec04
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 4a16ec0..9b9f663 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -209,13 +209,12 @@
echo - Compile Real Token
-define RealToken [Punct|Word|SNS|AcronymDep|Ord|Num|Years|Times];
+define RealToken [Punct|Emdash|Word|SNS|AcronymDep|Ord|Num|Years|Times|XMLEntities|Omission];
echo - Introduce Token splitter
define Token [
- XMLEntities @-> ... NLout,
- Abbr @-> ... NLout,
+ [Abbr|Streetname] @-> ... NLout,
RealToken @-> ... NLout,
XML @-> ... NLout,
URL @-> ... NLout,
@@ -223,11 +222,8 @@
File @-> ... NLout,
Plusampersand @-> ... NLout,
Domain @-> ... NLout,
- Emoji @-> ... NLout,
- [Streetname|Omission|Emdash] @-> ... NLout
- ]
-.o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ]
-;
+ Emoji @-> ... NLout
+] .o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ];
echo - Introduce Sentence splitter
read regex Token .o. [[["."|"!"|"?"]+|"…"] @-> ... NLout \/ NLout _ ];