commit | 65c0f218e6c0ced24eaa646c71427278321e0a88 | [log] [tgz] |
---|---|---|
author | Akron <nils@diewald-online.de> | Tue Nov 09 20:46:46 2021 +0100 |
committer | Akron <nils@diewald-online.de> | Tue Nov 09 20:46:46 2021 +0100 |
tree | 63b57bf12a533685b61ba9b02c73be159c2d0488 | |
parent | c840636baba15d6d64e0a62320f6f511582de9b7 [diff] |
Simplify tokenizer whitespace handling

Change-Id: If9c4d7c0c7fdc7d3a088cfe48d87d37579a205d8
```diff
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 9a3c2f4..4a16ec0 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -226,8 +226,7 @@
 Emoji @-> ... NLout,
 [Streetname|Omission|Emdash] @-> ... NLout
 ]
-.o. [[WS|NL]+ @-> 0 || NLout _ ]
-.o. [[WS|NL]+ @-> 0 || .#. _ ]
+.o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ]
 ;
 
 echo - Introduce Sentence splitter
```