commit | b02ad07215f8811b2a4c72b1725e0ebdd01ba03d | [log] [tgz] |
---|---|---|
author | Akron <nils@diewald-online.de> | Wed Jan 19 12:41:44 2022 +0100 |
committer | Akron <nils@diewald-online.de> | Wed Jan 19 12:41:44 2022 +0100 |
tree | 0fe65a5f06467569e145acaf8a86b266a11bafa0 | |
parent | 9a594717047a2ca3d9bc16ce92916cf9fe5d4d77 [diff] [blame] |
Improve handling of apostrophes

Change-Id: Id442f4e958720f970baef63aee9b8710c258e13b
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index fb97c5c..8026c01 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -69,7 +69,7 @@ define Char \[WS|NL|Punct|Apos]; ! |¨;
-define Word Char+ ([Apos|Asterisk] Char+)*;
+define Word Char+ ([Apos|Asterisk] Char+)* ([s|S] [%’|%`]);
 define Plusampersand @txt"txt/plusampersand.txt";
 define Word [Plusampersand | Word] (Dash [Plusampersand | Word])*;