Further improve speech rule for eos with more quotation marks

Change-Id: Ife5bc78b6e0beafe3a52c4cecb760bff2854cbaa

commit: e20084120fbbca6116063fdae5cd5b61a201f3b7 [log] [tgz]
author: Akron <nils@diewald-online.de> Wed Mar 09 10:10:13 2022 +0100
committer: Akron <nils@diewald-online.de> Wed Mar 09 10:10:13 2022 +0100
tree: 4a816f5d2a84f4e7eae88476aabe11d294865179
parent: e96895fdc402ab3591949da8a26dfc7b3c11bdc6 [diff]
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 3d63e5b..4193c28 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst

@@ -44,10 +44,9 @@
            [%, %,]];
 
 ! Right punctuation - excluding the characters that can be used as apostrophe
-define RPS ["”"|"›"|"»"|%"|[%’ %’]|["'" "'"]|[%‘ %‘]];
-define RP [SP|RPS|","|";"|":"|
+define RP [SP|","|";"|":"|
               ")"|"]"|"}"|
-              ! differs
+              "”"|"›"|"»"|%"|[%’ %’]|["'" "'"]|[%‘ %‘]|
               "*"|"/"|"_"]; ! Can be Markdown
 
 define Sym ["-"|"+"|"<"|">"|"*"|"/"|%=|%@|%&];
@@ -222,9 +221,9 @@
 echo - Introduce Sentence splitter
 ! And compose Whitespace ignorance
 read regex Token .o. [
-  SP NLout %" @-> ... NLout \/ _ NLout \%,
+  SP NLout ["”"|"›"|"»"|%"|%’|"'"] @-> ... NLout \/ _ NLout \%,
 ] .o. [
-  SP @-> ... NLout \/ NLout _ NLout \%"
+  SP @-> ... NLout \/ NLout _ NLout [? - "”" - "›" - "»" - %" - %’ - "'"]
 ] .o. [
   [WS|NL]+ @-> 0 || [ .#. | NLout ] _
 ];

diff --git a/testdata/tokenizer.datok b/testdata/tokenizer.datok
index 81023a6..18a99b4 100644
--- a/testdata/tokenizer.datok
+++ b/testdata/tokenizer.datok
Binary files differ

diff --git a/testdata/tokenizer.fst b/testdata/tokenizer.fst
index d1632b8..73cbda5 100644
--- a/testdata/tokenizer.fst
+++ b/testdata/tokenizer.fst
Binary files differ

diff --git a/testdata/tokenizer.matok b/testdata/tokenizer.matok
index f332244..fe2b6f1 100644
--- a/testdata/tokenizer.matok
+++ b/testdata/tokenizer.matok
Binary files differ
commit	e20084120fbbca6116063fdae5cd5b61a201f3b7	[log] [tgz]
author	Akron <nils@diewald-online.de>	Wed Mar 09 10:10:13 2022 +0100
committer	Akron <nils@diewald-online.de>	Wed Mar 09 10:10:13 2022 +0100
tree	4a816f5d2a84f4e7eae88476aabe11d294865179
parent	e96895fdc402ab3591949da8a26dfc7b3c11bdc6 [diff]