Improve sentence splitting around quoted speech

Change-Id: Id699624f17e3e983b0900d48e95c1d7e782e2215
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 8026c01..3d63e5b 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -44,15 +44,11 @@
            [%, %,]];
 
 ! Right punctuation - excluding the characters that can be used as apostrophe
-define RP [SP|","|";"|":"|
+define RPS ["”"|"›"|"»"|%"|[%’ %’]|["'" "'"]|[%‘ %‘]]; ! Quotation marks - curly and straight single quotes only when doubled
+define RP [SP|RPS|","|";"|":"|
               ")"|"]"|"}"|
-              "”"|"›"|"»"|
-              %"|
               ! differs
-              ["'" "'"]|
-              "*"|"/"|"_"| ! Can be Markdown
-              ! from book
-              [%‘ %‘]|[%’ %’]];
+              "*"|"/"|"_"]; ! These symbols can also be Markdown markup
 
 define Sym ["-"|"+"|"<"|">"|"*"|"/"|%=|%@|%&];
 define Apos %'|%’|%`;
@@ -221,9 +217,16 @@
   File @-> ... NLout,
   Domain @-> ... NLout,
   Emoji @-> ... NLout
-] .o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ];
+];
 
 echo - Introduce Sentence splitter
-read regex Token .o. [[["."|"!"|"?"]+|"…"] @-> ... NLout \/ NLout _ ];
+! Then compose the rule that deletes [WS|NL]+ runs at the input start and after NLout
+read regex Token .o. [
+  SP NLout %" @-> ... NLout \/ _ NLout \%, ! SP NLout double-quote gets NLout appended when NLout and a non-comma symbol follow
+] .o. [
+  SP @-> ... NLout \/ NLout _ NLout \%" ! SP between two NLout gets NLout appended when the symbol after them is not a double quote
+] .o. [
+  [WS|NL]+ @-> 0 || [ .#. | NLout ] _ ! longest WS or NL run is deleted at the input start or right after NLout
+];
 
 ! foma -e "source tokenizer.xfst" -q -s && cat text.txt | flookup tokenizer.fst -x -b
\ No newline at end of file