Split tokenizer rules into language-specific and language-independent parts

Change-Id: I6e5eeabe9a306ce9fc3e62340ba7b948ecc4fa0f
diff --git a/src/all/allsentencesplit.xfst b/src/all/allsentencesplit.xfst
new file mode 100644
index 0000000..bef8cbd
--- /dev/null
+++ b/src/all/allsentencesplit.xfst
@@ -0,0 +1,19 @@
+echo - Introduce Sentence splitter
+! and compose in whitespace removal between tokens
+
+read regex Token .o. [
+  ! Put a Token boundary after the longest possible
+  ! sentence-ending punctuation sequence
+  ! that is not followed by a comma
+  SentenceEnd @-> ... NLout \/ _ NLout \%,
+] .o. [
+  ! Put a Token boundary after a punctuation mark
+  ! that does not start a punctuation sequence
+  SP @-> ... NLout \/ NLout _ NLout NotSentenceExtension
+] .o. [
+  ! Put a Token boundary after ... if it is not followed by a lowercase letter
+  [%. %. %.] @-> ... NLout \/ _ NLout WS+ NotSmallCaps
+] .o. [
+  ! Remove whitespace between Tokens
+  [WS|NL]+ @-> 0 || [ .#. | NLout ] _
+];
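
For orientation, the symbols referenced in allsentencesplit.xfst (Token, SentenceEnd, SP, NLout, WS, NL, NotSmallCaps, NotSentenceExtension) are expected to be supplied by the language-specific side of the split before this file is sourced. A minimal, hypothetical wrapper script could look like the sketch below; every definition, symbol and path in it is a stand-in chosen only to show where the pieces plug in, not the actual language-specific definitions.

! --- hypothetical language-specific wrapper, stand-in definitions only ---
define WS " " ;                                ! assumed whitespace symbol
define NL "\n" ;                               ! assumed newline symbol
define NLout NL ;                              ! assumed output-side Token boundary marker
define SentenceEnd [ %. | %! | %? ]+ ;
define SP [ %. | %! | %? | %, | %; | %: ] ;
define NotSentenceExtension \[ %. | %! | %? | %, ] ;
define NotSmallCaps \[ a | b | c ] ;           ! stand-in for "not a lowercase letter"
define Token \[ WS | NL ]+ @-> ... NLout ;     ! naive whitespace tokenizer stand-in
! path assumed relative to the repository root
source src/all/allsentencesplit.xfst
save stack sentence-splitter-sketch.hfst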