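Improve handling of ellipsis

Stop treating "..." as part of the generic token rule. Instead, a
separate composition step inserts an additional boundary after an
ellipsis that is followed by whitespace and any symbol other than a
lowercase letter (a-z, ä, ö, ü), so that the ellipsis ends the sentence
(e.g. "ich fange schon an ... Also Baron Innstetten!").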

Change-Id: I758e096678091f52fd3bc00b2a5f6ad1358881cc
diff --git a/Changes b/Changes
index 396c19f..46660ec 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.1.4 2022-03-11
+    - Improved handling of ellipsis.
+
 0.1.3 2022-03-08
     - Introduced refined handling of sentences including speech.
 
diff --git a/matrix_test.go b/matrix_test.go
index c017af5..3b64d5c 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -331,6 +331,16 @@
 	assert.Equal(len(sentences), 8)
 	assert.Equal("Neulich\nerst\nhat\nmir\nder\nkleine\nVentivegni\nvon\ndrüben\ngesagt\n:\n'\nFräulein\nEffi\n,\nwas\ngilt\ndie\nWette\n,\nwir\nsind\nhier\nnoch\nin\ndiesem\nJahre\nzu\nPolterabend\nund\nHochzeit\n.\n'\n«", sentences[5])
 	assert.Equal("»\nUnd\nwas\nsagtest\ndu\nda\n?\n«", sentences[6])
+
+	text = `»Nun, gib dich zufrieden, ich fange schon an ... Also Baron
+Innstetten!`
+
+	w.Reset()
+	assert.True(mat.Transduce(strings.NewReader(text), w))
+	sentences = strings.Split(w.String(), "\n\n")
+	assert.Equal(len(sentences), 3)
+	assert.Equal("»\nNun\n,\ngib\ndich\nzufrieden\n,\nich\nfange\nschon\nan\n...", sentences[0])
+	assert.Equal("Also\nBaron\nInnstetten\n!", sentences[1])
 }
 
 func TestMatrixFullTokenizerTokenSplitter(t *testing.T) {
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 8a93c21..cf183b7 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -209,7 +209,7 @@
 echo - Introduce Token splitter
 
 define Token [
-  [%. %. %. | RealToken] @-> ... NLout,
+  RealToken @-> ... NLout,
   XML @-> ... NLout,
   URL @-> ... NLout,
   Email @-> ... NLout,
@@ -222,12 +222,15 @@
 ! And compose Whitespace ignorance
 
 define DQuotes ["”"|%"|"»"|"«"];
+define NotSmallCaps [? - a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - z - ü - ö - ä];
 
 read regex Token .o. [
   SP NLout [DQuotes | "›" (NLout DQuotes)| %‹ (NLout DQuotes)| %’ (NLout DQuotes)| "'" (NLout DQuotes)] @-> ... NLout \/ _ NLout \%,
 ] .o. [
   SP @-> ... NLout \/ NLout _ NLout [? - "”" - %" - "»" - "«" - "›" - %‹ - %’ - "'" - NLout]
 ] .o. [
+  [%. %. %.] @-> ... NLout \/ _ NLout WS+ NotSmallCaps
+] .o. [
   [WS|NL]+ @-> 0 || [ .#. | NLout ] _
 ];
 
diff --git a/testdata/tokenizer.datok b/testdata/tokenizer.datok
index 18a99b4..df75e9b 100644
--- a/testdata/tokenizer.datok
+++ b/testdata/tokenizer.datok
Binary files differ
diff --git a/testdata/tokenizer.fst b/testdata/tokenizer.fst
index 73cbda5..5701081 100644
--- a/testdata/tokenizer.fst
+++ b/testdata/tokenizer.fst
Binary files differ
diff --git a/testdata/tokenizer.matok b/testdata/tokenizer.matok
index fe2b6f1..75ce996 100644
--- a/testdata/tokenizer.matok
+++ b/testdata/tokenizer.matok
Binary files differ