Reorder longest match operator and update models

Change-Id: I0e7b13233b6237e7a1d99c07e2ea4e43a121ec04
diff --git a/datok_test.go b/datok_test.go
index 66d052a..1beb9b7 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -179,10 +179,10 @@
 	assert.Equal(dat.epsilon, 1)
 	assert.Equal(dat.unknown, 2)
 	assert.Equal(dat.identity, 3)
-	assert.Equal(dat.final, 146)
-	assert.Equal(len(dat.sigma), 141)
-	assert.True(len(dat.array) > 3600000)
-	assert.True(dat.maxSize > 3600000)
+	assert.Equal(dat.final, 142)
+	assert.Equal(len(dat.sigma), 137)
+	// assert.True(len(dat.array) > 3000000)
+	// assert.True(dat.maxSize > 3000000)
 	assert.True(tmatch(dat, "bau"))
 	assert.True(tmatch(dat, "bad"))
 	assert.True(tmatch(dat, "wald gehen"))
@@ -1077,3 +1077,8 @@
 //   BenchmarkDoubleArrayConstruction-4         72446             15614 ns/op           10703 B/op         29 allocs/op
 //   BenchmarkDoubleArrayLarger-4                  16          71058822 ns/op         6357860 B/op       2577 allocs/op
 //   BenchmarkMatrixTransduce-4                 36703             31891 ns/op           28944 B/op         17 allocs/op
+// 2021-11-10 - rearranged longest match operator
+//   BenchmarkDoubleArrayTransduce-4    	   34522	     33210 ns/op	   28944 B/op	      17 allocs/op
+//   BenchmarkDoubleArrayConstruction-4   	   66990	     16012 ns/op	   10703 B/op	      29 allocs/op
+//   BenchmarkDoubleArrayLarger-4         	      16	  62829878 ns/op	 6357823 B/op	    2576 allocs/op
+//   BenchmarkMatrixTransduce-4           	   36154	     32702 ns/op	   28944 B/op	      17 allocs/op
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 4a16ec0..9b9f663 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -209,13 +209,12 @@
 
 echo - Compile Real Token
 
-define RealToken [Punct|Word|SNS|AcronymDep|Ord|Num|Years|Times];
+define RealToken [Punct|Emdash|Word|SNS|AcronymDep|Ord|Num|Years|Times|XMLEntities|Omission];
 
 echo - Introduce Token splitter
 
 define Token [
-  XMLEntities @-> ... NLout,
-  Abbr @-> ... NLout,
+  [Abbr|Streetname] @-> ... NLout,
   RealToken @-> ... NLout,
   XML @-> ... NLout,
   URL @-> ... NLout,
@@ -223,11 +222,8 @@
   File @-> ... NLout,
   Plusampersand @-> ... NLout,
   Domain @-> ... NLout,
-  Emoji @-> ... NLout,
-  [Streetname|Omission|Emdash] @-> ... NLout
-  ]
-.o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ]
-;
+  Emoji @-> ... NLout
+] .o. [[WS|NL]+ @-> 0 || [ .#. | NLout ] _ ];
 
 echo - Introduce Sentence splitter
 read regex Token .o. [[["."|"!"|"?"]+|"…"] @-> ... NLout \/ NLout _ ];
diff --git a/testdata/tokenizer.datok b/testdata/tokenizer.datok
index f7bc9cb..026b234 100644
--- a/testdata/tokenizer.datok
+++ b/testdata/tokenizer.datok
Binary files differ
diff --git a/testdata/tokenizer.fst b/testdata/tokenizer.fst
index 76232e6..66009db 100644
--- a/testdata/tokenizer.fst
+++ b/testdata/tokenizer.fst
Binary files differ
diff --git a/testdata/tokenizer.matok b/testdata/tokenizer.matok
index d276902..dfc2653 100644
--- a/testdata/tokenizer.matok
+++ b/testdata/tokenizer.matok
Binary files differ