Introduce the English model on the same level as the German one

Change-Id: Ib63095af7a93c158fde7bca8271ea6129347de90
diff --git a/Makefile b/Makefile
index 05981c6..b6fb810 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
 buildfoma_de:
 	cd src && \
 	foma -e "source de/tokenizer.xfst" \
-	-e "save stack ../testdata/tokenizer.fst" -q -s && \
+	-e "save stack ../testdata/tokenizer_de.fst" -q -s && \
 	cd ..
 
 buildfoma_en:
@@ -17,13 +17,13 @@
 	cd ..
 
 buildmatok_de: buildfoma_de build
-	./bin/datok convert -i ./testdata/tokenizer.fst -o ./testdata/tokenizer.matok
+	./bin/datok convert -i ./testdata/tokenizer_de.fst -o ./testdata/tokenizer_de.matok
 
 buildmatok_en: buildfoma_en build
 	./bin/datok convert -i ./testdata/tokenizer_en.fst -o ./testdata/tokenizer_en.matok
 
-builddatok: buildfoma_de build
-	./bin/datok convert -i ./testdata/tokenizer.fst -o ./testdata/tokenizer.datok -d
+builddatok_de: buildfoma_de build
+	./bin/datok convert -i ./testdata/tokenizer_de.fst -o ./testdata/tokenizer_de.datok -d
 
 builddatok_en: buildfoma_en build
 	./bin/datok convert -i ./testdata/tokenizer_en.fst -o ./testdata/tokenizer_en.datok -d
diff --git a/Readme.md b/Readme.md
index 3f9683b..c7b53cd 100644
--- a/Readme.md
+++ b/Readme.md
@@ -5,9 +5,14 @@
 ![Introduction to Datok](https://raw.githubusercontent.com/KorAP/Datok/master/misc/introducing-datok.gif)
 
 Implementation of a finite state automaton for
-fast natural language tokenization, based on a finite state
+high-performance natural language tokenization, based on a finite state
 transducer generated with [Foma](https://fomafst.github.io/).
 
+The library contains precompiled tokenizer models for:
+
+- [German](testdata/tokenizer_de.matok)
+- [English](testdata/tokenizer_en.matok)
+
 ## Tokenization
 
 ```
@@ -30,8 +35,7 @@
 The special `END OF TRANSMISSION` character (`\x04`) can be used to mark the end of a text.
 
 > *Caution*: When experimenting with STDIN and echo,
-> you may need to disable history expansion.
-
+> you may need to disable [history expansion](https://www.gnu.org/software/bash/manual/html_node/History-Interaction.html).
 
 ## Conversion
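
For context, loading one of the precompiled models from Go might look like the following sketch. It leans on `LoadMatrixFile` as exercised in the test files further down; the module import path and the `Transduce(io.Reader, io.Writer)` call are assumptions, not confirmed by this diff.

```go
package main

import (
	"os"
	"strings"

	datok "github.com/KorAP/datok" // import path assumed
)

func main() {
	// Load the precompiled German matrix model shipped with the library.
	mat := datok.LoadMatrixFile("testdata/tokenizer_de.matok")
	if mat == nil {
		panic("failed to load tokenizer model")
	}

	// Transduce (assumed API) reads raw text and writes one token per line.
	// The END OF TRANSMISSION character (\x04) marks the end of a text.
	mat.Transduce(strings.NewReader("Der alte Mann.\x04"), os.Stdout)
}
```
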
 
@@ -49,11 +53,13 @@
 
 ## Conventions
 
-The FST generated by Foma must adhere to the following rules,
-to be converted by Datok:
+The FST generated by [Foma](https://fomafst.github.io/) must adhere to
+the following rules to be convertible by Datok:
 
 - Character accepting arcs need to be translated
   *only* to themselves or to ε (the empty symbol).
+  That is, they are either passed unchanged to the
+  output or ignored (e.g. whitespace characters).
 - Multi-character symbols are not allowed,
   except for the `@_TOKEN_BOUND_@`,
   that denotes the end of a token.
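
An FST that satisfies these conventions can also be converted programmatically. Below is a minimal sketch mirroring `LoadFomaFile`, `ToDoubleArray`, and `ToMatrix` from the tests in this diff; the import path and the `Save` calls (based on the commented-out test code) are assumptions.

```go
package main

import datok "github.com/KorAP/datok" // import path assumed

func main() {
	// Convert a Foma-generated FST into Datok's two native representations.
	tok := datok.LoadFomaFile("testdata/tokenizer_de.fst")

	// Double-array representation (.datok); building it visits every arc,
	// which is why the Readme marks this route as slow.
	dat := tok.ToDoubleArray()
	dat.Save("testdata/tokenizer_de.datok")

	// Matrix representation (.matok), the format shipped with the library.
	mat := tok.ToMatrix()
	mat.Save("testdata/tokenizer_de.matok") // assumed to exist analogously
}
```
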
@@ -90,8 +96,8 @@
 read regex Tokenizer;
 ```
 
-> *Hint*: For development it's easier to replace `@_TOKEN_BOUND_@`
-with a newline.
+> *Hint*: For development in Foma it's easier to replace
+> `@_TOKEN_BOUND_@` with a newline symbol.
 
 ## Building
 
@@ -133,7 +139,7 @@
 The final datok file can then be used as a model for the tokenizer.
 
 * This may take quite some time depending on the number
-of arcs in the FST and is therefore now deprecated.
+of arcs in the FST and is therefore not recommended in most cases.
 
 
 ## Technology
@@ -180,7 +186,7 @@
 
 Datok is published under the [Apache 2.0 License](LICENSE).
 
-The german tokenizer shipped is based on work done by the
+The German and English tokenizers shipped are based on work done by the
 [Lucene project](https://github.com/apache/lucene-solr)
 (published under the Apache License),
 [David Hall](https://github.com/dlwh/epic)
diff --git a/datok_test.go b/datok_test.go
index 8562a98..8130690 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -210,7 +210,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 	assert.NotNil(dat)
 	assert.True(dat.LoadFactor() >= 60)
@@ -239,10 +239,10 @@
 
 func XTestDoubleArrayFullTokenizerBuild(t *testing.T) {
 	assert := assert.New(t)
-	tok := LoadFomaFile("testdata/tokenizer.fst")
+	tok := LoadFomaFile("testdata/tokenizer_de.fst")
 	dat := tok.ToDoubleArray()
 	assert.NotNil(dat)
-	// n, err := dat.Save("testdata/tokenizer.datok")
+	// n, err := dat.Save("testdata/tokenizer_de.datok")
 	// assert.Nil(err)
 	// assert.True(n > 500)
 }
@@ -251,7 +251,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 
 	assert.NotNil(dat)
@@ -284,7 +284,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 
 	assert.NotNil(dat)
@@ -378,7 +378,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 
 	assert.NotNil(dat)
@@ -920,7 +920,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 
 	b := make([]byte, 0, 2048)
@@ -951,7 +951,7 @@
 	assert := assert.New(t)
 
 	if dat == nil {
-		dat = LoadDatokFile("testdata/tokenizer.datok")
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
 	}
 
 	assert.NotNil(dat)
@@ -1005,7 +1005,7 @@
 	Mach's macht's was'n ist's haste willste kannste biste kriegste.`
 	r := strings.NewReader(s)
 
-	dat := LoadDatokFile("testdata/tokenizer.datok")
+	dat := LoadDatokFile("testdata/tokenizer_de.datok")
 
 	b.ResetTimer()
 
@@ -1024,7 +1024,7 @@
 // This test is deprecated as the datok file changes over time
 func XBenchmarkLoadDatokFile(b *testing.B) {
 	for i := 0; i < b.N; i++ {
-		dat := LoadDatokFile("testdata/tokenizer.datok")
+		dat := LoadDatokFile("testdata/tokenizer_de.datok")
 		if dat == nil {
 			fmt.Println("Fail!")
 			os.Exit(1)
diff --git a/matrix_test.go b/matrix_test.go
index 9d2c674..e58005f 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -207,7 +207,7 @@
 
 func xTestMatrixReadWriteFullTokenizer(t *testing.T) {
 	assert := assert.New(t)
-	foma := LoadFomaFile("testdata/tokenizer.fst")
+	foma := LoadFomaFile("testdata/tokenizer_de.fst")
 	assert.NotNil(foma)
 
 	mat := foma.ToMatrix()
@@ -244,7 +244,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -276,7 +276,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	b := make([]byte, 0, 2048)
@@ -443,7 +443,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	b := make([]byte, 0, 2048)
@@ -467,7 +467,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	b := make([]byte, 0, 2048)
@@ -1149,7 +1149,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1183,7 +1183,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1249,9 +1249,9 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
-	dat := LoadDatokFile("testdata/tokenizer.datok")
+	dat := LoadDatokFile("testdata/tokenizer_de.datok")
 
 	r := strings.NewReader(s)
 
@@ -1278,7 +1278,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1297,7 +1297,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1314,7 +1314,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1362,7 +1362,7 @@
 	assert := assert.New(t)
 
 	if mat_de == nil {
-		mat_de = LoadMatrixFile("testdata/tokenizer.matok")
+		mat_de = LoadMatrixFile("testdata/tokenizer_de.matok")
 	}
 
 	assert.NotNil(mat_de)
@@ -1381,7 +1381,7 @@
 
 	r := strings.NewReader(s)
 
-	mat := LoadMatrixFile("testdata/tokenizer.matok")
+	mat := LoadMatrixFile("testdata/tokenizer_de.matok")
 
 	b.ResetTimer()
 
diff --git a/testdata/tokenizer.datok b/testdata/tokenizer_de.datok
similarity index 100%
rename from testdata/tokenizer.datok
rename to testdata/tokenizer_de.datok
Binary files differ
diff --git a/testdata/tokenizer.fst b/testdata/tokenizer_de.fst
similarity index 100%
rename from testdata/tokenizer.fst
rename to testdata/tokenizer_de.fst
Binary files differ
diff --git a/testdata/tokenizer.matok b/testdata/tokenizer_de.matok
similarity index 100%
rename from testdata/tokenizer.matok
rename to testdata/tokenizer_de.matok
Binary files differ
diff --git a/token_writer_test.go b/token_writer_test.go
index a27ae1c..868e69d 100644
--- a/token_writer_test.go
+++ b/token_writer_test.go
@@ -34,7 +34,7 @@
 func TestTokenWriterFromOptions(t *testing.T) {
 	assert := assert.New(t)
 
-	mat := LoadMatrixFile("testdata/tokenizer.matok")
+	mat := LoadMatrixFile("testdata/tokenizer_de.matok")
 	assert.NotNil(mat)
 
 	b := make([]byte, 0, 2048)