Support German gender-sensitive DET, ADJ, PRON endings (from KorAP-Tokenizer)

Change-Id: I8f20ecb913c0fe514b5936ab43287ca616695f16
diff --git a/datok_test.go b/datok_test.go
index e18ba0a..aac4b86 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -1,6 +1,7 @@
 package datok
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
 	"os"
@@ -31,6 +32,28 @@
 	return tokens[:len(tokens)-1]
 }
 
+func ttokenLines(t *testing.T, path string) []string {
+	f, err := os.Open(path)
+	if err != nil {
+		t.Fatalf("failed to open %s: %v", path, err)
+	}
+	defer f.Close()
+
+	lines := []string{}
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		lines = append(lines, line)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("failed to read %s: %v", path, err)
+	}
+	return lines
+}
+
 func TestDoubleArraySimpleString(t *testing.T) {
 	assert := assert.New(t)
 	// bau | bauamt
@@ -1157,6 +1180,43 @@
 	assert.Equal("Das\nmit\ndem\n'\nGottesgericht\n'\n,\nwie\nmanche\nhochtrabend\nversichern\n,\nist\nfreilich\nein\nUnsinn\n,\nnichts\ndavon\n,\numgekehrt\n,\nunser\nEhrenkultus\nist\nein\nGötzendienst\n,\naber\nwir\nmüssen\nuns\nihm\nunterwerfen\n,\nsolange\nder\nGötze\ngilt\n.\n«", sentences[4])
 }
 
+func TestDoubleArrayFullTokenizerGenderDontSplitFromFile(t *testing.T) {
+	assert := assert.New(t)
+
+	if dat == nil {
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
+	}
+	assert.NotNil(dat)
+
+	b := make([]byte, 0, 2048)
+	w := bytes.NewBuffer(b)
+
+	for _, token := range ttokenLines(t, "testdata/de/dontsplit.txt") {
+		tokens := ttokenize(dat, w, token)
+		assert.Equalf(1, len(tokens), "should not split %q", token)
+		if len(tokens) == 1 {
+			assert.Equalf(token, tokens[0], "token surface should match for %q", token)
+		}
+	}
+}
+
+func TestDoubleArrayFullTokenizerGenderSplitFromFile(t *testing.T) {
+	assert := assert.New(t)
+
+	if dat == nil {
+		dat = LoadDatokFile("testdata/tokenizer_de.datok")
+	}
+	assert.NotNil(dat)
+
+	b := make([]byte, 0, 2048)
+	w := bytes.NewBuffer(b)
+
+	for _, token := range ttokenLines(t, "testdata/de/split.txt") {
+		tokens := ttokenize(dat, w, token)
+		assert.Greaterf(len(tokens), 1, "should split %q", token)
+	}
+}
+
 func TestDoubleArrayLoadFactor1(t *testing.T) {
 	assert := assert.New(t)
 	tok := LoadFomaFile("testdata/abbr_bench.fst")