Fix sentence splitting tests
diff --git a/datokenizer_test.go b/datokenizer_test.go
index 05847b2..720199f 100644
--- a/datokenizer_test.go
+++ b/datokenizer_test.go
@@ -81,7 +81,8 @@
assert.Equal("--", tokens[2])
assert.Equal("D", tokens[3])
assert.Equal("", tokens[4])
- assert.Equal(5, len(tokens))
+ assert.Equal("", tokens[5])
+ assert.Equal(6, len(tokens))
}
func TestReadWriteTokenizer(t *testing.T) {
@@ -201,7 +202,7 @@
assert.True(dat.Transduce(strings.NewReader(""), w))
sentences = strings.Split(w.String(), "\n\n")
assert.Equal(len(sentences), 1)
- assert.Equal("", sentences[0])
+ assert.Equal("\n", sentences[0])
w.Reset()
assert.True(dat.Transduce(strings.NewReader("Gefunden auf wikipedia.org."), w))
@@ -213,41 +214,43 @@
sentences = strings.Split(w.String(), "\n\n")
assert.Equal(len(sentences), 2)
- /*
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Unsere Website ist https://korap.ids-mannheim.de/?q=Baum"), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal("Unsere\nWebsite\nist\nhttps://korap.ids-mannheim.de/?q=Baum\n", sentences[0])
- assert.Equal(len(sentences), 1)
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Unsere Website ist https://korap.ids-mannheim.de/?q=Baum"), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal("Unsere\nWebsite\nist\nhttps://korap.ids-mannheim.de/?q=Baum", sentences[0])
+ assert.Equal("", sentences[1])
+ assert.Equal(len(sentences), 2)
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Unser Server ist 10.0.10.51."), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal(len(sentences), 1)
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Unser Server ist 10.0.10.51."), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal("", sentences[1])
+ assert.Equal(len(sentences), 2)
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Zu 50.4% ist es sicher"), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal(len(sentences), 1)
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Zu 50.4% ist es sicher"), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal(len(sentences), 2)
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Der Termin ist am 5.9.2018"), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal(len(sentences), 1)
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Der Termin ist am 5.9.2018"), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal(len(sentences), 2)
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Ich habe die readme.txt heruntergeladen"), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal(len(sentences), 1)
- assert.Equal("Ich\nhabe\ndie\nreadme.txt\nheruntergeladen\n", sentences[0])
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Ich habe die readme.txt heruntergeladen"), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal(len(sentences), 2)
+ assert.Equal("Ich\nhabe\ndie\nreadme.txt\nheruntergeladen", sentences[0])
+ assert.Equal("", sentences[1])
- w.Reset()
- assert.True(dat.Transduce(strings.NewReader("Ausschalten!!! Hast Du nicht gehört???"), w))
- sentences = strings.Split(w.String(), "\n\n")
- assert.Equal(len(sentences), 2)
- assert.Equal("Ausschalten\n!!!", sentences[0])
- assert.Equal("Hast\nDu\nnicht\ngehört\n???\n", sentences[1])
- */
+ w.Reset()
+ assert.True(dat.Transduce(strings.NewReader("Ausschalten!!! Hast Du nicht gehört???"), w))
+ sentences = strings.Split(w.String(), "\n\n")
+ assert.Equal(len(sentences), 3)
+ assert.Equal("Ausschalten\n!!!", sentences[0])
+ assert.Equal("Hast\nDu\nnicht\ngehört\n???", sentences[1])
+ assert.Equal("", sentences[2])
/*
w.Reset()