Optimize tests by caching tokenizers in package-level variables instead of reloading them per test
Change-Id: Ia70dab59fc3cfe5e47a1540724336214addb824e
diff --git a/datok_test.go b/datok_test.go
index 6f2dc11..9e0d9a6 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -11,6 +11,8 @@
"github.com/stretchr/testify/assert"
)
+var dat *DaTokenizer
+
func tmatch(tok Tokenizer, s string) bool {
b := make([]byte, 0, 2048)
w := bytes.NewBuffer(b)
@@ -168,7 +170,10 @@
func TestDoubleArrayFullTokenizer(t *testing.T) {
assert := assert.New(t)
- dat := LoadDatokFile("testdata/tokenizer.datok")
+
+ if dat == nil {
+ dat = LoadDatokFile("testdata/tokenizer.datok")
+ }
assert.NotNil(dat)
assert.True(dat.LoadFactor() >= 70)
assert.Equal(dat.epsilon, 1)
@@ -207,7 +212,10 @@
func TestDoubleArrayFullTokenizerTransduce(t *testing.T) {
assert := assert.New(t)
- dat := LoadDatokFile("testdata/tokenizer.datok")
+ if dat == nil {
+ dat = LoadDatokFile("testdata/tokenizer.datok")
+ }
+
assert.NotNil(dat)
b := make([]byte, 0, 2048)
@@ -236,7 +244,11 @@
func TestDoubleArrayFullTokenizerSentenceSplitter(t *testing.T) {
assert := assert.New(t)
- dat := LoadDatokFile("testdata/tokenizer.datok")
+
+ if dat == nil {
+ dat = LoadDatokFile("testdata/tokenizer.datok")
+ }
+
assert.NotNil(dat)
b := make([]byte, 0, 2048)
@@ -326,7 +338,11 @@
func TestDoubleArrayFullTokenizerTokenSplitter(t *testing.T) {
assert := assert.New(t)
- dat := LoadDatokFile("testdata/tokenizer.datok")
+
+ if dat == nil {
+ dat = LoadDatokFile("testdata/tokenizer.datok")
+ }
+
assert.NotNil(dat)
b := make([]byte, 0, 2048)
@@ -872,7 +888,10 @@
func TestDoubleArrayFullTokenizerXML(t *testing.T) {
assert := assert.New(t)
- dat := LoadDatokFile("testdata/tokenizer.datok")
+ if dat == nil {
+ dat = LoadDatokFile("testdata/tokenizer.datok")
+ }
+
assert.NotNil(dat)
b := make([]byte, 0, 2048)
diff --git a/matrix_test.go b/matrix_test.go
index e27dd12..1a3f7b0 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -20,6 +20,8 @@
Archive: Ich bin kein zip. D'dorf Ku'damm Lu'hafen M'gladbach W'schaft.
Mach's macht's was'n ist's haste willste kannste biste kriegste.`
+var mat *MatrixTokenizer
+
func TestMatrixFullTokenizer(t *testing.T) {
assert := assert.New(t)
foma := LoadFomaFile("testdata/simpletok.fst")
@@ -162,7 +164,9 @@
func TestMatrixFullTokenizerTransduce(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
assert.NotNil(mat)
@@ -191,7 +195,10 @@
func TestMatrixFullTokenizerMatrixSentenceSplitter(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
b := make([]byte, 0, 2048)
w := bytes.NewBuffer(b)
@@ -288,7 +295,9 @@
func TestMatrixFullTokenizerTokenSplitter(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
b := make([]byte, 0, 2048)
w := bytes.NewBuffer(b)
@@ -826,7 +835,9 @@
func TestMatrixFullTokenizerXML(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
assert.NotNil(mat)
@@ -867,7 +878,9 @@
func TestMatokDatokEquivalence(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
dat := LoadDatokFile("testdata/tokenizer.datok")
r := strings.NewReader(s)
@@ -894,7 +907,9 @@
func TestMatrixFullTokenizerCallbackTransduce(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
assert.NotNil(mat)
@@ -911,7 +926,9 @@
func TestMatrixFullTokenizerTextTreatment(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
assert.NotNil(mat)
@@ -926,7 +943,9 @@
func TestMatrixTrimming(t *testing.T) {
assert := assert.New(t)
- mat := LoadMatrixFile("testdata/tokenizer.matok")
+ if mat == nil {
+ mat = LoadMatrixFile("testdata/tokenizer.matok")
+ }
assert.NotNil(mat)
diff --git a/token_writer.go b/token_writer.go
index 11179d3..bccb1bd 100644
--- a/token_writer.go
+++ b/token_writer.go
@@ -89,7 +89,7 @@
// Collect sentence positions and maybe sentence boundaries
if flags&SENTENCE_POS != 0 {
- tw.SentenceEnd = func(offset int) {
+ tw.SentenceEnd = func(_ int) {
// Add end position of last token to sentence boundary
// TODO: This only works if token positions are taking into account