blob: 18ed3a242f5e2a6d88606efbe9f495a149970bb8 [file] [log] [blame]
Akron1c34ce62021-09-23 23:27:39 +02001package datok
2
3import (
4 "bytes"
5 "strings"
6 "testing"
7
8 "github.com/stretchr/testify/assert"
9)
10
11func TestFullTokenizerMatrix(t *testing.T) {
12 assert := assert.New(t)
13 foma := LoadFomaFile("testdata/simpletok.fst")
14 assert.NotNil(foma)
15
16 mat := foma.ToMatrix()
17
18 r := strings.NewReader(" wald gehen Da kann\t man was \"erleben\"!")
19 b := make([]byte, 0, 2048)
20 w := bytes.NewBuffer(b)
21 var tokens []string
22 mat.Transduce(r, w)
23 tokens = strings.Split(w.String(), "\n")
24 assert.Equal(len(tokens), 9)
25 assert.Equal("wald", tokens[0])
26 assert.Equal("gehen", tokens[1])
27 assert.Equal("Da", tokens[2])
28 assert.Equal("kann", tokens[3])
29 assert.Equal("man", tokens[4])
30 assert.Equal("was", tokens[5])
31 assert.Equal("\"erleben\"", tokens[6])
32 assert.Equal("!", tokens[7])
33}