Akron | 1c34ce6 | 2021-09-23 23:27:39 +0200 | [diff] [blame^] | 1 | package datok |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "strings" |
| 6 | "testing" |
| 7 | |
| 8 | "github.com/stretchr/testify/assert" |
| 9 | ) |
| 10 | |
| 11 | func TestFullTokenizerMatrix(t *testing.T) { |
| 12 | assert := assert.New(t) |
| 13 | foma := LoadFomaFile("testdata/simpletok.fst") |
| 14 | assert.NotNil(foma) |
| 15 | |
| 16 | mat := foma.ToMatrix() |
| 17 | |
| 18 | r := strings.NewReader(" wald gehen Da kann\t man was \"erleben\"!") |
| 19 | b := make([]byte, 0, 2048) |
| 20 | w := bytes.NewBuffer(b) |
| 21 | var tokens []string |
| 22 | mat.Transduce(r, w) |
| 23 | tokens = strings.Split(w.String(), "\n") |
| 24 | assert.Equal(len(tokens), 9) |
| 25 | assert.Equal("wald", tokens[0]) |
| 26 | assert.Equal("gehen", tokens[1]) |
| 27 | assert.Equal("Da", tokens[2]) |
| 28 | assert.Equal("kann", tokens[3]) |
| 29 | assert.Equal("man", tokens[4]) |
| 30 | assert.Equal("was", tokens[5]) |
| 31 | assert.Equal("\"erleben\"", tokens[6]) |
| 32 | assert.Equal("!", tokens[7]) |
| 33 | } |