| Akron | 1c34ce6 | 2021-09-23 23:27:39 +0200 | [diff] [blame^] | 1 | package datok | 
|  | 2 |  | 
|  | 3 | import ( | 
|  | 4 | "bytes" | 
|  | 5 | "strings" | 
|  | 6 | "testing" | 
|  | 7 |  | 
|  | 8 | "github.com/stretchr/testify/assert" | 
|  | 9 | ) | 
|  | 10 |  | 
|  | 11 | func TestFullTokenizerMatrix(t *testing.T) { | 
|  | 12 | assert := assert.New(t) | 
|  | 13 | foma := LoadFomaFile("testdata/simpletok.fst") | 
|  | 14 | assert.NotNil(foma) | 
|  | 15 |  | 
|  | 16 | mat := foma.ToMatrix() | 
|  | 17 |  | 
|  | 18 | r := strings.NewReader("  wald   gehen Da kann\t man was \"erleben\"!") | 
|  | 19 | b := make([]byte, 0, 2048) | 
|  | 20 | w := bytes.NewBuffer(b) | 
|  | 21 | var tokens []string | 
|  | 22 | mat.Transduce(r, w) | 
|  | 23 | tokens = strings.Split(w.String(), "\n") | 
|  | 24 | assert.Equal(len(tokens), 9) | 
|  | 25 | assert.Equal("wald", tokens[0]) | 
|  | 26 | assert.Equal("gehen", tokens[1]) | 
|  | 27 | assert.Equal("Da", tokens[2]) | 
|  | 28 | assert.Equal("kann", tokens[3]) | 
|  | 29 | assert.Equal("man", tokens[4]) | 
|  | 30 | assert.Equal("was", tokens[5]) | 
|  | 31 | assert.Equal("\"erleben\"", tokens[6]) | 
|  | 32 | assert.Equal("!", tokens[7]) | 
|  | 33 | } |