blob: 8cabb6733bff923f24e5159f90c23728783aeeba [file] [log] [blame]
Akron8ef408b2021-08-02 22:11:04 +02001package datokenizer
2
3import (
Akronc9d84a62021-08-03 15:56:03 +02004 "fmt"
Akron8ef408b2021-08-02 22:11:04 +02005 "testing"
6
7 "github.com/stretchr/testify/assert"
8)
9
10func TestSimpleString(t *testing.T) {
11 assert := assert.New(t)
12
13 // bau | bauamt
Akron740f3d72021-08-03 12:12:34 +020014 tok := ParseFile("testdata/bauamt.fst")
15 tok.ToDoubleArray()
16 assert.True(tok.Match("bau"))
17 assert.True(tok.Match("bauamt"))
18 assert.False(tok.Match("baum"))
Akron8ef408b2021-08-02 22:11:04 +020019}
Akron75ebe7f2021-08-03 10:34:10 +020020
21func TestSimpleBranches(t *testing.T) {
22 assert := assert.New(t)
23
24 // (bau | wahl) (amt | en)
Akron740f3d72021-08-03 12:12:34 +020025 tok := ParseFile("testdata/wahlamt.fst")
26 tok.ToDoubleArray()
27 assert.False(tok.Match("bau"))
28 assert.True(tok.Match("bauamt"))
29 assert.True(tok.Match("wahlamt"))
30 assert.True(tok.Match("bauen"))
31 assert.True(tok.Match("wahlen"))
32 assert.False(tok.Match("baum"))
Akron75ebe7f2021-08-03 10:34:10 +020033}
Akron730a79c2021-08-03 11:05:29 +020034
35func TestSimpleTokenizer(t *testing.T) {
36 assert := assert.New(t)
Akron740f3d72021-08-03 12:12:34 +020037 tok := ParseFile("testdata/simpletok.fst")
38 tok.ToDoubleArray()
39 assert.True(tok.Match("bau"))
40 assert.True(tok.Match("bad"))
41 assert.True(tok.Match("wald gehen"))
Akron730a79c2021-08-03 11:05:29 +020042}
Akron740f3d72021-08-03 12:12:34 +020043
Akron740f3d72021-08-03 12:12:34 +020044func TestFullTokenizer(t *testing.T) {
45 assert := assert.New(t)
46 tok := ParseFile("testdata/tokenizer.fst")
47 tok.ToDoubleArray()
Akronc9d84a62021-08-03 15:56:03 +020048 fmt.Println("Size:", tok.maxSize)
Akron740f3d72021-08-03 12:12:34 +020049 assert.True(tok.Match("bau"))
50 assert.True(tok.Match("bad"))
51 assert.True(tok.Match("wald gehen"))
52}