blob: f9c10ccaf48d0c11ec03e13f6c9ba865df20117b [file] [log] [blame]
Akron8ef408b2021-08-02 22:11:04 +02001package datokenizer
2
3import (
Akronc9d84a62021-08-03 15:56:03 +02004 "fmt"
Akron8ef408b2021-08-02 22:11:04 +02005 "testing"
6
7 "github.com/stretchr/testify/assert"
8)
9
10func TestSimpleString(t *testing.T) {
11 assert := assert.New(t)
12
13 // bau | bauamt
Akron740f3d72021-08-03 12:12:34 +020014 tok := ParseFile("testdata/bauamt.fst")
Akronf2120ca2021-08-03 16:26:41 +020015 dat := tok.ToDoubleArray()
16 assert.True(dat.Match("bau"))
17 assert.True(dat.Match("bauamt"))
18 assert.False(dat.Match("baum"))
Akron8ef408b2021-08-02 22:11:04 +020019}
Akron75ebe7f2021-08-03 10:34:10 +020020
21func TestSimpleBranches(t *testing.T) {
22 assert := assert.New(t)
23
24 // (bau | wahl) (amt | en)
Akron740f3d72021-08-03 12:12:34 +020025 tok := ParseFile("testdata/wahlamt.fst")
Akronf2120ca2021-08-03 16:26:41 +020026 dat := tok.ToDoubleArray()
27 assert.False(dat.Match("bau"))
28 assert.True(dat.Match("bauamt"))
29 assert.True(dat.Match("wahlamt"))
30 assert.True(dat.Match("bauen"))
31 assert.True(dat.Match("wahlen"))
32 assert.False(dat.Match("baum"))
Akron75ebe7f2021-08-03 10:34:10 +020033}
Akron730a79c2021-08-03 11:05:29 +020034
35func TestSimpleTokenizer(t *testing.T) {
36 assert := assert.New(t)
Akron740f3d72021-08-03 12:12:34 +020037 tok := ParseFile("testdata/simpletok.fst")
Akronf2120ca2021-08-03 16:26:41 +020038 dat := tok.ToDoubleArray()
39 assert.True(dat.Match("bau"))
40 assert.True(dat.Match("bad"))
41 assert.True(dat.Match("wald gehen"))
Akron730a79c2021-08-03 11:05:29 +020042}
Akron740f3d72021-08-03 12:12:34 +020043
Akron740f3d72021-08-03 12:12:34 +020044func TestFullTokenizer(t *testing.T) {
45 assert := assert.New(t)
46 tok := ParseFile("testdata/tokenizer.fst")
Akronf2120ca2021-08-03 16:26:41 +020047 dat := tok.ToDoubleArray()
48 fmt.Println("Size:", dat.maxSize)
49 assert.True(dat.Match("bau"))
50 assert.True(dat.Match("bad"))
51 assert.True(dat.Match("wald gehen"))
Akron740f3d72021-08-03 12:12:34 +020052}