package datok

import (
	"bytes"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestTokenWriterSimple(t *testing.T) {
	assert := assert.New(t)

	b := make([]byte, 0, 2048)
	w := bytes.NewBuffer(b)

	tws := NewTokenWriter(w)

	assert.NotNil(tws)

	// Write two tokens; the second starts at rune offset 1 of its
	// buffer, so only "ef" is emitted.
	tws.Token(0, []rune{'a', 'b', 'c'})

	tws.Token(1, []rune{'d', 'e', 'f'})

	// Sentence and text boundaries are written as empty lines.
	tws.SentenceEnd(0)

	tws.TextEnd(0)

	tws.Flush()

	assert.Equal("abc\nef\n\n\n", w.String())
}

func TestTokenWriterFromOptions(t *testing.T) {
	assert := assert.New(t)

	b := make([]byte, 0, 2048)
	w := bytes.NewBuffer(b)

	tws := NewTokenWriterFromOptions(w, true)

	mat := LoadMatrixFile("testdata/tokenizer.matok")

	assert.NotNil(mat)

	// \x04 (EOT) marks the end of a text unit in the input stream.
	assert.True(mat.TransduceTokenWriter(
		strings.NewReader("This.\x0a\x04And.\n\x04\n"), tws),
	)

	// With this writer, each text is followed by a line of token offsets.
	matStr := w.String()
	assert.Equal("This\n.\n\n0 4 4 5\nAnd\n.\n\n0 3 3 4\n", matStr)

	// Leading newlines shift the reported offsets by one.
	w.Reset()
	mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws)

	matStr = w.String()
	assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n1 4 4 5\n", matStr)
}