Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 1 | package datok |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 5 | "strings" |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 6 | "testing" |
| 7 | |
| 8 | "github.com/stretchr/testify/assert" |
| 9 | ) |
| 10 | |
| 11 | func TestTokenWriterSimple(t *testing.T) { |
| 12 | assert := assert.New(t) |
| 13 | |
| 14 | b := make([]byte, 0, 2048) |
| 15 | w := bytes.NewBuffer(b) |
| 16 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 17 | tws := NewTokenWriter(w) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 18 | |
| 19 | assert.NotNil(tws) |
| 20 | |
| 21 | tws.Token(0, []rune{'a', 'b', 'c'}) |
| 22 | |
Akron | 32416ce | 2021-10-23 17:09:41 +0200 | [diff] [blame] | 23 | tws.Token(1, []rune{'d', 'e', 'f'}) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 24 | |
Akron | a854faa | 2021-10-22 19:31:08 +0200 | [diff] [blame] | 25 | tws.SentenceEnd(0) |
| 26 | |
| 27 | tws.TextEnd(0) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 28 | |
| 29 | tws.Flush() |
| 30 | |
Akron | 32416ce | 2021-10-23 17:09:41 +0200 | [diff] [blame] | 31 | assert.Equal("abc\nef\n\n\n", w.String()) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 32 | } |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 33 | |
| 34 | func TestTokenWriterFromOptions(t *testing.T) { |
| 35 | assert := assert.New(t) |
| 36 | |
| 37 | b := make([]byte, 0, 2048) |
| 38 | w := bytes.NewBuffer(b) |
| 39 | |
Akron | a9e0c42 | 2021-10-27 19:01:17 +0200 | [diff] [blame^] | 40 | tws := NewTokenWriterFromOptions(w, true, true, false) |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 41 | |
| 42 | mat := LoadMatrixFile("testdata/tokenizer.matok") |
| 43 | |
| 44 | assert.NotNil(mat) |
| 45 | |
| 46 | assert.True(mat.TransduceTokenWriter( |
| 47 | strings.NewReader("This.\x0a\x04And.\n\x04\n"), tws), |
| 48 | ) |
| 49 | |
| 50 | matStr := w.String() |
| 51 | assert.Equal("This\n.\n\n0 4 4 5\nAnd\n.\n\n0 3 3 4\n", matStr) |
Akron | 8cc2dd9 | 2021-10-25 19:49:41 +0200 | [diff] [blame] | 52 | |
| 53 | w.Reset() |
| 54 | mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws) |
| 55 | |
| 56 | matStr = w.String() |
| 57 | assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n1 4 4 5\n", matStr) |
| 58 | |
Akron | a9e0c42 | 2021-10-27 19:01:17 +0200 | [diff] [blame^] | 59 | // |
Akron | e9431ec | 2021-10-25 21:35:33 +0200 | [diff] [blame] | 60 | // Accept newline after EOT |
Akron | a9e0c42 | 2021-10-27 19:01:17 +0200 | [diff] [blame^] | 61 | tws = NewTokenWriterFromOptions(w, true, true, true) |
Akron | e9431ec | 2021-10-25 21:35:33 +0200 | [diff] [blame] | 62 | |
| 63 | w.Reset() |
| 64 | mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws) |
| 65 | |
| 66 | matStr = w.String() |
| 67 | assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n0 3 3 4\n", matStr) |
Akron | a9e0c42 | 2021-10-27 19:01:17 +0200 | [diff] [blame^] | 68 | |
| 69 | // |
| 70 | // Write no tokens |
| 71 | tws = NewTokenWriterFromOptions(w, true, false, true) |
| 72 | |
| 73 | w.Reset() |
| 74 | mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws) |
| 75 | |
| 76 | matStr = w.String() |
| 77 | assert.Equal("\n1 5 5 6\n\n0 3 3 4\n", matStr) |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 78 | } |