Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 1 | package datok |
| 2 | |
| 3 | import ( |
| 4 | "bufio" |
| 5 | "io" |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 6 | "strconv" |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 7 | ) |
| 8 | |
// TokenWriter bundles the callbacks through which tokenization results are
// written out. The constructors in this file populate all four fields with
// closures that share a single buffered writer.
type TokenWriter struct {
	// SentenceEnd is the callback for the end of a sentence. The writers
	// constructed in this file ignore the argument and write a newline.
	SentenceEnd func(int)

	// TextEnd is the callback for the end of a text. The writers
	// constructed in this file do not use the argument.
	TextEnd func(int)

	// Flush writes any buffered output to the underlying writer and
	// returns the resulting error, if any.
	Flush func() error

	// Token is the callback for a single token. The writers constructed in
	// this file emit buf[offset:] followed by a newline.
	Token func(offset int, buf []rune)
}
| 15 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 16 | func NewTokenWriter(w io.Writer) *TokenWriter { |
| 17 | writer := bufio.NewWriter(w) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 18 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 19 | return &TokenWriter{ |
| 20 | SentenceEnd: func(_ int) { |
| 21 | writer.WriteRune('\n') |
| 22 | }, |
| 23 | TextEnd: func(_ int) { |
| 24 | writer.WriteRune('\n') |
| 25 | writer.Flush() |
| 26 | }, |
| 27 | Token: func(offset int, buf []rune) { |
| 28 | writer.WriteString(string(buf[offset:])) |
| 29 | writer.WriteRune('\n') |
| 30 | }, |
| 31 | Flush: func() error { |
| 32 | return writer.Flush() |
| 33 | }, |
| 34 | } |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 35 | } |
| 36 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 37 | // Create a new token writer based on the options |
| 38 | func NewTokenWriterFromOptions(w io.Writer, positionFlag bool) *TokenWriter { |
| 39 | writer := bufio.NewWriter(w) |
| 40 | posC := 0 |
| 41 | pos := make([]int, 0, 200) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 42 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 43 | tw := &TokenWriter{} |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 44 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 45 | if positionFlag { |
| 46 | tw.Token = func(offset int, buf []rune) { |
Akron | a854faa | 2021-10-22 19:31:08 +0200 | [diff] [blame] | 47 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 48 | // TODO: |
| 49 | // Store in []uint16 |
| 50 | // and write to string |
Akron | 8cc2dd9 | 2021-10-25 19:49:41 +0200 | [diff] [blame^] | 51 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 52 | posC += offset |
| 53 | pos = append(pos, posC) |
| 54 | posC += len(buf) - offset |
| 55 | pos = append(pos, posC) |
| 56 | // pos = append(pos, offset, len(buf)-offset) |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 57 | |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 58 | writer.WriteString(string(buf[offset:])) |
| 59 | writer.WriteRune('\n') |
| 60 | } |
| 61 | } else { |
| 62 | tw.Token = func(offset int, buf []rune) { |
| 63 | writer.WriteString(string(buf[offset:])) |
| 64 | writer.WriteRune('\n') |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | tw.SentenceEnd = func(_ int) { |
| 69 | writer.WriteRune('\n') |
| 70 | } |
| 71 | |
| 72 | if positionFlag { |
| 73 | tw.TextEnd = func(offset int) { |
| 74 | writer.Flush() |
| 75 | |
| 76 | writer.WriteString(strconv.Itoa(pos[0])) |
| 77 | for _, x := range pos[1:] { |
| 78 | writer.WriteByte(' ') |
| 79 | writer.WriteString(strconv.Itoa(x)) |
| 80 | } |
| 81 | writer.WriteRune('\n') |
| 82 | |
Akron | 8cc2dd9 | 2021-10-25 19:49:41 +0200 | [diff] [blame^] | 83 | posC = 0 |
Akron | 4f6b28c | 2021-10-25 00:52:03 +0200 | [diff] [blame] | 84 | pos = pos[:0] |
| 85 | } |
| 86 | } else { |
| 87 | tw.TextEnd = func(_ int) { |
| 88 | writer.WriteRune('\n') |
| 89 | writer.Flush() |
| 90 | } |
| 91 | |
| 92 | } |
| 93 | |
| 94 | tw.Flush = func() error { |
| 95 | return writer.Flush() |
| 96 | } |
| 97 | |
| 98 | return tw |
Akron | e396a93 | 2021-10-19 01:06:13 +0200 | [diff] [blame] | 99 | } |