blob: 9f4088ad1e1fee6f96e662f6d0ae8061a2d0e840 [file] [log] [blame]
Akrone396a932021-10-19 01:06:13 +02001package datok
2
3import (
4 "bufio"
5 "io"
Akron4f6b28c2021-10-25 00:52:03 +02006 "strconv"
Akrone396a932021-10-19 01:06:13 +02007)
8
// TokenWriter bundles the callbacks through which tokenizer output is
// emitted. Every field is a plain function value, so a constructor can
// install a different implementation per field.
type TokenWriter struct {
	// SentenceEnd and TextEnd are invoked with a single int argument.
	// NOTE(review): presumably a position in the input; confirm against
	// the caller, the struct itself does not constrain its meaning.
	SentenceEnd, TextEnd func(int)

	// Flush reports an error, if any, from flushing pending output.
	Flush func() error

	// Token is invoked with an int and a rune buffer.
	// NOTE(review): presumably the int is the index in the buffer at which
	// the token proper starts; confirm against the caller.
	Token func(int, []rune)
}
15
Akron4f6b28c2021-10-25 00:52:03 +020016func NewTokenWriter(w io.Writer) *TokenWriter {
17 writer := bufio.NewWriter(w)
Akrone396a932021-10-19 01:06:13 +020018
Akron4f6b28c2021-10-25 00:52:03 +020019 return &TokenWriter{
20 SentenceEnd: func(_ int) {
21 writer.WriteRune('\n')
22 },
23 TextEnd: func(_ int) {
24 writer.WriteRune('\n')
25 writer.Flush()
26 },
27 Token: func(offset int, buf []rune) {
28 writer.WriteString(string(buf[offset:]))
29 writer.WriteRune('\n')
30 },
31 Flush: func() error {
32 return writer.Flush()
33 },
34 }
Akrone396a932021-10-19 01:06:13 +020035}
36
Akron4f6b28c2021-10-25 00:52:03 +020037// Create a new token writer based on the options
38func NewTokenWriterFromOptions(w io.Writer, positionFlag bool) *TokenWriter {
39 writer := bufio.NewWriter(w)
40 posC := 0
41 pos := make([]int, 0, 200)
Akrone396a932021-10-19 01:06:13 +020042
Akron4f6b28c2021-10-25 00:52:03 +020043 tw := &TokenWriter{}
Akrone396a932021-10-19 01:06:13 +020044
Akron4f6b28c2021-10-25 00:52:03 +020045 if positionFlag {
46 tw.Token = func(offset int, buf []rune) {
Akrona854faa2021-10-22 19:31:08 +020047
Akron4f6b28c2021-10-25 00:52:03 +020048 // TODO:
49 // Store in []uint16
50 // and write to string
Akron8cc2dd92021-10-25 19:49:41 +020051
Akron4f6b28c2021-10-25 00:52:03 +020052 posC += offset
53 pos = append(pos, posC)
54 posC += len(buf) - offset
55 pos = append(pos, posC)
56 // pos = append(pos, offset, len(buf)-offset)
Akrone396a932021-10-19 01:06:13 +020057
Akron4f6b28c2021-10-25 00:52:03 +020058 writer.WriteString(string(buf[offset:]))
59 writer.WriteRune('\n')
60 }
61 } else {
62 tw.Token = func(offset int, buf []rune) {
63 writer.WriteString(string(buf[offset:]))
64 writer.WriteRune('\n')
65 }
66 }
67
68 tw.SentenceEnd = func(_ int) {
69 writer.WriteRune('\n')
70 }
71
72 if positionFlag {
73 tw.TextEnd = func(offset int) {
74 writer.Flush()
75
76 writer.WriteString(strconv.Itoa(pos[0]))
77 for _, x := range pos[1:] {
78 writer.WriteByte(' ')
79 writer.WriteString(strconv.Itoa(x))
80 }
81 writer.WriteRune('\n')
82
Akron8cc2dd92021-10-25 19:49:41 +020083 posC = 0
Akron4f6b28c2021-10-25 00:52:03 +020084 pos = pos[:0]
85 }
86 } else {
87 tw.TextEnd = func(_ int) {
88 writer.WriteRune('\n')
89 writer.Flush()
90 }
91
92 }
93
94 tw.Flush = func() error {
95 return writer.Flush()
96 }
97
98 return tw
Akrone396a932021-10-19 01:06:13 +020099}