blob: d75b2617701626d5008399acf8bd776fd31bb024 [file] [log] [blame]
Akrone396a932021-10-19 01:06:13 +02001package datok
2
3import (
4 "bufio"
5 "io"
Akron4f6b28c2021-10-25 00:52:03 +02006 "strconv"
Akrone396a932021-10-19 01:06:13 +02007)
8
// TokenWriter bundles the callbacks through which tokenization results
// are emitted. All four fields are plain function values, so a caller
// may replace any of them; the constructors in this file populate every
// field.
type TokenWriter struct {
	// SentenceEnd receives an integer offset. The implementations
	// installed by the constructors in this file ignore the argument
	// and write a single newline.
	// NOTE(review): presumably invoked at a sentence boundary - confirm
	// against the caller.
	SentenceEnd func(offset int)

	// TextEnd receives an integer offset.
	// NOTE(review): presumably invoked when a text is complete - confirm
	// against the caller.
	TextEnd func(offset int)

	// Flush forces buffered output out and returns the resulting error,
	// if any.
	Flush func() error

	// Token receives a rune buffer and an offset into it. The
	// implementations in this file emit buf[offset:] as the token.
	Token func(offset int, buf []rune)
}
15
Akron4f6b28c2021-10-25 00:52:03 +020016func NewTokenWriter(w io.Writer) *TokenWriter {
17 writer := bufio.NewWriter(w)
Akrone396a932021-10-19 01:06:13 +020018
Akron4f6b28c2021-10-25 00:52:03 +020019 return &TokenWriter{
20 SentenceEnd: func(_ int) {
21 writer.WriteRune('\n')
22 },
23 TextEnd: func(_ int) {
24 writer.WriteRune('\n')
25 writer.Flush()
26 },
27 Token: func(offset int, buf []rune) {
28 writer.WriteString(string(buf[offset:]))
29 writer.WriteRune('\n')
30 },
31 Flush: func() error {
32 return writer.Flush()
33 },
34 }
Akrone396a932021-10-19 01:06:13 +020035}
36
Akron4f6b28c2021-10-25 00:52:03 +020037// Create a new token writer based on the options
38func NewTokenWriterFromOptions(w io.Writer, positionFlag bool) *TokenWriter {
39 writer := bufio.NewWriter(w)
40 posC := 0
41 pos := make([]int, 0, 200)
Akrone396a932021-10-19 01:06:13 +020042
Akron4f6b28c2021-10-25 00:52:03 +020043 tw := &TokenWriter{}
Akrone396a932021-10-19 01:06:13 +020044
Akron4f6b28c2021-10-25 00:52:03 +020045 if positionFlag {
46 tw.Token = func(offset int, buf []rune) {
Akrona854faa2021-10-22 19:31:08 +020047
Akron4f6b28c2021-10-25 00:52:03 +020048 // TODO:
49 // Store in []uint16
50 // and write to string
51 posC += offset
52 pos = append(pos, posC)
53 posC += len(buf) - offset
54 pos = append(pos, posC)
55 // pos = append(pos, offset, len(buf)-offset)
Akrone396a932021-10-19 01:06:13 +020056
Akron4f6b28c2021-10-25 00:52:03 +020057 writer.WriteString(string(buf[offset:]))
58 writer.WriteRune('\n')
59 }
60 } else {
61 tw.Token = func(offset int, buf []rune) {
62 writer.WriteString(string(buf[offset:]))
63 writer.WriteRune('\n')
64 }
65 }
66
67 tw.SentenceEnd = func(_ int) {
68 writer.WriteRune('\n')
69 }
70
71 if positionFlag {
72 tw.TextEnd = func(offset int) {
73 writer.Flush()
74
75 writer.WriteString(strconv.Itoa(pos[0]))
76 for _, x := range pos[1:] {
77 writer.WriteByte(' ')
78 writer.WriteString(strconv.Itoa(x))
79 }
80 writer.WriteRune('\n')
81
82 posC = 0 - offset
83 pos = pos[:0]
84 }
85 } else {
86 tw.TextEnd = func(_ int) {
87 writer.WriteRune('\n')
88 writer.Flush()
89 }
90
91 }
92
93 tw.Flush = func() error {
94 return writer.Flush()
95 }
96
97 return tw
Akrone396a932021-10-19 01:06:13 +020098}