blob: 32c5a99e5fec5441496844b49a510e69b31a43d5 [file] [log] [blame]
Akrone396a932021-10-19 01:06:13 +02001package datok
2
3import (
4 "bufio"
5 "io"
Akron4f6b28c2021-10-25 00:52:03 +02006 "strconv"
Akrone396a932021-10-19 01:06:13 +02007)
8
// TokenWriter bundles the callbacks through which tokenization results are
// emitted. Every field is a plain function value, so a caller can replace
// any of them to redirect or reformat the output.
type TokenWriter struct {
	// SentenceEnd is called to mark the end of a sentence. The constructors
	// in this file ignore the int argument.
	SentenceEnd func(int)
	// TextEnd is called to mark the end of a text. The constructors in this
	// file ignore the int argument.
	TextEnd func(int)
	// Flush writes pending output to the underlying sink and returns any
	// error that occurred.
	Flush func() error
	// Token is called once per token with an offset and a rune buffer. The
	// constructors in this file emit buf[offset:] as the token surface.
	Token func(int, []rune)
}
15
Akron4f6b28c2021-10-25 00:52:03 +020016func NewTokenWriter(w io.Writer) *TokenWriter {
17 writer := bufio.NewWriter(w)
Akrone396a932021-10-19 01:06:13 +020018
Akron4f6b28c2021-10-25 00:52:03 +020019 return &TokenWriter{
20 SentenceEnd: func(_ int) {
21 writer.WriteRune('\n')
22 },
23 TextEnd: func(_ int) {
24 writer.WriteRune('\n')
25 writer.Flush()
26 },
27 Token: func(offset int, buf []rune) {
28 writer.WriteString(string(buf[offset:]))
29 writer.WriteRune('\n')
30 },
31 Flush: func() error {
32 return writer.Flush()
33 },
34 }
Akrone396a932021-10-19 01:06:13 +020035}
36
Akron4f6b28c2021-10-25 00:52:03 +020037// Create a new token writer based on the options
Akrone9431ec2021-10-25 21:35:33 +020038func NewTokenWriterFromOptions(w io.Writer, positionFlag bool, newlineAfterEot bool) *TokenWriter {
Akron4f6b28c2021-10-25 00:52:03 +020039 writer := bufio.NewWriter(w)
40 posC := 0
41 pos := make([]int, 0, 200)
Akrone396a932021-10-19 01:06:13 +020042
Akron4f6b28c2021-10-25 00:52:03 +020043 tw := &TokenWriter{}
Akrone396a932021-10-19 01:06:13 +020044
Akron4f6b28c2021-10-25 00:52:03 +020045 if positionFlag {
46 tw.Token = func(offset int, buf []rune) {
Akrona854faa2021-10-22 19:31:08 +020047
Akron4f6b28c2021-10-25 00:52:03 +020048 // TODO:
49 // Store in []uint16
50 // and write to string
Akron8cc2dd92021-10-25 19:49:41 +020051
Akrone9431ec2021-10-25 21:35:33 +020052 // Accept newline after EOT
53 if newlineAfterEot && posC == 0 && buf[0] == '\n' && writer.Buffered() != 0 {
54 posC--
55 }
56
Akron4f6b28c2021-10-25 00:52:03 +020057 posC += offset
58 pos = append(pos, posC)
59 posC += len(buf) - offset
60 pos = append(pos, posC)
Akrone396a932021-10-19 01:06:13 +020061
Akron4f6b28c2021-10-25 00:52:03 +020062 writer.WriteString(string(buf[offset:]))
63 writer.WriteRune('\n')
64 }
65 } else {
66 tw.Token = func(offset int, buf []rune) {
67 writer.WriteString(string(buf[offset:]))
68 writer.WriteRune('\n')
69 }
70 }
71
72 tw.SentenceEnd = func(_ int) {
73 writer.WriteRune('\n')
74 }
75
76 if positionFlag {
Akrone9431ec2021-10-25 21:35:33 +020077 tw.TextEnd = func(_ int) {
Akron4f6b28c2021-10-25 00:52:03 +020078 writer.Flush()
79
80 writer.WriteString(strconv.Itoa(pos[0]))
81 for _, x := range pos[1:] {
82 writer.WriteByte(' ')
83 writer.WriteString(strconv.Itoa(x))
84 }
85 writer.WriteRune('\n')
86
Akron8cc2dd92021-10-25 19:49:41 +020087 posC = 0
Akron4f6b28c2021-10-25 00:52:03 +020088 pos = pos[:0]
89 }
90 } else {
91 tw.TextEnd = func(_ int) {
92 writer.WriteRune('\n')
93 writer.Flush()
94 }
95
96 }
97
98 tw.Flush = func() error {
99 return writer.Flush()
100 }
101
102 return tw
Akrone396a932021-10-19 01:06:13 +0200103}