Introduce --[no-]tokens flag
Change-Id: I3aff53491151d8fe4e00d9f6747f8f12f6051a54
diff --git a/.gitignore b/.gitignore
index e781219..0f795b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@
\#*
*.info
datok
-old_*
\ No newline at end of file
+old_*
+effi*
\ No newline at end of file
diff --git a/cmd/datok.go b/cmd/datok.go
index e31745a..95919e3 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -74,6 +74,7 @@
tw := datok.NewTokenWriterFromOptions(
os.Stdout,
cli.Tokenize.Positions,
+ cli.Tokenize.Tokens,
cli.Tokenize.NewlineAfterEOT,
)
diff --git a/token_writer.go b/token_writer.go
index 32c5a99..5e4b72d 100644
--- a/token_writer.go
+++ b/token_writer.go
@@ -35,7 +35,7 @@
}
// Create a new token writer based on the options
-func NewTokenWriterFromOptions(w io.Writer, positionFlag bool, newlineAfterEot bool) *TokenWriter {
+func NewTokenWriterFromOptions(w io.Writer, positionFlag bool, tokenFlag bool, newlineAfterEot bool) *TokenWriter {
writer := bufio.NewWriter(w)
posC := 0
pos := make([]int, 0, 200)
@@ -59,8 +59,10 @@
posC += len(buf) - offset
pos = append(pos, posC)
- writer.WriteString(string(buf[offset:]))
- writer.WriteRune('\n')
+ if tokenFlag {
+ writer.WriteString(string(buf[offset:]))
+ writer.WriteRune('\n')
+ }
}
} else {
tw.Token = func(offset int, buf []rune) {
diff --git a/token_writer_test.go b/token_writer_test.go
index f7bd1f7..a75b3d9 100644
--- a/token_writer_test.go
+++ b/token_writer_test.go
@@ -37,7 +37,7 @@
b := make([]byte, 0, 2048)
w := bytes.NewBuffer(b)
- tws := NewTokenWriterFromOptions(w, true, false)
+ tws := NewTokenWriterFromOptions(w, true, true, false)
mat := LoadMatrixFile("testdata/tokenizer.matok")
@@ -56,12 +56,23 @@
matStr = w.String()
assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n1 4 4 5\n", matStr)
+ //
// Accept newline after EOT
- tws = NewTokenWriterFromOptions(w, true, true)
+ tws = NewTokenWriterFromOptions(w, true, true, true)
w.Reset()
mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws)
matStr = w.String()
assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n0 3 3 4\n", matStr)
+
+ //
+ // Write no tokens
+ tws = NewTokenWriterFromOptions(w, true, false, true)
+
+ w.Reset()
+ mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws)
+
+ matStr = w.String()
+ assert.Equal("\n1 5 5 6\n\n0 3 3 4\n", matStr)
}