Add a flag to ignore a newline directly after EOT

Change-Id: Ia18cc0cbb1dda6311c6b2b8db1fae52c4b6335e0
diff --git a/cmd/datok.go b/cmd/datok.go
index 9314a93..e31745a 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -17,9 +17,10 @@
 		DoubleArray bool   `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
 	} `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`
 	Tokenize struct {
-		Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
-		Positions bool   `kong:"optional,negatable,default=false,short='p',help='Print token offsets'"`
-		Tokens    bool   `kong:"optional,negatable,default=true,help="Print token surfaces""`
+		Tokenizer       string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
+		Positions       bool   `kong:"optional,negatable,default=false,short='p',help='Print token offsets'"`
+		Tokens          bool   `kong:"optional,negatable,default=true,help='Print token surfaces'"`
+		NewlineAfterEOT bool   `kong:"optional,negatable,help='Ignore newline after EOT'"`
 	} `kong:"cmd, help='Tokenize a text'"`
 }
 
@@ -70,7 +71,11 @@
 	}
 
 	// Create token writer based on the options defined
-	tw := datok.NewTokenWriterFromOptions(os.Stdout, cli.Tokenize.Positions)
+	tw := datok.NewTokenWriterFromOptions(
+		os.Stdout,
+		cli.Tokenize.Positions,
+		cli.Tokenize.NewlineAfterEOT,
+	)
 
 	// Program is running in a pipe
 	fileInfo, _ := os.Stdin.Stat()
diff --git a/token_writer.go b/token_writer.go
index 9f4088a..32c5a99 100644
--- a/token_writer.go
+++ b/token_writer.go
@@ -35,7 +35,7 @@
 }
 
 // Create a new token writer based on the options
-func NewTokenWriterFromOptions(w io.Writer, positionFlag bool) *TokenWriter {
+func NewTokenWriterFromOptions(w io.Writer, positionFlag bool, newlineAfterEot bool) *TokenWriter {
 	writer := bufio.NewWriter(w)
 	posC := 0
 	pos := make([]int, 0, 200)
@@ -49,11 +49,15 @@
 			//   Store in []uint16
 			//   and write to string
 
+			// Ignore a newline right after EOT: at the start of a new text (posC == 0, prior output buffered), back posC off by one so the leading '\n' does not shift the reported offsets
+			if newlineAfterEot && posC == 0 && buf[0] == '\n' && writer.Buffered() != 0 {
+				posC--
+			}
+
 			posC += offset
 			pos = append(pos, posC)
 			posC += len(buf) - offset
 			pos = append(pos, posC)
-			//		pos = append(pos, offset, len(buf)-offset)
 
 			writer.WriteString(string(buf[offset:]))
 			writer.WriteRune('\n')
@@ -70,7 +74,7 @@
 	}
 
 	if positionFlag {
-		tw.TextEnd = func(offset int) {
+		tw.TextEnd = func(_ int) {
 			writer.Flush()
 
 			writer.WriteString(strconv.Itoa(pos[0]))
diff --git a/token_writer_test.go b/token_writer_test.go
index 291c3b9..f7bd1f7 100644
--- a/token_writer_test.go
+++ b/token_writer_test.go
@@ -37,7 +37,7 @@
 	b := make([]byte, 0, 2048)
 	w := bytes.NewBuffer(b)
 
-	tws := NewTokenWriterFromOptions(w, true)
+	tws := NewTokenWriterFromOptions(w, true, false)
 
 	mat := LoadMatrixFile("testdata/tokenizer.matok")
 
@@ -56,4 +56,12 @@
 	matStr = w.String()
 	assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n1 4 4 5\n", matStr)
 
+	// With the flag enabled, a newline directly after EOT must not shift the next text's offsets
+	tws = NewTokenWriterFromOptions(w, true, true)
+
+	w.Reset()
+	mat.TransduceTokenWriter(strings.NewReader("\nThis.\x0a\x04\nAnd.\n\x04\n"), tws)
+
+	matStr = w.String()
+	assert.Equal("This\n.\n\n1 5 5 6\nAnd\n.\n\n0 3 3 4\n", matStr)
 }