Improve offset handling in buffers

Change-Id: I1f66695a852fda1c1bd8fb1fdd418c5ecda54f66
diff --git a/datok.go b/datok.go
index 7b85809..45a9a19 100644
--- a/datok.go
+++ b/datok.go
@@ -724,6 +724,27 @@
 	return string(out)
 }
 
+// showBufferNew renders buffer for testing purposes: '^' precedes index buffi,
+// '|' precedes index bufft, and the rune at index buffc is wrapped in '[' ']'.
+func showBufferNew(buffer []rune, bufft int, buffc int, buffi int) string {
+	out := make([]rune, 0, 1024)
+	for x := 0; x < len(buffer); x++ {
+		if buffi == x {
+			out = append(out, '^')
+		}
+		if bufft == x {
+			out = append(out, '|')
+		}
+		if buffc == x {
+			out = append(out, '[', buffer[x], ']')
+		} else {
+			out = append(out, buffer[x])
+		}
+	}
+	return string(out)
+}
+
+// Transduce input to output
 func (dat *DaTokenizer) Transduce(r io.Reader, w io.Writer) bool {
 	return dat.TransduceTokenWriter(r, NewTokenWriterSimple(w))
 }
@@ -768,9 +789,13 @@
 	//   Store a translation buffer as well, so characters don't
 	//   have to be translated multiple times!
 	buffer := make([]rune, 1024)
-	buffo := 0 // Buffer offset
+	bufft := 0 // Buffer token offset
+	buffc := 0 // Buffer current symbol
 	buffi := 0 // Buffer length
 
+	// The buffer is organized as follows:
+	// [   t[....c..]..i]
+
 	reader := bufio.NewReader(r)
 	defer w.Flush()
 
@@ -786,7 +811,7 @@
 
 		if newchar {
 			// Get from reader if buffer is empty
-			if buffo >= buffi {
+			if buffc >= buffi {
 				if eof {
 					break
 				}
@@ -801,10 +826,10 @@
 				buffi++
 			}
 
-			char = buffer[buffo]
+			char = buffer[buffc]
 
 			if DEBUG {
-				fmt.Println("Current char", string(char), int(char), showBuffer(buffer, buffo, buffi))
+				fmt.Println("Current char", string(char), int(char), showBufferNew(buffer, bufft, buffc, buffi))
 			}
 
 			eot = false
@@ -835,10 +860,10 @@
 			if dat.array[dat.array[t0].getBase()+uint32(dat.epsilon)].getCheck() == t0 {
 				// Remember state for backtracking to last tokenend state
 				epsilonState = t0
-				epsilonOffset = buffo
+				epsilonOffset = buffc
 
 				if DEBUG {
-					fmt.Println("epsilonOffset is set to", buffo)
+					fmt.Println("epsilonOffset is set to", buffc)
 				}
 			}
 		}
@@ -876,11 +901,11 @@
 				// Try again with epsilon symbol, in case everything else failed
 				t0 = epsilonState
 				epsilonState = 0 // reset
-				buffo = epsilonOffset
+				buffc = epsilonOffset
 				a = dat.epsilon
 
 				if DEBUG {
-					fmt.Println("Get from epsilon stack and set buffo!", showBuffer(buffer, buffo, buffi))
+					fmt.Println("Get from epsilon stack and set buffo!", showBufferNew(buffer, bufft, buffc, buffi))
 				}
 
 			} else {
@@ -898,24 +923,25 @@
 		// Transition consumes a character
 		if a != dat.epsilon {
 
-			buffo++
+			buffc++
 
 			// Transition does not produce a character
-			if buffo == 1 && ta.isNonToken() {
+			if buffc-bufft == 1 && ta.isNonToken() {
 				if DEBUG {
-					fmt.Println("Nontoken forward", showBuffer(buffer, buffo, buffi))
+					fmt.Println("Nontoken forward", showBufferNew(buffer, bufft, buffc, buffi))
 				}
-				rewindBuffer = true
+				bufft++
+				// rewindBuffer = true
 			}
 
 		} else {
 
 			// Transition marks the end of a token - so flush the buffer
-			if buffo > 0 {
+			if buffc-bufft > 0 {
 				if DEBUG {
-					fmt.Println("-> Flush buffer: [", string(buffer[:buffo]), "]", showBuffer(buffer, buffo, buffi))
+					fmt.Println("-> Flush buffer: [", string(buffer[bufft:buffc]), "]", showBuffer(buffer, buffc, buffi))
 				}
-				w.Token(0, buffer[:buffo])
+				w.Token(0, buffer[bufft:buffc])
 				rewindBuffer = true
 				sentenceEnd = false
 				textEnd = false
@@ -929,31 +955,33 @@
 		if rewindBuffer {
 
 			if DEBUG {
-				fmt.Println("-> Rewind buffer", buffo, buffi, epsilonOffset)
+				fmt.Println("-> Rewind buffer", bufft, buffc, buffi, epsilonOffset)
 			}
 
 			// TODO: Better as a ring buffer
-			for x, i := range buffer[buffo:buffi] {
+			for x, i := range buffer[buffc:buffi] {
 				buffer[x] = i
 			}
 
-			buffi -= buffo
+			buffi -= buffc
 			// epsilonOffset -= buffo
 			epsilonOffset = 0
 			epsilonState = 0
 
-			buffo = 0
-			if DEBUG {
-				fmt.Println("Remaining:", showBuffer(buffer, buffo, buffi))
-			}
+			buffc = 0
+			bufft = 0
 
-			if eot {
-				eot = false
-				textEnd = true
-				w.TextEnd(0)
-				if DEBUG {
-					fmt.Println("END OF TEXT")
-				}
+			if DEBUG {
+				fmt.Println("Remaining:", showBufferNew(buffer, bufft, buffc, buffi))
+			}
+		}
+
+		if eot {
+			eot = false
+			textEnd = true
+			w.TextEnd(0)
+			if DEBUG {
+				fmt.Println("END OF TEXT")
 			}
 		}
 
@@ -1029,9 +1057,9 @@
 	} else if epsilonState != 0 {
 		t0 = epsilonState
 		epsilonState = 0 // reset
-		buffo = epsilonOffset
+		buffc = epsilonOffset
 		if DEBUG {
-			fmt.Println("Get from epsilon stack and set buffo!", showBuffer(buffer, buffo, buffi))
+			fmt.Println("Get from epsilon stack and set buffo!", showBufferNew(buffer, bufft, buffc, buffi))
 		}
 		goto PARSECHAR
 	}
diff --git a/datok_test.go b/datok_test.go
index 38eb474..6f2dc11 100644
--- a/datok_test.go
+++ b/datok_test.go
@@ -1038,8 +1038,13 @@
 //   BenchmarkToDoubleArray-4                   63663             17675 ns/op           10703 B/op         29 allocs/op
 //   BenchmarkToDoubleArrayLarger-4                16          83535733 ns/op         6357874 B/op       2577 allocs/op
 //   BenchmarkTransduceMatrix-4                 45362             25258 ns/op           12408 B/op          6 allocs/op
-// 2021-10-21 - Introduxe EOT
+// 2021-10-22 - Introduce EOT
 //   BenchmarkDoubleArrayTransduce-4            43820             27661 ns/op           12408 B/op          6 allocs/op
 //   BenchmarkDoubleArrayConstruction-4         68259             16608 ns/op           10703 B/op         29 allocs/op
 //   BenchmarkDoubleArrayLarger-4                  16          69889532 ns/op         6357901 B/op       2578 allocs/op
 //   BenchmarkMatrixTransduce-4                 49426             25105 ns/op           12408 B/op          6 allocs/op
+// 2021-10-23 - Improve offset handling
+//   BenchmarkDoubleArrayTransduce-4            41890             29729 ns/op           12408 B/op          6 allocs/op
+//   BenchmarkDoubleArrayConstruction-4         74510             15879 ns/op           10703 B/op         29 allocs/op
+//   BenchmarkDoubleArrayLarger-4                  18          73752383 ns/op         6357956 B/op       2579 allocs/op
+//   BenchmarkMatrixTransduce-4                 46870             27140 ns/op           12408 B/op          6 allocs/op
diff --git a/matrix.go b/matrix.go
index 8c68959..98fc32c 100644
--- a/matrix.go
+++ b/matrix.go
@@ -313,10 +313,15 @@
 	return mat
 }
 
+// Transduce input to output
 func (mat *MatrixTokenizer) Transduce(r io.Reader, w io.Writer) bool {
 	return mat.TransduceTokenWriter(r, NewTokenWriterSimple(w))
 }
 
+// TransduceTokenWriter transduces an input string against
+// the matrix FSA. The rules are always greedy. If the
+// automaton fails, it takes the last possible token ending
+// branch.
 func (mat *MatrixTokenizer) TransduceTokenWriter(r io.Reader, w TokenWriterI) bool {
 	var a int
 	var t0 uint32
@@ -335,9 +340,13 @@
 	textEnd := false
 
 	buffer := make([]rune, 1024)
-	buffo := 0 // Buffer offset
+	bufft := 0 // Buffer token offset
+	buffc := 0 // Buffer current symbol
 	buffi := 0 // Buffer length
 
+	// The buffer is organized as follows:
+	// [   t[....c..]..i]
+
 	reader := bufio.NewReader(r)
 	defer w.Flush()
 
@@ -353,7 +362,7 @@
 
 		if newchar {
 			// Get from reader if buffer is empty
-			if buffo >= buffi {
+			if buffc >= buffi {
 				if eof {
 					break
 				}
@@ -368,10 +377,10 @@
 				buffi++
 			}
 
-			char = buffer[buffo]
+			char = buffer[buffc]
 
 			if DEBUG {
-				fmt.Println("Current char", string(char), int(char), showBuffer(buffer, buffo, buffi))
+				fmt.Println("Current char", string(char), int(char), showBufferNew(buffer, bufft, buffc, buffi))
 			}
 
 			eot = false
@@ -408,10 +417,10 @@
 				// Just Remove
 				t0 &= ^FIRSTBIT
 				epsilonState = t0
-				epsilonOffset = buffo
+				epsilonOffset = buffc
 
 				if DEBUG {
-					fmt.Println("epsilonOffset is set to", buffo)
+					fmt.Println("epsilonOffset is set to", buffc)
 				}
 			}
 		}
@@ -445,11 +454,11 @@
 				// Try again with epsilon symbol, in case everything else failed
 				t0 = epsilonState
 				epsilonState = 0 // reset
-				buffo = epsilonOffset
+				buffc = epsilonOffset
 				a = mat.epsilon
 
 				if DEBUG {
-					fmt.Println("Get from epsilon stack and set buffo!", showBuffer(buffer, buffo, buffi))
+					fmt.Println("Get from epsilon stack and set buffo!", showBufferNew(buffer, bufft, buffc, buffi))
 				}
 
 			} else {
@@ -467,23 +476,24 @@
 		// Transition consumes a character
 		if a != mat.epsilon {
 
-			buffo++
+			buffc++
 
 			// Transition does not produce a character
-			if buffo == 1 && (t&FIRSTBIT) != 0 {
+			if buffc-bufft == 1 && (t&FIRSTBIT) != 0 {
 				if DEBUG {
-					fmt.Println("Nontoken forward", showBuffer(buffer, buffo, buffi))
+					fmt.Println("Nontoken forward", showBufferNew(buffer, bufft, buffc, buffi))
 				}
-				rewindBuffer = true
+				bufft++
+				// rewindBuffer = true
 			}
 
 		} else {
 			// Transition marks the end of a token - so flush the buffer
-			if buffo > 0 {
+			if buffc-bufft > 0 {
 				if DEBUG {
-					fmt.Println("-> Flush buffer: [", string(buffer[:buffo]), "]", showBuffer(buffer, buffo, buffi))
+					fmt.Println("-> Flush buffer: [", string(buffer[bufft:buffc]), "]", showBufferNew(buffer, bufft, buffc, buffi))
 				}
-				w.Token(0, buffer[:buffo])
+				w.Token(0, buffer[bufft:buffc])
 				rewindBuffer = true
 				sentenceEnd = false
 				textEnd = false
@@ -491,44 +501,41 @@
 				sentenceEnd = true
 				w.SentenceEnd(0)
 			}
-			if DEBUG {
-				fmt.Println("-> Newline")
-			}
-			// writer.WriteRune('\n')
 		}
 
 		// Rewind the buffer if necessary
 		if rewindBuffer {
 
 			if DEBUG {
-				fmt.Println("-> Rewind buffer", buffo, buffi, epsilonOffset)
+				fmt.Println("-> Rewind buffer", bufft, buffc, buffi, epsilonOffset)
 			}
 
 			// TODO: Better as a ring buffer
-			for x, i := range buffer[buffo:buffi] {
+			for x, i := range buffer[buffc:buffi] {
 				buffer[x] = i
 			}
 
-			buffi -= buffo
+			buffi -= buffc
 			// epsilonOffset -= buffo
 			epsilonOffset = 0
 			epsilonState = 0
 
-			buffo = 0
-			if DEBUG {
-				fmt.Println("Remaining:", showBuffer(buffer, buffo, buffi))
-			}
+			buffc = 0
+			bufft = 0
 
-			if eot {
-				eot = false
-				textEnd = true
-				w.TextEnd(0)
-				if DEBUG {
-					fmt.Println("END OF TEXT")
-				}
+			if DEBUG {
+				fmt.Println("Remaining:", showBufferNew(buffer, bufft, buffc, buffi))
 			}
 		}
 
+		if eot {
+			eot = false
+			textEnd = true
+			w.TextEnd(0)
+			if DEBUG {
+				fmt.Println("END OF TEXT")
+			}
+		}
 		t &= ^FIRSTBIT
 
 		newchar = true
@@ -562,9 +569,9 @@
 	} else if epsilonState != 0 {
 		t0 = epsilonState
 		epsilonState = 0 // reset
-		buffo = epsilonOffset
+		buffc = epsilonOffset
 		if DEBUG {
-			fmt.Println("Get from epsilon stack and set buffo!", showBuffer(buffer, buffo, buffi))
+			fmt.Println("Get from epsilon stack and set buffo!", showBufferNew(buffer, bufft, buffc, buffi))
 		}
 		goto PARSECHARM
 	}