Introduce EOT (end-of-transmission) marker
Change-Id: I7946e95c80fd7cd6ac1e0dd2fe5b188105f30534
diff --git a/matrix.go b/matrix.go
index 10680c3..9e130d5 100644
--- a/matrix.go
+++ b/matrix.go
@@ -11,6 +11,7 @@
const (
MAMAGIC = "MATOK"
+ EOT = 4
)
type MatrixTokenizer struct {
@@ -327,9 +328,15 @@
epsilonState := uint32(0)
epsilonOffset := 0
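+ // TEMP: debugging guard only - once loopcounter exceeds 100 the check further down makes this function return false; remove before release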
+ loopcounter := 0
+
// Remember if the last transition was epsilon
sentenceEnd := false
+ // Remember if a text end was already set
+ textEnd := false
+
buffer := make([]rune, 1024)
buffo := 0 // Buffer offset
buffi := 0 // Buffer length
@@ -341,6 +348,7 @@
var err error
eof := false
+ eot := false
newchar := true
PARSECHARM:
@@ -366,13 +374,18 @@
char = buffer[buffo]
if DEBUG {
- fmt.Println("Current char", string(char), showBuffer(buffer, buffo, buffi))
+ fmt.Println("Current char", string(char), int(char), showBuffer(buffer, buffo, buffi))
}
+ eot = false
+
// TODO:
// Better not repeatedly check for a!
// Possibly keep a buffer with a.
if int(char) < 256 {
+ if int(char) == EOT {
+ eot = true
+ }
a = mat.sigmaASCII[int(char)]
} else {
a, ok = mat.sigma[char]
@@ -447,6 +460,7 @@
}
newchar = false
+ eot = false
continue
}
@@ -475,9 +489,10 @@
w.Token(0, buffer[:buffo])
rewindBuffer = true
sentenceEnd = false
+ textEnd = false
} else {
sentenceEnd = true
- w.SentenceEnd()
+ w.SentenceEnd(0)
}
if DEBUG {
fmt.Println("-> Newline")
@@ -506,6 +521,15 @@
if DEBUG {
fmt.Println("Remaining:", showBuffer(buffer, buffo, buffi))
}
+
+ if eot {
+ eot = false
+ textEnd = true
+ w.TextEnd(0)
+ if DEBUG {
+ fmt.Println("END OF TEXT")
+ }
+ }
}
t &= ^FIRSTBIT
@@ -516,6 +540,11 @@
// Prevent endless epsilon loops!
}
+ if loopcounter > 100 {
+ return false
+ }
+ loopcounter++
+
// Input reader is not yet finished
if !eof {
if DEBUG {
@@ -528,7 +557,7 @@
fmt.Println("Entering final check")
}
- // Check epsilon transitions until a final state is reached
+ // Check epsilon transitions as long as possible
t0 = t
t = mat.array[(int(mat.epsilon)-1)*mat.stateCount+int(t0)]
a = mat.epsilon
@@ -552,11 +581,17 @@
// sentence split was reached. This may be controversial and therefore
// optional via parameter.
if !sentenceEnd {
- // writer.WriteRune('\n')
- // ::Sentenceend
- w.SentenceEnd()
+ w.SentenceEnd(0)
if DEBUG {
- fmt.Println("-> Newline")
+ fmt.Println("Sentence end")
+ }
+ }
+
+ if !textEnd {
+ w.TextEnd(0)
+
+ if DEBUG {
+ fmt.Println("Text end")
}
}