Replace os.Exit calls with returning nil
diff --git a/datokenizer.go b/datokenizer.go
index 066fe73..3c6310b 100644
--- a/datokenizer.go
+++ b/datokenizer.go
@@ -15,13 +15,13 @@
// TODO:
// - replace maxSize with the check value
-// - Strip first state and make everything start with 0!
// - Add checksum to serialization.
// - Mark epsilon transitions in bytes
// - Introduce methods on BC array entries instead of
// jumping into the entries all the time!
// - Instead of memoizing the loadFactor, better remember
// the number of set transitions
+// - Replace table with a map
import (
"bufio"
@@ -107,14 +107,14 @@
f, err := os.Open(file)
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
defer f.Close()
gz, err := gzip.NewReader(f)
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
defer gz.Close()
@@ -153,7 +153,7 @@
break
}
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
// Read parser mode for the following lines
@@ -206,25 +206,25 @@
*/
if elem[6] != "1" {
log.Error().Msg("The FST needs to be deterministic")
- os.Exit(1)
+ return nil
}
if elem[9] != "1" {
log.Error().Msg("The FST needs to be epsilon free")
- os.Exit(1)
+ return nil
}
elemint[0], err = strconv.Atoi(elem[1])
if err != nil {
log.Error().Msg("Can't read arccount")
- os.Exit(1)
+ return nil
}
tok.arcCount = elemint[0]
elemint[0], err = strconv.Atoi(elem[2])
if err != nil {
log.Error().Msg("Can't read statecount")
- os.Exit(1)
+ return nil
}
// States start at 1 in Mizobuchi et al (2000),
@@ -342,12 +342,12 @@
":" +
string(tok.sigmaRev[outSym]) +
")")
- os.Exit(1)
+ return nil
}
} else if inSym == tok.epsilon {
log.Error().Msg("General epsilon transitions are not supported")
- os.Exit(1)
+ return nil
}
// Create an edge based on the collected information
@@ -408,7 +408,7 @@
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
tok.sigmaCount = number
@@ -444,7 +444,7 @@
default:
{
log.Error().Msg("MCS not supported: " + line)
- os.Exit(1)
+ return nil
}
}
continue
@@ -453,11 +453,11 @@
line, err = r.ReadString('\n')
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
if len(line) != 1 {
log.Error().Msg("MCS not supported:" + line)
- os.Exit(0)
+ return nil
}
symbol = rune('\n')
}
@@ -888,14 +888,14 @@
f, err := os.Open(file)
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
defer f.Close()
gz, err := gzip.NewReader(f)
if err != nil {
log.Error().Err(err)
- os.Exit(0)
+ return nil
}
defer gz.Close()
@@ -1027,8 +1027,9 @@
// FSA. The rules are always greedy. If the automaton fails,
// it takes the last possible token ending branch.
//
-// Based on Match with additional support
-// for NONTOKEN and TOKENEND handling
+// Based on Mizobuchi et al (2000), p. 129,
+// with additional support for IDENTITY, UNKNOWN
+// and EPSILON transitions and NONTOKEN and TOKENEND handling.
func (dat *DaTokenizer) Transduce(r io.Reader, w io.Writer) bool {
var a int
var t0 uint32