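Fix buffer flushing to work with tei2korapxml

Flush the token writer after every sentence boundary when sentences
are written, and move the flush for token/sentence position output so
that it runs after the positions have been written rather than before.
In cmd/datok.go, close STDOUT and STDIN via defer and clarify the help
text of the convert command. In matrix.go, treat only io.EOF as end of
input and abort on any other read error.
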
Change-Id: I54eef64bea40f4ebb528e7f9870b9277000a4c13
diff --git a/cmd/datok.go b/cmd/datok.go
index e2a4efb..1c9d99b 100644
--- a/cmd/datok.go
+++ b/cmd/datok.go
@@ -16,7 +16,7 @@
Foma string `kong:"required,short='i',help='The Foma FST file'"`
Tokenizer string `kong:"required,short='o',help='The Tokenizer file'"`
DoubleArray bool `kong:"optional,short='d',help='Convert to Double Array instead of Matrix representation'"`
- } `kong:"cmd, help='Convert a foma file to a Matrix or Double Array tokenizer'"`
+ } `kong:"cmd, help='Convert a compiled foma FST file to a Matrix or Double Array tokenizer'"`
Tokenize struct {
Tokenizer string `kong:"required,short='t',help='The Matrix or Double Array Tokenizer file'"`
Input string `kong:"required,arg='',type='existingfile',help='Input file to tokenize (use - for STDIN)'"`
@@ -98,6 +98,7 @@
// Create token writer based on the options defined
tw := datok.NewTokenWriter(os.Stdout, flags)
+ defer os.Stdout.Close()
var r io.Reader
@@ -106,6 +107,7 @@
fileInfo, _ := os.Stdin.Stat()
if fileInfo.Mode()&os.ModeCharDevice == 0 {
r = os.Stdin
+ defer os.Stdin.Close()
} else {
log.Fatalln("Unable to read from STDIN")
os.Exit(1)
diff --git a/matrix.go b/matrix.go
index 44b2180..ecb22be 100644
--- a/matrix.go
+++ b/matrix.go
@@ -369,9 +369,16 @@
// No more runes to read
if err != nil {
- eof = true
- break
+ if err == io.EOF {
+ eof = true
+ break
+ }
+
+ log.Fatalln(err)
+ os.Exit(1)
+ return false
}
+
buffer[buffi] = char
buffi++
}
diff --git a/token_writer.go b/token_writer.go
index bccb1bd..39d97fa 100644
--- a/token_writer.go
+++ b/token_writer.go
@@ -106,6 +106,7 @@
} else if flags&SENTENCES != 0 {
tw.SentenceEnd = func(_ int) {
writer.WriteByte('\n')
+ writer.Flush()
}
// Ignore sentence boundaries
@@ -116,7 +117,6 @@
// Write token or sentence positions
if flags&(TOKEN_POS|SENTENCE_POS) != 0 {
tw.TextEnd = func(_ int) {
- writer.Flush()
// Write token positions
if flags&TOKEN_POS != 0 {
@@ -140,6 +140,8 @@
sentB = true
}
+ writer.Flush()
+
posC = 0
pos = pos[:0]
}