Serialize and deserialize matrix representation
diff --git a/datok.go b/datok.go
index 4f7ac6f..7812981 100644
--- a/datok.go
+++ b/datok.go
@@ -39,7 +39,7 @@
 
 const (
 	DEBUG            = false
-	MAGIC            = "DATOK"
+	DAMAGIC          = "DATOK"
 	VERSION          = uint16(1)
 	FIRSTBIT  uint32 = 1 << 31
 	SECONDBIT uint32 = 1 << 30
@@ -489,7 +489,7 @@
 	defer wb.Flush()
 
 	// Store magical header
-	all, err := wb.Write([]byte(MAGIC))
+	all, err := wb.Write([]byte(DAMAGIC))
 	if err != nil {
 		log.Println(err)
 		return int64(all), err
@@ -614,7 +614,7 @@
 	r := bufio.NewReader(ior)
 
 	buf := make([]byte, 1024)
-	buf = buf[0:len(MAGIC)]
+	buf = buf[0:len(DAMAGIC)]
 
 	_, err := r.Read(buf)
 
@@ -623,7 +623,7 @@
 		return nil
 	}
 
-	if string(MAGIC) != string(buf) {
+	if string(DAMAGIC) != string(buf) {
 		log.Println("Not a datok file")
 		return nil
 	}
@@ -907,7 +907,8 @@
 			}
 
 			buffi -= buffo
-			epsilonOffset -= buffo
+			// epsilonOffset -= buffo
+			epsilonOffset = buffo
 			buffo = 0
 			if DEBUG {
 				fmt.Println("Remaining:", showBuffer(buffer, buffo, buffi))
diff --git a/matrix.go b/matrix.go
index 356efa9..4ce82c9 100644
--- a/matrix.go
+++ b/matrix.go
@@ -2,22 +2,29 @@
 
 import (
 	"bufio"
+	"compress/gzip"
 	"fmt"
 	"io"
+	"log"
+	"os"
+)
+
+const (
+	MAMAGIC = "MATOK"
 )
 
 type MatrixTokenizer struct {
 	sigma      map[rune]int
 	sigmaASCII [256]int
-	array      []int
+	array      []uint32
 	stateCount int
 
 	// Special symbols in sigma
 	epsilon  int
 	unknown  int
 	identity int
-	final    int
-	tokenend int
+	// final    int
+	// tokenend int
 }
 
 // ToMatrix turns the intermediate tokenizer into a
@@ -25,16 +32,16 @@
 func (auto *Automaton) ToMatrix() *MatrixTokenizer {
 
 	mat := &MatrixTokenizer{
-		sigma:      make(map[rune]int),
-		final:      auto.final,
-		unknown:    auto.unknown,
-		identity:   auto.identity,
-		epsilon:    auto.epsilon,
-		tokenend:   auto.tokenend,
+		sigma: make(map[rune]int),
+		// final:      auto.final,
+		unknown:  auto.unknown,
+		identity: auto.identity,
+		epsilon:  auto.epsilon,
+		// tokenend:   auto.tokenend,
 		stateCount: auto.stateCount,
 	}
 
-	mat.array = make([]int, (auto.stateCount+1)*(auto.sigmaCount+1))
+	mat.array = make([]uint32, (auto.stateCount+1)*(auto.sigmaCount))
 
 	for num, sym := range auto.sigmaRev {
 		if int(sym) < 256 {
@@ -48,9 +55,9 @@
 	remember := make([]bool, auto.stateCount+2)
 
 	// Store all transitions in matrix
-	var toMatrix func([]int, int)
+	var toMatrix func([]uint32, int)
 
-	toMatrix = func(matrix []int, start int) {
+	toMatrix = func(matrix []uint32, start int) {
 		if start > auto.stateCount {
 			panic("stateCount is smaller")
 		}
@@ -59,11 +66,11 @@
 		}
 		remember[start] = true
 		for alpha, t := range auto.transitions[start] {
-			matrix[(alpha-1)*auto.stateCount+start] = t.end
+			matrix[(alpha-1)*auto.stateCount+start] = uint32(t.end)
 
 			// Mark nontoken transitions
 			if t.nontoken {
-				matrix[(alpha-1)*auto.stateCount+start] *= -1
+				matrix[(alpha-1)*auto.stateCount+start] |= FIRSTBIT
 			}
 
 			toMatrix(matrix, t.end)
@@ -75,15 +82,258 @@
 	return mat
 }
 
+// Save stores the matrix data in a file
+func (mat *MatrixTokenizer) Save(file string) (n int64, err error) {
+	f, err := os.Create(file)
+	if err != nil {
+		log.Println(err)
+		return 0, err
+	}
+	defer f.Close()
+	gz := gzip.NewWriter(f)
+	defer gz.Close()
+	n, err = mat.WriteTo(gz)
+	if err != nil {
+		log.Println(err)
+		return n, err
+	}
+	gz.Flush()
+	return n, nil
+}
+
+// WriteTo stores the matrix data in an io.Writer.
+func (mat *MatrixTokenizer) WriteTo(w io.Writer) (n int64, err error) {
+
+	wb := bufio.NewWriter(w)
+	defer wb.Flush()
+
+	// Store magical header
+	all, err := wb.Write([]byte(MAMAGIC))
+	if err != nil {
+		log.Println(err)
+		return int64(all), err
+	}
+
+	// Get sigma as a list
+	sigmalist := make([]rune, len(mat.sigma)+12)
+	max := 0
+	for sym, num := range mat.sigma {
+		sigmalist[num] = sym
+		if num > max {
+			max = num
+		}
+	}
+
+	sigmalist = sigmalist[:max+1]
+
+	buf := make([]byte, 0, 12)
+	bo.PutUint16(buf[0:2], VERSION)
+	bo.PutUint16(buf[2:4], uint16(mat.epsilon))
+	bo.PutUint16(buf[4:6], uint16(mat.unknown))
+	bo.PutUint16(buf[6:8], uint16(mat.identity))
+	bo.PutUint16(buf[8:10], uint16(mat.stateCount))
+	bo.PutUint16(buf[10:12], uint16(len(sigmalist)))
+	// bo.PutUint32(buf[12:16], uint32(len(mat.array)*2)) // Legacy support
+	more, err := wb.Write(buf[0:12])
+	if err != nil {
+		log.Println(err)
+		return int64(all), err
+	}
+
+	all += more
+
+	// Write sigma
+	for _, sym := range sigmalist {
+
+		more, err = wb.WriteRune(sym)
+		if err != nil {
+			log.Println(err)
+			return int64(all), err
+		}
+		all += more
+	}
+
+	if err != nil {
+		log.Println(err)
+		return int64(all), err
+	}
+
+	// Test marker - could be checksum
+	more, err = wb.Write([]byte("M"))
+	if err != nil {
+		log.Println(err)
+		return int64(all), err
+	}
+	all += more
+
+	// for x := 0; x < len(dat.array); x++ {
+	for _, x := range mat.array {
+		bo.PutUint32(buf[0:4], uint32(x))
+		more, err = wb.Write(buf[0:4])
+		if err != nil {
+			log.Println(err)
+			return int64(all), err
+		}
+		all += more
+		if more != 4 {
+			log.Println("Can not write base uint32")
+			return int64(all), err
+		}
+		/*
+			bo.PutUint32(buf[0:4], bc.check)
+			more, err = wb.Write(buf[0:4])
+			if err != nil {
+				log.Println(err)
+				return int64(all), err
+			}
+			all += more
+			if more != 4 {
+				log.Println("Can not write check uint32")
+				return int64(all), err
+			}
+		*/
+	}
+
+	return int64(all), err
+}
+
+// LoadMatrixFile reads a matrix representation tokenizer
+// from a file.
+func LoadMatrixFile(file string) *MatrixTokenizer {
+	f, err := os.Open(file)
+	if err != nil {
+		log.Println(err)
+		return nil
+	}
+	defer f.Close()
+
+	gz, err := gzip.NewReader(f)
+	if err != nil {
+		log.Println(err)
+		return nil
+	}
+	defer gz.Close()
+
+	// Todo: Read the whole file!
+	return ParseMatrix(gz)
+}
+
+// ParseMatrix reads a matrix representation tokenizer
+// from an io.Reader.
+func ParseMatrix(ior io.Reader) *MatrixTokenizer {
+
+	// Initialize tokenizer with default values
+	mat := &MatrixTokenizer{
+		sigma:    make(map[rune]int),
+		epsilon:  0,
+		unknown:  0,
+		identity: 0,
+		// final:      0,
+		stateCount: 0,
+		// transCount: 0,
+	}
+
+	r := bufio.NewReader(ior)
+
+	buf := make([]byte, 1024)
+	buf = buf[0:len(MAMAGIC)]
+
+	_, err := r.Read(buf)
+
+	if err != nil {
+		log.Println(err)
+		return nil
+	}
+
+	if string(MAMAGIC) != string(buf) {
+		log.Println("Not a matok file")
+		return nil
+	}
+
+	more, err := io.ReadFull(r, buf[0:12])
+	if err != nil {
+		log.Println(err)
+		return nil
+	}
+
+	if more != 12 {
+		log.Println("Read bytes do not fit")
+		return nil
+	}
+
+	version := bo.Uint16(buf[0:2])
+
+	if version != VERSION {
+		log.Println("Version not compatible")
+		return nil
+	}
+
+	mat.epsilon = int(bo.Uint16(buf[2:4]))
+	mat.unknown = int(bo.Uint16(buf[4:6]))
+	mat.identity = int(bo.Uint16(buf[6:8]))
+	mat.stateCount = int(bo.Uint16(buf[8:10]))
+
+	sigmaCount := int(bo.Uint16(buf[10:12]))
+	arraySize := (mat.stateCount + 1) * (sigmaCount + 1)
+	// int(bo.Uint32(buf[12:16]))
+
+	// Shouldn't be relevant though
+	// mat.maxSize = arraySize - 1
+
+	for x := 0; x < sigmaCount; x++ {
+		sym, _, err := r.ReadRune()
+		if err == nil && sym != 0 {
+			if int(sym) < 256 {
+				mat.sigmaASCII[int(sym)] = x
+			}
+			mat.sigma[sym] = x
+		}
+	}
+
+	_, err = io.ReadFull(r, buf[0:1])
+
+	if err != nil {
+		log.Print(err)
+		return nil
+	}
+
+	if string("M") != string(buf[0:1]) {
+		log.Println("Not a matok file")
+		return nil
+	}
+
+	// Read based on length
+	mat.array = make([]uint32, arraySize)
+
+	dataArray, err := io.ReadAll(r)
+
+	if err == io.EOF {
+		log.Println(err)
+		return nil
+	}
+
+	if len(dataArray) < arraySize*4 {
+		log.Println("Not enough bytes read", len(dataArray), arraySize)
+		return nil
+	}
+
+	for x := 0; x < arraySize; x++ {
+		//		mat.array[x] = bo.Uint32(dataArray[x*8 : (x*8)+4])
+		mat.array[x] = bo.Uint32(dataArray[x*4 : (x*4)+4])
+	}
+
+	return mat
+}
+
 func (mat *MatrixTokenizer) Transduce(r io.Reader, w io.Writer) bool {
 	var a int
-	var t0 int
-	t := int(1) // Initial state
+	var t0 uint32
+	t := uint32(1) // Initial state
 	var ok, rewindBuffer bool
 
 	// Remember the last position of a possible tokenend,
 	// in case the automaton fails.
-	epsilonState := int(0)
+	epsilonState := uint32(0)
 	epsilonOffset := 0
 
 	// Remember if the last transition was epsilon
@@ -150,10 +400,24 @@
 
 			// Check for epsilon transitions and remember
 
-			if mat.array[(mat.epsilon-1)*mat.stateCount+t0] != 0 {
+			// TODO: Can t0 be negative here?
+			if mat.array[(mat.epsilon-1)*mat.stateCount+int(t0)] != 0 {
 				// Remember state for backtracking to last tokenend state
+
+				// Maybe not necessary - and should be simpler!
+				// Just Remove
+				t0 &= ^FIRSTBIT
+				/*
+					if (t0 & FIRSTBIT) != 0 {
+						t0 ^= FIRSTBIT
+					}
+				*/
 				epsilonState = t0
 				epsilonOffset = buffo
+
+				if DEBUG {
+					fmt.Println("epsilonOffset is set to", buffo)
+				}
 			}
 		}
 
@@ -219,7 +483,7 @@
 
 			// Transition does not produce a character
 			// if buffo == 1 && ta.isNonToken() {
-			if buffo == 1 && t < 0 {
+			if buffo == 1 && (t&FIRSTBIT) != 0 {
 				if DEBUG {
 					fmt.Println("Nontoken forward", showBuffer(buffer, buffo, buffi))
 				}
@@ -248,13 +512,20 @@
 		// Rewind the buffer if necessary
 		if rewindBuffer {
 
+			if DEBUG {
+				fmt.Println("-> Rewind buffer", buffo, buffi, epsilonOffset)
+			}
+
 			// TODO: Better as a ring buffer
 			for x, i := range buffer[buffo:buffi] {
 				buffer[x] = i
 			}
 
 			buffi -= buffo
-			epsilonOffset -= buffo
+			// epsilonOffset -= buffo
+			epsilonOffset = 0
+			epsilonState = 0
+
 			buffo = 0
 			if DEBUG {
 				fmt.Println("Remaining:", showBuffer(buffer, buffo, buffi))
@@ -274,9 +545,12 @@
 		*/
 
 		// Ignore nontoken mark
-		if t < 0 {
-			t *= -1
-		}
+		/*
+			if t < 0 {
+				t *= -1
+			}
+		*/
+		t &= ^FIRSTBIT
 
 		newchar = true
 
@@ -340,7 +614,7 @@
 	newchar = false
 	// if dat.array[t].getCheck() == t0 {
 	// t can't be < 0
-	if t > 0 {
+	if t != 0 {
 		// Remember state for backtracking to last tokenend state
 		goto PARSECHARM
 
diff --git a/matrix_test.go b/matrix_test.go
index 49a1523..37a61b2 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -55,6 +55,36 @@
 	assert.Equal(6, len(tokens))
 }
 
+func TestReadWriteMatrixTokenizer(t *testing.T) {
+	assert := assert.New(t)
+	foma := LoadFomaFile("testdata/simpletok.fst")
+	assert.NotNil(foma)
+
+	mat := foma.ToMatrix()
+	assert.NotNil(foma)
+
+	assert.True(tmatch(mat, "bau"))
+	assert.True(tmatch(mat, "bad"))
+	assert.True(tmatch(mat, "wald gehen"))
+	b := make([]byte, 0, 1024)
+	buf := bytes.NewBuffer(b)
+	n, err := mat.WriteTo(buf)
+	assert.Nil(err)
+	assert.Equal(int64(248), n)
+	mat2 := ParseMatrix(buf)
+	assert.NotNil(mat2)
+	assert.Equal(mat.sigma, mat2.sigma)
+	assert.Equal(mat.epsilon, mat2.epsilon)
+	assert.Equal(mat.unknown, mat2.unknown)
+	assert.Equal(mat.identity, mat2.identity)
+	assert.Equal(mat.stateCount, mat2.stateCount)
+	assert.Equal(len(mat.array), len(mat2.array))
+	assert.Equal(mat.array, mat2.array)
+	assert.True(tmatch(mat2, "bau"))
+	assert.True(tmatch(mat2, "bad"))
+	assert.True(tmatch(mat2, "wald gehen"))
+}
+
 func TestFullTokenizerMatrixSentenceSplitter(t *testing.T) {
 	assert := assert.New(t)
 	foma := LoadFomaFile("testdata/tokenizer.fst")