Add skip-method proposed by Morita et al. (2001)
diff --git a/datokenizer.go b/datokenizer.go
index a213b33..3e01396 100644
--- a/datokenizer.go
+++ b/datokenizer.go
@@ -30,6 +30,7 @@
"encoding/binary"
"fmt"
"io"
+ "math"
"os"
"sort"
"strconv"
@@ -573,6 +574,7 @@
// Set base to the first free slot in the double array
base = dat.xCheck(A)
+ // base = dat.xCheckSkip(A)
// base = dat.xCheckNiu(A, &block_begin_pos)
dat.array[t].setBase(base)
@@ -771,6 +773,25 @@
return base
}
+// xCheckSkip implements xCheck with the skip improvement proposed by Morita
+// et al. (2001). It returns the first base tried where every symbol slot has check 0.
+func (dat *DaTokenizer) xCheckSkip(symbols []int) uint32 {
+
+ // Skip step: the search begins at 90% of (maxSize-1), truncated to uint32
+ base := uint32(math.Abs(float64(dat.maxSize-1) * .9))
+
+OVERLAP:
+ // Resize the array if necessary; rerun after every base++ (the goto lands here)
+ dat.resize(int(base) + dat.final)
+ for _, a := range symbols {
+ if dat.array[int(base)+a].getCheck() != 0 {
+ base++
+ goto OVERLAP
+ }
+ }
+ return base
+}
+
// This is an implementation of xCheck wit an improvement
// proposed by Niu et al. (2013)
func (dat *DaTokenizer) xCheckNiu(symbols []int, block_begin_pos *uint32) uint32 {