Optimize loading of datok files
diff --git a/datokenizer.go b/datokenizer.go
index 270b61e..f1f8b0c 100644
--- a/datokenizer.go
+++ b/datokenizer.go
@@ -987,22 +987,20 @@
// Read based on length
dat.array = make([]uint32, arraySize)
- for x := 0; x < arraySize; x++ {
- more, err = io.ReadFull(r, buf[0:4])
- if err != nil {
- if err == io.EOF {
- fmt.Println(arraySize, x)
- break
- }
- log.Error().Err(err)
- return nil
- }
- if more != 4 {
- log.Error().Msg("Not enough bytes read")
- return nil
- }
+ dataArray, err := io.ReadAll(r)
- dat.array[x] = bo.Uint32(buf[0:4])
+ if err != nil { // io.ReadAll returns a nil error at EOF, so any non-nil error is a real read failure
+ log.Error().Err(err).Msg("Unable to read array data")
+ return nil
+ }
+
+ if len(dataArray) < arraySize*4 {
+ log.Error().Msg("Not enough bytes read")
+ return nil
+ }
+
+ for x := 0; x < arraySize; x++ {
+ dat.array[x] = bo.Uint32(dataArray[x*4 : (x*4)+4])
}
return dat
diff --git a/datokenizer_test.go b/datokenizer_test.go
index e7e37d5..ade17cf 100644
--- a/datokenizer_test.go
+++ b/datokenizer_test.go
@@ -847,9 +847,23 @@
os.Exit(1)
}
}
- // 2021-08-11 (go 1.16)
- // go test -bench=. -test.benchmem
- // BenchmarkTransduce-4 19069 60609 ns/op 11048 B/op 137 allocs/op
- // 2021-08-112 (go 1.16)
- // BenchmarkTransduce-4 20833 55241 ns/op 9676 B/op 3 allocs/op
}
+
+// BenchmarkLoadDatokFile times LoadDatokFile on testdata/tokenizer.datok and exits with status 1 if it returns nil.
+func BenchmarkLoadDatokFile(b *testing.B) {
+ for iter := 0; iter < b.N; iter++ {
+ if LoadDatokFile("testdata/tokenizer.datok") == nil {
+ fmt.Println("Fail!")
+ os.Exit(1)
+ }
+ }
+}
+
+// 2021-08-11 (go 1.16)
+// go test -bench=. -test.benchmem
+// BenchmarkTransduce-4 19069 60609 ns/op 11048 B/op 137 allocs/op
+// 2021-08-12 (go 1.16)
+// BenchmarkTransduce-4 20833 55241 ns/op 9676 B/op 3 allocs/op
+// BenchmarkLoadDatokFile-4 4 258418169 ns/op 29916470 B/op 5697 allocs/op
+// BenchmarkTransduce-4 19430 58133 ns/op 18696 B/op 3 allocs/op
+// BenchmarkLoadDatokFile-4 8 139071939 ns/op 203158377 B/op 5742 allocs/op