Minor performance improvements
Change-Id: I6552b7dc082b97c28bc889a378208d0588da755b
diff --git a/matrix.go b/matrix.go
index eb88086..e2d9858 100644
--- a/matrix.go
+++ b/matrix.go
@@ -55,9 +55,13 @@
if num > auto.sigmaCount {
panic("sigmaCount is smaller")
}
- if num > max {
- max = num
- }
+
+ // Find max without a branch (the >> 31 sign mask assumes max-num fits in a signed 32-bit range)
+ // see https://dev.to/jobinrjohnson/branchless-programming-does-it-really-matter-20j4
+ max -= ((max - num) & ((max - num) >> 31))
+ // if num > max {
+ // max = num
+ // }
}
// Add final entry to the list (maybe not necessary actually)
@@ -137,9 +141,13 @@
max := 0
for sym, num := range mat.sigma {
sigmalist[num] = sym
- if num > max {
- max = num
- }
+
+ // Find max without a branch (the >> 31 sign mask assumes max-num fits in a signed 32-bit range)
+ // see https://dev.to/jobinrjohnson/branchless-programming-does-it-really-matter-20j4
+ max -= ((max - num) & ((max - num) >> 31))
+ // if num > max {
+ // max = num
+ // }
}
// Add final entry to the list (maybe not necessary actually)
@@ -411,9 +419,7 @@
// Better not repeatedly check for a!
// Possibly keep a buffer with a.
if int(char) < 256 {
- if int(char) == EOT {
- eot = true
- }
+ eot = int(char) == EOT
// mat.SigmaASCII[] is initialized with mat.identity
a = mat.sigmaASCII[int(char)]
@@ -513,6 +519,7 @@
break
}
}
+ // Hopefully the compiler turns this check into branchless code
if DEBUG {
log.Println("-> Flush buffer: [", string(buffer[bufft:buffc]), "]", showBufferNew(buffer, bufft, buffc, buffi))
@@ -527,9 +534,7 @@
log.Println("-> Rewind buffer", bufft, buffc, buffi, epsilonOffset)
}
- for x, i := range buffer[buffc:buffi] {
- buffer[x] = i
- }
+ copy(buffer[0:], buffer[buffc:buffi])
buffi -= buffc
epsilonState = 0
@@ -575,6 +580,7 @@
buffc++
// Transition does not produce a character
+ // Hopefully the compiler generates branchless code for this condition
if buffc-bufft == 1 && (t&FIRSTBIT) != 0 {
if DEBUG {
log.Println("Nontoken forward", showBufferNew(buffer, bufft, buffc, buffi))
@@ -601,10 +607,7 @@
log.Println("-> Rewind buffer", bufft, buffc, buffi, epsilonOffset)
}
- // buffer = buffer[buffc:]
- for x, i := range buffer[buffc:buffi] {
- buffer[x] = i
- }
+ copy(buffer[0:], buffer[buffc:buffi])
buffi -= buffc
// epsilonOffset -= buffo