| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 1 | package datokenizer | 
|  | 2 |  | 
/**
 * The file reader is basically a port of foma2js,
 * licensed under the Apache License, version 2,
 * and written by Mans Hulden.
 */
|  | 8 |  | 
// TODO:
// - replace maxSize with the check value
// - Strip first state and make everything start with 0!
// - Serialize!
// - Split Tokenizer and DaTokenizer
|  | 14 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 15 | import ( | 
|  | 16 | "bufio" | 
|  | 17 | "compress/gzip" | 
|  | 18 | "fmt" | 
|  | 19 | "io" | 
|  | 20 | "os" | 
| Akron | c9d84a6 | 2021-08-03 15:56:03 +0200 | [diff] [blame] | 21 | "sort" | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 22 | "strconv" | 
|  | 23 | "strings" | 
|  | 24 | "unicode/utf8" | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 25 |  | 
|  | 26 | "github.com/rs/zerolog/log" | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 27 | ) | 
|  | 28 |  | 
// Parser modes. Parse switches between them whenever it
// reads one of the "##...##" section markers of a foma file.
const (
	PROPS = iota + 1
	SIGMA
	STATES
	NONE
)

const (
	// NEWLINE is the line feed character.
	NEWLINE = '\n'

	// DEBUG enables diagnostic output on stdout.
	DEBUG = false
)
|  | 37 |  | 
// Special symbols in sigma.
// All of them are -1 until Parse assigns the ids
// found in the foma file.
var (
	EPSILON  = -1
	UNKNOWN  = -1
	IDENTITY = -1
	FINAL    = -1
)
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 43 |  | 
// mapping pairs a state of the parsed automaton with the
// state that represents it in the double array.
type mapping struct {
	source int // state in the parsed automaton
	target int // state in the double array
}
|  | 48 |  | 
// edge is a single outgoing transition of a state.
type edge struct {
	inSym  int // id of the input symbol
	outSym int // id of the output symbol
	end    int // state the transition leads to
}
|  | 54 |  | 
|  | 55 | type Tokenizer struct { | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 56 | // sigma       map[rune]int | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 57 | sigmaRev    map[int]rune | 
|  | 58 | arcCount    int | 
|  | 59 | stateCount  int | 
|  | 60 | sigmaCount  int | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 61 | transitions []map[int]*edge | 
|  | 62 | } | 
|  | 63 |  | 
// DaTokenizer is the double array representation of the automaton.
type DaTokenizer struct {
	// sigma maps characters to their symbol id.
	sigma map[rune]int
	// sigmaRev map[int]rune

	// maxSize is the highest array index written so far.
	maxSize int

	// array interleaves both arrays: the base value of state p is
	// stored at index 2*p, its check value at index 2*p+1.
	array []int
}
|  | 70 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 71 | func ParseFile(file string) *Tokenizer { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 72 | f, err := os.Open(file) | 
|  | 73 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 74 | log.Error().Err(err) | 
|  | 75 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 76 | } | 
|  | 77 | defer f.Close() | 
|  | 78 |  | 
|  | 79 | gz, err := gzip.NewReader(f) | 
|  | 80 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 81 | log.Error().Err(err) | 
|  | 82 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 83 | } | 
|  | 84 | defer gz.Close() | 
|  | 85 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 86 | return Parse(gz) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 87 | } | 
|  | 88 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 89 | func Parse(ior io.Reader) *Tokenizer { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 90 | r := bufio.NewReader(ior) | 
|  | 91 |  | 
|  | 92 | tok := &Tokenizer{ | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 93 | // sigma:    make(map[rune]int), | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 94 | sigmaRev: make(map[int]rune), | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 95 | } | 
|  | 96 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 97 | var state, inSym, outSym, end, final int | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 98 |  | 
|  | 99 | mode := 0 | 
|  | 100 | var elem []string | 
|  | 101 | var elemint [5]int | 
|  | 102 |  | 
|  | 103 | for { | 
|  | 104 | line, err := r.ReadString('\n') | 
|  | 105 | if err != nil { | 
|  | 106 | if err == io.EOF { | 
|  | 107 | break | 
|  | 108 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 109 | log.Error().Err(err) | 
|  | 110 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 111 | } | 
|  | 112 | if strings.HasPrefix(line, "##foma-net") { | 
|  | 113 | continue | 
|  | 114 | } | 
|  | 115 | if strings.HasPrefix(line, "##props##") { | 
|  | 116 | mode = PROPS | 
|  | 117 | continue | 
|  | 118 | } | 
|  | 119 | if strings.HasPrefix(line, "##states##") { | 
|  | 120 | mode = STATES | 
|  | 121 |  | 
|  | 122 | // Adds a final transition symbol to sigma | 
|  | 123 | // written as '#' in Mizobuchi et al (2000) | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 124 | tok.sigmaCount++ | 
|  | 125 | FINAL = tok.sigmaCount | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 126 | continue | 
|  | 127 | } | 
|  | 128 | if strings.HasPrefix(line, "##sigma##") { | 
|  | 129 | mode = SIGMA | 
|  | 130 | continue | 
|  | 131 | } | 
|  | 132 | if strings.HasPrefix(line, "##end##") { | 
|  | 133 | mode = NONE | 
|  | 134 | continue | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | switch mode { | 
|  | 138 | case PROPS: | 
|  | 139 | { | 
|  | 140 | elem = strings.Split(line, " ") | 
|  | 141 | /* | 
|  | 142 | fmt.Println("arity:            " + elem[0]) | 
|  | 143 | fmt.Println("arccount:         " + elem[1]) | 
|  | 144 | fmt.Println("statecount:       " + elem[2]) | 
|  | 145 | fmt.Println("linecount:        " + elem[3]) | 
|  | 146 | fmt.Println("finalcount:       " + elem[4]) | 
|  | 147 | fmt.Println("pathcount:        " + elem[5]) | 
|  | 148 | fmt.Println("is_deterministic: " + elem[6]) | 
|  | 149 | fmt.Println("is_pruned:        " + elem[7]) | 
|  | 150 | fmt.Println("is_minimized:     " + elem[8]) | 
|  | 151 | fmt.Println("is_epsilon_free:  " + elem[9]) | 
|  | 152 | fmt.Println("is_loop_free:     " + elem[10]) | 
|  | 153 | fmt.Println("extras:           " + elem[11]) | 
|  | 154 | fmt.Println("name:             " + elem[12]) | 
|  | 155 | */ | 
|  | 156 | if elem[6] != "1" { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 157 | log.Error().Msg("The FST needs to be deterministic") | 
|  | 158 | os.Exit(1) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 159 | } | 
|  | 160 | if elem[9] != "1" { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 161 | log.Error().Msg("The FST needs to be epsilon free") | 
|  | 162 | os.Exit(1) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 163 | } | 
|  | 164 |  | 
|  | 165 | elemint[0], err = strconv.Atoi(elem[1]) | 
|  | 166 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 167 | log.Error().Msg("Can't read arccount") | 
|  | 168 | os.Exit(1) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 169 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 170 | tok.arcCount = elemint[0] | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 171 |  | 
|  | 172 | // States start at 1 in Mizobuchi et al (2000), | 
|  | 173 | // as the state 0 is associated with a fail. | 
|  | 174 | // Initialize states and transitions | 
|  | 175 | elemint[0], err = strconv.Atoi(elem[2]) | 
|  | 176 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 177 | log.Error().Msg("Can't read statecount") | 
|  | 178 | os.Exit(1) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 179 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 180 | tok.stateCount = elemint[0] | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 181 | tok.transitions = make([]map[int]*edge, elemint[0]+1) | 
|  | 182 | continue | 
|  | 183 | } | 
|  | 184 | case STATES: | 
|  | 185 | { | 
|  | 186 | elem = strings.Split(line[0:len(line)-1], " ") | 
|  | 187 | if elem[0] == "-1" { | 
|  | 188 | continue | 
|  | 189 | } | 
|  | 190 | elemint[0], err = strconv.Atoi(elem[0]) | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 191 | if err != nil { | 
|  | 192 | break | 
|  | 193 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 194 |  | 
|  | 195 | if len(elem) > 1 { | 
|  | 196 | elemint[1], err = strconv.Atoi(elem[1]) | 
|  | 197 | if err != nil { | 
|  | 198 | break | 
|  | 199 | } | 
|  | 200 | if len(elem) > 2 { | 
|  | 201 | elemint[2], err = strconv.Atoi(elem[2]) | 
|  | 202 | if err != nil { | 
|  | 203 | break | 
|  | 204 | } | 
|  | 205 | if len(elem) > 3 { | 
|  | 206 | elemint[3], err = strconv.Atoi(elem[3]) | 
|  | 207 | if err != nil { | 
|  | 208 | break | 
|  | 209 | } | 
|  | 210 | if len(elem) > 4 { | 
|  | 211 | elemint[4], err = strconv.Atoi(elem[4]) | 
|  | 212 | if err != nil { | 
|  | 213 | break | 
|  | 214 | } | 
|  | 215 | } | 
|  | 216 | } | 
|  | 217 | } | 
|  | 218 | } | 
|  | 219 |  | 
|  | 220 | switch len(elem) { | 
|  | 221 | case 5: | 
|  | 222 | { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 223 | state = elemint[0] | 
|  | 224 | inSym = elemint[1] | 
|  | 225 | outSym = elemint[2] | 
|  | 226 | end = elemint[3] | 
|  | 227 | final = elemint[4] | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 228 | } | 
|  | 229 | case 4: | 
|  | 230 | { | 
|  | 231 | if elemint[1] == -1 { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 232 | state = elemint[0] | 
|  | 233 | final = elemint[3] | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 234 | } else { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 235 | state = elemint[0] | 
|  | 236 | inSym = elemint[1] | 
|  | 237 | end = elemint[2] | 
|  | 238 | final = elemint[3] | 
|  | 239 | outSym = inSym | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 240 | } | 
|  | 241 | } | 
|  | 242 | case 3: | 
|  | 243 | { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 244 | inSym = elemint[0] | 
|  | 245 | outSym = elemint[1] | 
|  | 246 | end = elemint[2] | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 247 | } | 
|  | 248 | case 2: | 
|  | 249 | { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 250 | inSym = elemint[0] | 
|  | 251 | end = elemint[1] | 
|  | 252 | outSym = inSym | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 253 | } | 
|  | 254 | } | 
|  | 255 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 256 | // While the states in foma start with 0, the states in the | 
|  | 257 | // Mizobuchi FSA start with one - so we increase every state by 1. | 
|  | 258 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 259 | if inSym != outSym { | 
|  | 260 |  | 
|  | 261 | // Allow any epsilon to become a newline | 
|  | 262 | if !(inSym == EPSILON && tok.sigmaRev[outSym] == NEWLINE) && | 
|  | 263 |  | 
|  | 264 | // Allow any whitespace to be ignored | 
|  | 265 | !(inSym != EPSILON && outSym == EPSILON) && | 
|  | 266 |  | 
|  | 267 | // Allow any whitespace to become a new line | 
|  | 268 | !(tok.sigmaRev[outSym] == NEWLINE) { | 
|  | 269 |  | 
|  | 270 | log.Error().Msg( | 
|  | 271 | "Unsupported transition: " + | 
|  | 272 | strconv.Itoa(state) + | 
|  | 273 | " -> " + strconv.Itoa(end) + | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 274 | " (" + | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 275 | strconv.Itoa(inSym) + | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 276 | ":" + | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 277 | strconv.Itoa(outSym) + | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 278 | ") (" + | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 279 | string(tok.sigmaRev[inSym]) + | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 280 | ":" + | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 281 | string(tok.sigmaRev[outSym]) + | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 282 | ")") | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 283 | os.Exit(1) | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 284 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 285 | } | 
|  | 286 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 287 | // This collects all edges until arrstate changes | 
|  | 288 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 289 | // TODO: | 
|  | 290 | //   if arrin == EPSILON && arrout == TOKENEND, mark state as newline | 
|  | 291 | //   if the next transition is the same, remove TOKENEND and add SENTENCEEND | 
|  | 292 | //   This requires to remove the transition alltogether and marks the state instead. | 
|  | 293 |  | 
|  | 294 | // TODO: | 
|  | 295 | //   if arrout == EPSILON, mark the transition as NOTOKEN | 
|  | 296 |  | 
|  | 297 | targetObj := &edge{ | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 298 | inSym:  inSym, | 
|  | 299 | outSym: outSym, | 
|  | 300 | end:    end + 1, | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 301 | } | 
|  | 302 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 303 | // Initialize outgoing states | 
|  | 304 | if tok.transitions[state+1] == nil { | 
|  | 305 | tok.transitions[state+1] = make(map[int]*edge) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 306 | } | 
|  | 307 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 308 | // Ignore transitions with invalid symbols | 
|  | 309 | if inSym >= 0 { | 
|  | 310 | tok.transitions[state+1][inSym] = targetObj | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 311 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 312 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 313 | // Add final transition | 
|  | 314 | if final == 1 { | 
|  | 315 | tok.transitions[state+1][FINAL] = &edge{} | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 316 | } | 
|  | 317 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 318 | if DEBUG { | 
|  | 319 | fmt.Println("Add", | 
|  | 320 | state+1, "->", end+1, | 
|  | 321 | "(", | 
|  | 322 | inSym, | 
|  | 323 | ":", | 
|  | 324 | outSym, | 
|  | 325 | ") (", | 
|  | 326 | string(tok.sigmaRev[inSym]), | 
|  | 327 | ":", | 
|  | 328 | string(tok.sigmaRev[outSym]), | 
|  | 329 | ")") | 
|  | 330 | } | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 331 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 332 | continue | 
|  | 333 | } | 
|  | 334 | case SIGMA: | 
|  | 335 | { | 
|  | 336 | elem = strings.SplitN(line[0:len(line)-1], " ", 2) | 
|  | 337 |  | 
|  | 338 | // Turn string into sigma id | 
|  | 339 | number, err := strconv.Atoi(elem[0]) | 
|  | 340 |  | 
|  | 341 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 342 | log.Error().Err(err) | 
|  | 343 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 344 | } | 
|  | 345 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 346 | tok.sigmaCount = number | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 347 |  | 
|  | 348 | var symbol rune | 
|  | 349 |  | 
|  | 350 | // Read rune | 
|  | 351 | if utf8.RuneCountInString(elem[1]) == 1 { | 
|  | 352 | symbol = []rune(elem[1])[0] | 
|  | 353 |  | 
|  | 354 | // Probably a MCS | 
|  | 355 | } else if utf8.RuneCountInString(elem[1]) > 1 { | 
|  | 356 | switch elem[1] { | 
|  | 357 | case "@_EPSILON_SYMBOL_@": | 
|  | 358 | { | 
|  | 359 | EPSILON = number | 
|  | 360 | continue | 
|  | 361 | } | 
|  | 362 | case "@_UNKNOWN_SYMBOL_@": | 
|  | 363 | { | 
|  | 364 | UNKNOWN = number | 
|  | 365 | continue | 
|  | 366 | } | 
|  | 367 |  | 
|  | 368 | case "@_IDENTITY_SYMBOL_@": | 
|  | 369 | { | 
|  | 370 | IDENTITY = number | 
|  | 371 | continue | 
|  | 372 | } | 
|  | 373 | default: | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 374 | { | 
|  | 375 | log.Error().Msg("MCS not supported: " + line) | 
|  | 376 | os.Exit(1) | 
|  | 377 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 378 | } | 
|  | 379 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 380 | } else { // Probably a new line symbol | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 381 | line, err = r.ReadString('\n') | 
|  | 382 | if err != nil { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 383 | log.Error().Err(err) | 
|  | 384 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 385 | } | 
|  | 386 | if len(line) != 1 { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 387 | log.Error().Msg("MCS not supported:" + line) | 
|  | 388 | os.Exit(0) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 389 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 390 | symbol = rune(NEWLINE) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 391 | } | 
|  | 392 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 393 | tok.sigmaRev[number] = symbol | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 394 | } | 
|  | 395 | } | 
|  | 396 | } | 
|  | 397 |  | 
|  | 398 | return tok | 
|  | 399 | } | 
|  | 400 |  | 
|  | 401 | // Implementation of Mizobuchi et al (2000), p.128 | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 402 | func (tok *Tokenizer) ToDoubleArray() *DaTokenizer { | 
|  | 403 |  | 
|  | 404 | dat := &DaTokenizer{ | 
|  | 405 | sigma: make(map[rune]int), | 
|  | 406 | } | 
|  | 407 |  | 
|  | 408 | for num, sym := range tok.sigmaRev { | 
|  | 409 | dat.sigma[sym] = num | 
|  | 410 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 411 |  | 
|  | 412 | mark := 0 | 
|  | 413 | size := 0 | 
|  | 414 |  | 
|  | 415 | // Create a mapping from s to t | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 416 | table := make([]*mapping, tok.arcCount+1) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 417 |  | 
|  | 418 | table[size] = &mapping{source: 1, target: 1} | 
|  | 419 | size++ | 
|  | 420 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 421 | // Allocate space for the outgoing symbol range | 
|  | 422 | A := make([]int, 0, tok.sigmaCount) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 423 |  | 
|  | 424 | for mark < size { | 
|  | 425 | s := table[mark].source // This is a state in Ms | 
|  | 426 | t := table[mark].target // This is a state in Mt | 
|  | 427 | mark++ | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 428 |  | 
|  | 429 | // Following the paper, here the state t can be remembered | 
|  | 430 | // in the set of states St | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 431 | A = A[:0] | 
|  | 432 | tok.get_set(s, &A) | 
|  | 433 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 434 | // Set base to the first free slot in the double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 435 | dat.setBase(t, dat.xCheck(A)) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 436 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 437 | // Iterate over all outgoing symbols | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 438 | for _, a := range A { | 
|  | 439 |  | 
|  | 440 | if a != FINAL { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 441 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 442 | // Aka g(s, a) | 
|  | 443 | s1 := tok.transitions[s][a].end | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 444 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 445 | // Store the transition | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 446 | t1 := dat.getBase(t) + a | 
|  | 447 | dat.setCheck(t1, t) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 448 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 449 | // Check for representative states | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 450 | r := in_table(s1, table, size) | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 451 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 452 | if r == 0 { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 453 | // Remember the mapping | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 454 | table[size] = &mapping{source: s1, target: t1} | 
|  | 455 | size++ | 
|  | 456 | } else { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 457 | // Overwrite with the representative state | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 458 | dat.setBase(t1, -1*r) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 459 | } | 
|  | 460 | } else { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 461 | // Store a final transition | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 462 | dat.setCheck(dat.getBase(t)+FINAL, t) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 463 | } | 
|  | 464 | } | 
|  | 465 | } | 
|  | 466 |  | 
|  | 467 | // Following Mizobuchi et al (2000) the size of the | 
|  | 468 | // FSA should be stored in check(1). | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 469 | dat.setCheck(1, dat.maxSize+1) | 
|  | 470 | dat.array = dat.array[:dat.maxSize+1] | 
|  | 471 | return dat | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 472 | } | 
|  | 473 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 474 | // Resize double array when necessary | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 475 | func (tok *DaTokenizer) resize(l int) { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 476 | // TODO: | 
|  | 477 | //   This is a bit too aggressive atm and should be calmed down. | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 478 | if len(tok.array) <= l { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 479 | tok.array = append(tok.array, make([]int, l)...) | 
|  | 480 | } | 
|  | 481 | } | 
|  | 482 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 483 | // Set base value in double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 484 | func (tok *DaTokenizer) setBase(p int, v int) { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 485 | l := p*2 + 1 | 
|  | 486 | tok.resize(l) | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 487 | if tok.maxSize < l { | 
|  | 488 | tok.maxSize = l | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 489 | } | 
|  | 490 | tok.array[p*2] = v | 
|  | 491 | } | 
|  | 492 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 493 | // Get base value in double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 494 | func (tok *DaTokenizer) getBase(p int) int { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 495 | if p*2 >= len(tok.array) { | 
|  | 496 | return 0 | 
|  | 497 | } | 
|  | 498 | return tok.array[p*2] | 
|  | 499 | } | 
|  | 500 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 501 | // Set check value in double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 502 | func (tok *DaTokenizer) setCheck(p int, v int) { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 503 | l := p*2 + 1 | 
|  | 504 | tok.resize(l) | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 505 | if tok.maxSize < l { | 
|  | 506 | tok.maxSize = l | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 507 | } | 
|  | 508 | tok.array[(p*2)+1] = v | 
|  | 509 | } | 
|  | 510 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 511 | // Get check value in double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 512 | func (tok *DaTokenizer) getCheck(p int) int { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 513 | if (p*2)+1 >= len(tok.array) { | 
|  | 514 | return 0 | 
|  | 515 | } | 
|  | 516 | return tok.array[(p*2)+1] | 
|  | 517 | } | 
|  | 518 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 519 | // Set size of double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 520 | func (tok *DaTokenizer) setSize(p, v int) { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 521 | tok.setCheck(1, v) | 
|  | 522 | } | 
|  | 523 |  | 
|  | 524 | // Get size of double array | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 525 | func (tok *DaTokenizer) getSize(p int) int { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 526 | return tok.getCheck(1) | 
|  | 527 | } | 
|  | 528 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 529 | // Check the table if a mapping of s | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 530 | // exists and return this as a representative. | 
|  | 531 | // Currently iterates through the whole table | 
|  | 532 | // in a bruteforce manner. | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 533 | func in_table(s int, table []*mapping, size int) int { | 
|  | 534 | for x := 0; x < size; x++ { | 
|  | 535 | if table[x].source == s { | 
|  | 536 | return table[x].target | 
|  | 537 | } | 
|  | 538 | } | 
|  | 539 | return 0 | 
|  | 540 | } | 
|  | 541 |  | 
|  | 542 | // Set alphabet A to the list of all symbols | 
|  | 543 | // outgoing from s | 
|  | 544 | func (tok *Tokenizer) get_set(s int, A *[]int) { | 
| Akron | 75ebe7f | 2021-08-03 10:34:10 +0200 | [diff] [blame] | 545 | for a := range tok.transitions[s] { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 546 | *A = append(*A, a) | 
|  | 547 | } | 
| Akron | c9d84a6 | 2021-08-03 15:56:03 +0200 | [diff] [blame] | 548 |  | 
|  | 549 | // Not required, but simplifies bug hunting | 
|  | 550 | sort.Ints(*A) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 551 | } | 
|  | 552 |  | 
|  | 553 | // Based on Mizobuchi et al (2000), p. 124 | 
|  | 554 | // This iterates for every state through the complete double array | 
|  | 555 | // structure until it finds a gap that fits all outgoing transitions | 
|  | 556 | // of the state. This is extremely slow, but is only necessary in the | 
|  | 557 | // construction phase of the tokenizer. | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 558 | func (dat *DaTokenizer) xCheck(symbols []int) int { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 559 |  | 
|  | 560 | // Start at the first entry of the double array list | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 561 | base := 1 | 
|  | 562 |  | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 563 | OVERLAP: | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 564 |  | 
|  | 565 | // Resize the array if necessary | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 566 | dat.resize((base + FINAL) * 2) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 567 | for _, a := range symbols { | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 568 | if dat.getCheck(base+a) != 0 { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 569 | base++ | 
|  | 570 | goto OVERLAP | 
|  | 571 | } | 
|  | 572 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 573 | return base | 
|  | 574 | } | 
|  | 575 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 576 | // Match an input string against the double array | 
|  | 577 | // FSA. | 
|  | 578 | // | 
|  | 579 | // Based on Mizobuchi et al (2000), p. 129, | 
|  | 580 | // with additional support for IDENTITY, UNKNOWN | 
|  | 581 | // and EPSILON transitions. | 
| Akron | f2120ca | 2021-08-03 16:26:41 +0200 | [diff] [blame^] | 582 | func (tok *DaTokenizer) Match(input string) bool { | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 583 | var a int | 
|  | 584 | var tu int | 
|  | 585 | var ok bool | 
|  | 586 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 587 | t := 1 // Initial state | 
|  | 588 | chars := []rune(input) | 
|  | 589 | i := 0 | 
|  | 590 |  | 
| Akron | 49d27ee | 2021-08-03 11:58:13 +0200 | [diff] [blame] | 591 | for i < len(chars) { | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 592 | a, ok = tok.sigma[chars[i]] | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 593 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 594 | // Support identity symbol if character is not in sigma | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 595 | if !ok && IDENTITY != -1 { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 596 | if DEBUG { | 
|  | 597 | fmt.Println("IDENTITY symbol", string(chars[i]), "->", IDENTITY) | 
|  | 598 | } | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 599 | a = IDENTITY | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 600 | } else if DEBUG { | 
| Akron | 49d27ee | 2021-08-03 11:58:13 +0200 | [diff] [blame] | 601 | fmt.Println("Sigma transition is okay for [", string(chars[i]), "]") | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 602 | } | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 603 | tu = t | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 604 | CHECK: | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 605 | t = tok.getBase(tu) + a | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 606 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 607 | // Check if the transition is valid according to the double array | 
|  | 608 | if t > tok.getCheck(1) || tok.getCheck(t) != tu { | 
|  | 609 |  | 
|  | 610 | if DEBUG { | 
|  | 611 | fmt.Println("Match is not fine!", t, "and", tok.getCheck(t), "vs", tu) | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 612 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 613 |  | 
|  | 614 | if !ok && a == IDENTITY { | 
|  | 615 | // Try again with unknown symbol, in case identity failed | 
|  | 616 | if DEBUG { | 
|  | 617 | fmt.Println("UNKNOWN symbol", string(chars[i]), "->", UNKNOWN) | 
|  | 618 | } | 
|  | 619 | a = UNKNOWN | 
|  | 620 |  | 
|  | 621 | } else if a != EPSILON { | 
|  | 622 | // Try again with epsilon symbol, in case everything else failed | 
|  | 623 | if DEBUG { | 
|  | 624 | fmt.Println("EPSILON symbol", string(chars[i]), "->", EPSILON) | 
|  | 625 | } | 
|  | 626 | a = EPSILON | 
|  | 627 | } else { | 
|  | 628 | break | 
|  | 629 | } | 
|  | 630 | goto CHECK | 
|  | 631 | } else if tok.getBase(t) < 0 { | 
| Akron | 730a79c | 2021-08-03 11:05:29 +0200 | [diff] [blame] | 632 | // Move to representative state | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 633 | t = -1 * tok.getBase(t) | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 634 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 635 |  | 
|  | 636 | // Transition is fine | 
| Akron | 49d27ee | 2021-08-03 11:58:13 +0200 | [diff] [blame] | 637 | if a != EPSILON { | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 638 | // Character consumed | 
| Akron | 49d27ee | 2021-08-03 11:58:13 +0200 | [diff] [blame] | 639 | i++ | 
|  | 640 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 641 | // TODO: | 
|  | 642 | //   Prevent endless epsilon loops! | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 643 | } | 
|  | 644 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 645 | if i != len(chars) { | 
|  | 646 | if DEBUG { | 
|  | 647 | fmt.Println("Not at the end") | 
|  | 648 | } | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 649 | return false | 
|  | 650 | } | 
|  | 651 |  | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 652 | FINALCHECK: | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 653 |  | 
|  | 654 | // Automaton is in a final state | 
|  | 655 | if tok.getCheck(tok.getBase(t)+FINAL) == t { | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 656 | return true | 
|  | 657 | } | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 658 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 659 | // Check epsilon transitions until a final state is reached | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 660 | tu = t | 
|  | 661 | a = EPSILON | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 662 | t = tok.getBase(tu) + a | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 663 |  | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 664 | // Epsilon transition failed | 
|  | 665 | if t > tok.getCheck(1) || tok.getCheck(t) != tu { | 
|  | 666 | if DEBUG { | 
|  | 667 | fmt.Println("Match is not fine!", t, "and", tok.getCheck(t), "vs", tu) | 
|  | 668 | } | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 669 | return false | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 670 |  | 
|  | 671 | } else if tok.getBase(t) < 0 { | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 672 | // Move to representative state | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 673 | t = -1 * tok.getBase(t) | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 674 | } | 
| Akron | 740f3d7 | 2021-08-03 12:12:34 +0200 | [diff] [blame] | 675 |  | 
| Akron | 465a099 | 2021-08-03 11:28:48 +0200 | [diff] [blame] | 676 | goto FINALCHECK | 
| Akron | 8ef408b | 2021-08-02 22:11:04 +0200 | [diff] [blame] | 677 | } |