Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 1 | package matcher |
| 2 | |
// A matcher is built from a pattern and a replacement; its Match method takes a node and returns true if the node matches the pattern.
// It is used to match a pattern against a node in the AST.
| 5 | |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 6 | import ( |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 7 | "encoding/json" |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 8 | "testing" |
| 9 | |
Akron | fa55bb2 | 2025-05-26 15:10:42 +0200 | [diff] [blame] | 10 | "github.com/KorAP/KoralPipe-TermMapper/ast" |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 11 | "github.com/stretchr/testify/assert" |
| 12 | ) |
| 13 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 14 | func TestNewMatcherValidation(t *testing.T) { |
| 15 | tests := []struct { |
| 16 | name string |
| 17 | pattern ast.Pattern |
| 18 | replacement ast.Replacement |
| 19 | expectedError string |
| 20 | }{ |
| 21 | { |
| 22 | name: "Valid pattern and replacement", |
| 23 | pattern: ast.Pattern{ |
| 24 | Root: &ast.Term{ |
| 25 | Foundry: "opennlp", |
| 26 | Key: "DET", |
| 27 | Layer: "p", |
| 28 | Match: ast.MatchEqual, |
| 29 | }, |
| 30 | }, |
| 31 | replacement: ast.Replacement{ |
| 32 | Root: &ast.Term{ |
| 33 | Foundry: "opennlp", |
| 34 | Key: "COMBINED_DET", |
| 35 | Layer: "p", |
| 36 | Match: ast.MatchEqual, |
| 37 | }, |
| 38 | }, |
| 39 | expectedError: "", |
| 40 | }, |
| 41 | { |
| 42 | name: "Invalid pattern - CatchallNode", |
| 43 | pattern: ast.Pattern{ |
| 44 | Root: &ast.CatchallNode{ |
| 45 | NodeType: "custom", |
| 46 | }, |
| 47 | }, |
| 48 | replacement: ast.Replacement{ |
| 49 | Root: &ast.Term{ |
| 50 | Foundry: "opennlp", |
| 51 | Key: "DET", |
| 52 | Layer: "p", |
| 53 | Match: ast.MatchEqual, |
| 54 | }, |
| 55 | }, |
| 56 | expectedError: "invalid pattern: catchall nodes are not allowed in pattern/replacement ASTs", |
| 57 | }, |
| 58 | { |
| 59 | name: "Invalid replacement - CatchallNode", |
| 60 | pattern: ast.Pattern{ |
| 61 | Root: &ast.Term{ |
| 62 | Foundry: "opennlp", |
| 63 | Key: "DET", |
| 64 | Layer: "p", |
| 65 | Match: ast.MatchEqual, |
| 66 | }, |
| 67 | }, |
| 68 | replacement: ast.Replacement{ |
| 69 | Root: &ast.CatchallNode{ |
| 70 | NodeType: "custom", |
| 71 | }, |
| 72 | }, |
| 73 | expectedError: "invalid replacement: catchall nodes are not allowed in pattern/replacement ASTs", |
| 74 | }, |
| 75 | { |
| 76 | name: "Invalid pattern - Empty TermGroup", |
| 77 | pattern: ast.Pattern{ |
| 78 | Root: &ast.TermGroup{ |
| 79 | Operands: []ast.Node{}, |
| 80 | Relation: ast.AndRelation, |
| 81 | }, |
| 82 | }, |
| 83 | replacement: ast.Replacement{ |
| 84 | Root: &ast.Term{ |
| 85 | Foundry: "opennlp", |
| 86 | Key: "DET", |
| 87 | Layer: "p", |
| 88 | Match: ast.MatchEqual, |
| 89 | }, |
| 90 | }, |
| 91 | expectedError: "invalid pattern: empty term group", |
| 92 | }, |
| 93 | { |
| 94 | name: "Invalid pattern - Nested CatchallNode", |
| 95 | pattern: ast.Pattern{ |
| 96 | Root: &ast.TermGroup{ |
| 97 | Operands: []ast.Node{ |
| 98 | &ast.Term{ |
| 99 | Foundry: "opennlp", |
| 100 | Key: "DET", |
| 101 | Layer: "p", |
| 102 | Match: ast.MatchEqual, |
| 103 | }, |
| 104 | &ast.CatchallNode{ |
| 105 | NodeType: "custom", |
| 106 | }, |
| 107 | }, |
| 108 | Relation: ast.AndRelation, |
| 109 | }, |
| 110 | }, |
| 111 | replacement: ast.Replacement{ |
| 112 | Root: &ast.Term{ |
| 113 | Foundry: "opennlp", |
| 114 | Key: "DET", |
| 115 | Layer: "p", |
| 116 | Match: ast.MatchEqual, |
| 117 | }, |
| 118 | }, |
| 119 | expectedError: "invalid pattern: invalid operand: catchall nodes are not allowed in pattern/replacement ASTs", |
| 120 | }, |
| 121 | } |
| 122 | |
| 123 | for _, tt := range tests { |
| 124 | t.Run(tt.name, func(t *testing.T) { |
| 125 | matcher, err := NewMatcher(tt.pattern, tt.replacement) |
| 126 | if tt.expectedError != "" { |
| 127 | assert.Error(t, err) |
| 128 | assert.Equal(t, tt.expectedError, err.Error()) |
| 129 | assert.Nil(t, matcher) |
| 130 | } else { |
| 131 | assert.NoError(t, err) |
| 132 | assert.NotNil(t, matcher) |
| 133 | } |
| 134 | }) |
| 135 | } |
| 136 | } |
| 137 | |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 138 | func TestMatchSimplePattern(t *testing.T) { |
| 139 | // Create a simple pattern: match a term with DET |
| 140 | pattern := ast.Pattern{ |
| 141 | Root: &ast.Term{ |
| 142 | Foundry: "opennlp", |
| 143 | Key: "DET", |
| 144 | Layer: "p", |
| 145 | Match: ast.MatchEqual, |
| 146 | }, |
| 147 | } |
| 148 | |
| 149 | // Create a simple replacement |
| 150 | replacement := ast.Replacement{ |
| 151 | Root: &ast.Term{ |
| 152 | Foundry: "opennlp", |
| 153 | Key: "COMBINED_DET", |
| 154 | Layer: "p", |
| 155 | Match: ast.MatchEqual, |
| 156 | }, |
| 157 | } |
| 158 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 159 | m, err := NewMatcher(pattern, replacement) |
| 160 | assert.NoError(t, err) |
| 161 | assert.NotNil(t, m) |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 162 | |
| 163 | tests := []struct { |
| 164 | name string |
| 165 | input ast.Node |
| 166 | expected bool |
| 167 | }{ |
| 168 | { |
| 169 | name: "Exact match", |
| 170 | input: &ast.Term{ |
| 171 | Foundry: "opennlp", |
| 172 | Key: "DET", |
| 173 | Layer: "p", |
| 174 | Match: ast.MatchEqual, |
| 175 | }, |
| 176 | expected: true, |
| 177 | }, |
| 178 | { |
| 179 | name: "Different key", |
| 180 | input: &ast.Term{ |
| 181 | Foundry: "opennlp", |
| 182 | Key: "NOUN", |
| 183 | Layer: "p", |
| 184 | Match: ast.MatchEqual, |
| 185 | }, |
| 186 | expected: false, |
| 187 | }, |
| 188 | { |
| 189 | name: "Different foundry", |
| 190 | input: &ast.Term{ |
| 191 | Foundry: "different", |
| 192 | Key: "DET", |
| 193 | Layer: "p", |
| 194 | Match: ast.MatchEqual, |
| 195 | }, |
| 196 | expected: false, |
| 197 | }, |
| 198 | { |
| 199 | name: "Different match type", |
| 200 | input: &ast.Term{ |
| 201 | Foundry: "opennlp", |
| 202 | Key: "DET", |
| 203 | Layer: "p", |
| 204 | Match: ast.MatchNotEqual, |
| 205 | }, |
| 206 | expected: false, |
| 207 | }, |
| 208 | { |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 209 | name: "Nested node", |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 210 | input: &ast.Token{ |
| 211 | Wrap: &ast.Term{ |
| 212 | Foundry: "opennlp", |
| 213 | Key: "DET", |
| 214 | Layer: "p", |
| 215 | Match: ast.MatchEqual, |
| 216 | }, |
| 217 | }, |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 218 | expected: true, |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 219 | }, |
| 220 | } |
| 221 | |
| 222 | for _, tt := range tests { |
| 223 | t.Run(tt.name, func(t *testing.T) { |
| 224 | result := m.Match(tt.input) |
| 225 | assert.Equal(t, tt.expected, result) |
| 226 | }) |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | func TestMatchComplexPattern(t *testing.T) { |
| 231 | // Create a complex pattern: DET AND (AdjType=Pdt OR PronType=Ind) |
| 232 | pattern := ast.Pattern{ |
| 233 | Root: &ast.Token{ |
| 234 | Wrap: &ast.TermGroup{ |
| 235 | Operands: []ast.Node{ |
| 236 | &ast.Term{ |
| 237 | Foundry: "opennlp", |
| 238 | Key: "DET", |
| 239 | Layer: "p", |
| 240 | Match: ast.MatchEqual, |
| 241 | }, |
| 242 | &ast.TermGroup{ |
| 243 | Operands: []ast.Node{ |
| 244 | &ast.Term{ |
| 245 | Foundry: "opennlp", |
| 246 | Key: "AdjType", |
| 247 | Layer: "m", |
| 248 | Match: ast.MatchEqual, |
| 249 | Value: "Pdt", |
| 250 | }, |
| 251 | &ast.Term{ |
| 252 | Foundry: "opennlp", |
| 253 | Key: "PronType", |
| 254 | Layer: "m", |
| 255 | Match: ast.MatchEqual, |
| 256 | Value: "Ind", |
| 257 | }, |
| 258 | }, |
| 259 | Relation: ast.OrRelation, |
| 260 | }, |
| 261 | }, |
| 262 | Relation: ast.AndRelation, |
| 263 | }, |
| 264 | }, |
| 265 | } |
| 266 | |
| 267 | replacement := ast.Replacement{ |
| 268 | Root: &ast.Token{ |
| 269 | Wrap: &ast.Term{ |
| 270 | Foundry: "opennlp", |
| 271 | Key: "COMBINED_DET", |
| 272 | Layer: "p", |
| 273 | Match: ast.MatchEqual, |
| 274 | }, |
| 275 | }, |
| 276 | } |
| 277 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 278 | m, err := NewMatcher(pattern, replacement) |
| 279 | assert.NoError(t, err) |
| 280 | assert.NotNil(t, m) |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 281 | |
| 282 | tests := []struct { |
| 283 | name string |
| 284 | input ast.Node |
| 285 | expected bool |
| 286 | }{ |
| 287 | { |
| 288 | name: "Match with AdjType=Pdt", |
| 289 | input: &ast.Token{ |
| 290 | Wrap: &ast.TermGroup{ |
| 291 | Operands: []ast.Node{ |
| 292 | &ast.Term{ |
| 293 | Foundry: "opennlp", |
| 294 | Key: "DET", |
| 295 | Layer: "p", |
| 296 | Match: ast.MatchEqual, |
| 297 | }, |
| 298 | &ast.Term{ |
| 299 | Foundry: "opennlp", |
| 300 | Key: "AdjType", |
| 301 | Layer: "m", |
| 302 | Match: ast.MatchEqual, |
| 303 | Value: "Pdt", |
| 304 | }, |
| 305 | }, |
| 306 | Relation: ast.AndRelation, |
| 307 | }, |
| 308 | }, |
| 309 | expected: true, |
| 310 | }, |
| 311 | { |
| 312 | name: "Match with PronType=Ind", |
| 313 | input: &ast.Token{ |
| 314 | Wrap: &ast.TermGroup{ |
| 315 | Operands: []ast.Node{ |
| 316 | &ast.Term{ |
| 317 | Foundry: "opennlp", |
| 318 | Key: "DET", |
| 319 | Layer: "p", |
| 320 | Match: ast.MatchEqual, |
| 321 | }, |
| 322 | &ast.Term{ |
| 323 | Foundry: "opennlp", |
| 324 | Key: "PronType", |
| 325 | Layer: "m", |
| 326 | Match: ast.MatchEqual, |
| 327 | Value: "Ind", |
| 328 | }, |
| 329 | }, |
| 330 | Relation: ast.AndRelation, |
| 331 | }, |
| 332 | }, |
| 333 | expected: true, |
| 334 | }, |
| 335 | { |
| 336 | name: "No match - missing DET", |
| 337 | input: &ast.Token{ |
| 338 | Wrap: &ast.TermGroup{ |
| 339 | Operands: []ast.Node{ |
| 340 | &ast.Term{ |
| 341 | Foundry: "opennlp", |
| 342 | Key: "NOUN", |
| 343 | Layer: "p", |
| 344 | Match: ast.MatchEqual, |
| 345 | }, |
| 346 | &ast.Term{ |
| 347 | Foundry: "opennlp", |
| 348 | Key: "AdjType", |
| 349 | Layer: "m", |
| 350 | Match: ast.MatchEqual, |
| 351 | Value: "Pdt", |
| 352 | }, |
| 353 | }, |
| 354 | Relation: ast.AndRelation, |
| 355 | }, |
| 356 | }, |
| 357 | expected: false, |
| 358 | }, |
| 359 | { |
| 360 | name: "No match - wrong value", |
| 361 | input: &ast.Token{ |
| 362 | Wrap: &ast.TermGroup{ |
| 363 | Operands: []ast.Node{ |
| 364 | &ast.Term{ |
| 365 | Foundry: "opennlp", |
| 366 | Key: "DET", |
| 367 | Layer: "p", |
| 368 | Match: ast.MatchEqual, |
| 369 | }, |
| 370 | &ast.Term{ |
| 371 | Foundry: "opennlp", |
| 372 | Key: "AdjType", |
| 373 | Layer: "m", |
| 374 | Match: ast.MatchEqual, |
| 375 | Value: "Wrong", |
| 376 | }, |
| 377 | }, |
| 378 | Relation: ast.AndRelation, |
| 379 | }, |
| 380 | }, |
| 381 | expected: false, |
| 382 | }, |
| 383 | } |
| 384 | |
| 385 | for _, tt := range tests { |
| 386 | t.Run(tt.name, func(t *testing.T) { |
| 387 | result := m.Match(tt.input) |
| 388 | assert.Equal(t, tt.expected, result) |
| 389 | }) |
| 390 | } |
| 391 | } |
| 392 | |
| 393 | func TestReplace(t *testing.T) { |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 394 | pattern := ast.Pattern{ |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 395 | Root: &ast.Term{ |
| 396 | Foundry: "opennlp", |
| 397 | Key: "DET", |
| 398 | Layer: "p", |
| 399 | Match: ast.MatchEqual, |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 400 | }, |
| 401 | } |
| 402 | |
| 403 | replacement := ast.Replacement{ |
| 404 | Root: &ast.Term{ |
| 405 | Foundry: "opennlp", |
| 406 | Key: "COMBINED_DET", |
| 407 | Layer: "p", |
| 408 | Match: ast.MatchEqual, |
| 409 | }, |
| 410 | } |
| 411 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 412 | m, err := NewMatcher(pattern, replacement) |
| 413 | assert.NoError(t, err) |
| 414 | assert.NotNil(t, m) |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 415 | |
| 416 | tests := []struct { |
| 417 | name string |
| 418 | input ast.Node |
| 419 | expected ast.Node |
| 420 | }{ |
| 421 | { |
| 422 | name: "Replace matching pattern", |
| 423 | input: &ast.TermGroup{ |
| 424 | Operands: []ast.Node{ |
| 425 | &ast.Term{ |
| 426 | Foundry: "opennlp", |
| 427 | Key: "DET", |
| 428 | Layer: "p", |
| 429 | Match: ast.MatchEqual, |
| 430 | }, |
| 431 | &ast.Term{ |
| 432 | Foundry: "opennlp", |
| 433 | Key: "AdjType", |
| 434 | Layer: "m", |
| 435 | Match: ast.MatchEqual, |
| 436 | Value: "Pdt", |
| 437 | }, |
| 438 | }, |
| 439 | Relation: ast.AndRelation, |
| 440 | }, |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 441 | expected: &ast.TermGroup{ |
| 442 | Operands: []ast.Node{ |
| 443 | &ast.Term{ |
| 444 | Foundry: "opennlp", |
| 445 | Key: "COMBINED_DET", |
| 446 | Layer: "p", |
| 447 | Match: ast.MatchEqual, |
| 448 | }, |
| 449 | &ast.Term{ |
| 450 | Foundry: "opennlp", |
| 451 | Key: "AdjType", |
| 452 | Layer: "m", |
| 453 | Match: ast.MatchEqual, |
| 454 | Value: "Pdt", |
| 455 | }, |
| 456 | }, |
| 457 | Relation: ast.AndRelation, |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 458 | }, |
| 459 | }, |
| 460 | { |
| 461 | name: "No replacement for non-matching pattern", |
| 462 | input: &ast.TermGroup{ |
| 463 | Operands: []ast.Node{ |
| 464 | &ast.Term{ |
| 465 | Foundry: "opennlp", |
| 466 | Key: "NOUN", |
| 467 | Layer: "p", |
| 468 | Match: ast.MatchEqual, |
| 469 | }, |
| 470 | &ast.Term{ |
| 471 | Foundry: "opennlp", |
| 472 | Key: "AdjType", |
| 473 | Layer: "m", |
| 474 | Match: ast.MatchEqual, |
| 475 | Value: "Pdt", |
| 476 | }, |
| 477 | }, |
| 478 | Relation: ast.AndRelation, |
| 479 | }, |
| 480 | expected: &ast.TermGroup{ |
| 481 | Operands: []ast.Node{ |
| 482 | &ast.Term{ |
| 483 | Foundry: "opennlp", |
| 484 | Key: "NOUN", |
| 485 | Layer: "p", |
| 486 | Match: ast.MatchEqual, |
| 487 | }, |
| 488 | &ast.Term{ |
| 489 | Foundry: "opennlp", |
| 490 | Key: "AdjType", |
| 491 | Layer: "m", |
| 492 | Match: ast.MatchEqual, |
| 493 | Value: "Pdt", |
| 494 | }, |
| 495 | }, |
| 496 | Relation: ast.AndRelation, |
| 497 | }, |
| 498 | }, |
| 499 | { |
| 500 | name: "Replace in nested structure", |
| 501 | input: &ast.Token{ |
| 502 | Wrap: &ast.TermGroup{ |
| 503 | Operands: []ast.Node{ |
| 504 | &ast.TermGroup{ |
| 505 | Operands: []ast.Node{ |
| 506 | &ast.Term{ |
| 507 | Foundry: "opennlp", |
| 508 | Key: "DET", |
| 509 | Layer: "p", |
| 510 | Match: ast.MatchEqual, |
| 511 | }, |
| 512 | &ast.Term{ |
| 513 | Foundry: "opennlp", |
| 514 | Key: "AdjType", |
| 515 | Layer: "m", |
| 516 | Match: ast.MatchEqual, |
| 517 | Value: "Pdt", |
| 518 | }, |
| 519 | }, |
| 520 | Relation: ast.AndRelation, |
| 521 | }, |
| 522 | &ast.Term{ |
| 523 | Foundry: "opennlp", |
| 524 | Key: "NOUN", |
| 525 | Layer: "p", |
| 526 | Match: ast.MatchEqual, |
| 527 | }, |
| 528 | }, |
| 529 | Relation: ast.AndRelation, |
| 530 | }, |
| 531 | }, |
| 532 | expected: &ast.Token{ |
| 533 | Wrap: &ast.TermGroup{ |
| 534 | Operands: []ast.Node{ |
| 535 | &ast.Term{ |
| 536 | Foundry: "opennlp", |
| 537 | Key: "COMBINED_DET", |
| 538 | Layer: "p", |
| 539 | Match: ast.MatchEqual, |
| 540 | }, |
| 541 | &ast.Term{ |
| 542 | Foundry: "opennlp", |
| 543 | Key: "NOUN", |
| 544 | Layer: "p", |
| 545 | Match: ast.MatchEqual, |
| 546 | }, |
| 547 | }, |
| 548 | Relation: ast.AndRelation, |
| 549 | }, |
| 550 | }, |
| 551 | }, |
| 552 | } |
| 553 | |
| 554 | for _, tt := range tests { |
| 555 | t.Run(tt.name, func(t *testing.T) { |
| 556 | result := m.Replace(tt.input) |
| 557 | assert.Equal(t, tt.expected, result) |
| 558 | }) |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | func TestMatchNodeOrder(t *testing.T) { |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 563 | pattern := ast.Pattern{ |
| 564 | Root: &ast.TermGroup{ |
| 565 | Operands: []ast.Node{ |
| 566 | &ast.Term{ |
| 567 | Foundry: "opennlp", |
| 568 | Key: "DET", |
| 569 | Layer: "p", |
| 570 | Match: ast.MatchEqual, |
| 571 | }, |
| 572 | &ast.Term{ |
| 573 | Foundry: "opennlp", |
| 574 | Key: "AdjType", |
| 575 | Layer: "m", |
| 576 | Match: ast.MatchEqual, |
| 577 | Value: "Pdt", |
| 578 | }, |
| 579 | }, |
| 580 | Relation: ast.AndRelation, |
| 581 | }, |
| 582 | } |
| 583 | |
| 584 | replacement := ast.Replacement{ |
| 585 | Root: &ast.Term{ |
| 586 | Foundry: "opennlp", |
| 587 | Key: "COMBINED_DET", |
| 588 | Layer: "p", |
| 589 | Match: ast.MatchEqual, |
| 590 | }, |
| 591 | } |
| 592 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 593 | m, err := NewMatcher(pattern, replacement) |
| 594 | assert.NoError(t, err) |
| 595 | assert.NotNil(t, m) |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 596 | |
| 597 | // Test with operands in different orders |
| 598 | input1 := &ast.TermGroup{ |
| 599 | Operands: []ast.Node{ |
| 600 | &ast.Term{ |
| 601 | Foundry: "opennlp", |
| 602 | Key: "DET", |
| 603 | Layer: "p", |
| 604 | Match: ast.MatchEqual, |
| 605 | }, |
| 606 | &ast.Term{ |
| 607 | Foundry: "opennlp", |
| 608 | Key: "AdjType", |
| 609 | Layer: "m", |
| 610 | Match: ast.MatchEqual, |
| 611 | Value: "Pdt", |
| 612 | }, |
| 613 | }, |
| 614 | Relation: ast.AndRelation, |
| 615 | } |
| 616 | |
| 617 | input2 := &ast.TermGroup{ |
| 618 | Operands: []ast.Node{ |
| 619 | &ast.Term{ |
| 620 | Foundry: "opennlp", |
| 621 | Key: "AdjType", |
| 622 | Layer: "m", |
| 623 | Match: ast.MatchEqual, |
| 624 | Value: "Pdt", |
| 625 | }, |
| 626 | &ast.Term{ |
| 627 | Foundry: "opennlp", |
| 628 | Key: "DET", |
| 629 | Layer: "p", |
| 630 | Match: ast.MatchEqual, |
| 631 | }, |
| 632 | }, |
| 633 | Relation: ast.AndRelation, |
| 634 | } |
| 635 | |
| 636 | assert.True(t, m.Match(input1), "Should match with original order") |
| 637 | assert.True(t, m.Match(input2), "Should match with reversed order") |
| 638 | } |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 639 | |
| 640 | func TestMatchWithUnknownNodes(t *testing.T) { |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 641 | pattern := ast.Pattern{ |
| 642 | Root: &ast.Term{ |
| 643 | Foundry: "opennlp", |
| 644 | Key: "DET", |
| 645 | Layer: "p", |
| 646 | Match: ast.MatchEqual, |
| 647 | }, |
| 648 | } |
| 649 | |
| 650 | replacement := ast.Replacement{ |
| 651 | Root: &ast.Term{ |
| 652 | Foundry: "opennlp", |
| 653 | Key: "COMBINED_DET", |
| 654 | Layer: "p", |
| 655 | Match: ast.MatchEqual, |
| 656 | }, |
| 657 | } |
| 658 | |
Akron | d5850f8 | 2025-05-23 16:44:44 +0200 | [diff] [blame] | 659 | m, err := NewMatcher(pattern, replacement) |
| 660 | assert.NoError(t, err) |
| 661 | assert.NotNil(t, m) |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 662 | |
| 663 | tests := []struct { |
| 664 | name string |
| 665 | input ast.Node |
| 666 | expected bool |
| 667 | }{ |
| 668 | { |
| 669 | name: "Match term inside unknown node with wrap", |
| 670 | input: &ast.CatchallNode{ |
| 671 | NodeType: "koral:custom", |
| 672 | RawContent: json.RawMessage(`{ |
| 673 | "@type": "koral:custom", |
| 674 | "customField": "value" |
| 675 | }`), |
| 676 | Wrap: &ast.Term{ |
| 677 | Foundry: "opennlp", |
| 678 | Key: "DET", |
| 679 | Layer: "p", |
| 680 | Match: ast.MatchEqual, |
| 681 | }, |
| 682 | }, |
| 683 | expected: true, |
| 684 | }, |
| 685 | { |
| 686 | name: "Match term inside unknown node's operands", |
| 687 | input: &ast.CatchallNode{ |
| 688 | NodeType: "koral:custom", |
| 689 | RawContent: json.RawMessage(`{ |
| 690 | "@type": "koral:custom", |
| 691 | "customField": "value" |
| 692 | }`), |
| 693 | Operands: []ast.Node{ |
| 694 | &ast.Term{ |
| 695 | Foundry: "opennlp", |
| 696 | Key: "DET", |
| 697 | Layer: "p", |
| 698 | Match: ast.MatchEqual, |
| 699 | }, |
| 700 | }, |
| 701 | }, |
| 702 | expected: true, |
| 703 | }, |
| 704 | { |
| 705 | name: "No match in unknown node with different term", |
| 706 | input: &ast.CatchallNode{ |
| 707 | NodeType: "koral:custom", |
| 708 | RawContent: json.RawMessage(`{ |
| 709 | "@type": "koral:custom", |
| 710 | "customField": "value" |
| 711 | }`), |
| 712 | Wrap: &ast.Term{ |
| 713 | Foundry: "opennlp", |
| 714 | Key: "NOUN", |
| 715 | Layer: "p", |
| 716 | Match: ast.MatchEqual, |
| 717 | }, |
| 718 | }, |
| 719 | expected: false, |
| 720 | }, |
| 721 | { |
| 722 | name: "Match in deeply nested unknown nodes", |
| 723 | input: &ast.CatchallNode{ |
| 724 | NodeType: "koral:outer", |
| 725 | RawContent: json.RawMessage(`{ |
| 726 | "@type": "koral:outer", |
| 727 | "outerField": "value" |
| 728 | }`), |
| 729 | Wrap: &ast.CatchallNode{ |
| 730 | NodeType: "koral:inner", |
| 731 | RawContent: json.RawMessage(`{ |
| 732 | "@type": "koral:inner", |
| 733 | "innerField": "value" |
| 734 | }`), |
| 735 | Wrap: &ast.Term{ |
| 736 | Foundry: "opennlp", |
| 737 | Key: "DET", |
| 738 | Layer: "p", |
| 739 | Match: ast.MatchEqual, |
| 740 | }, |
| 741 | }, |
| 742 | }, |
| 743 | expected: true, |
| 744 | }, |
| 745 | { |
| 746 | name: "Match in mixed known and unknown nodes", |
| 747 | input: &ast.Token{ |
| 748 | Wrap: &ast.CatchallNode{ |
| 749 | NodeType: "koral:custom", |
| 750 | RawContent: json.RawMessage(`{ |
| 751 | "@type": "koral:custom", |
| 752 | "customField": "value" |
| 753 | }`), |
| 754 | Operands: []ast.Node{ |
| 755 | &ast.TermGroup{ |
| 756 | Operands: []ast.Node{ |
| 757 | &ast.Term{ |
| 758 | Foundry: "opennlp", |
| 759 | Key: "DET", |
| 760 | Layer: "p", |
| 761 | Match: ast.MatchEqual, |
| 762 | }, |
| 763 | }, |
| 764 | Relation: ast.AndRelation, |
| 765 | }, |
| 766 | }, |
| 767 | }, |
| 768 | }, |
| 769 | expected: true, |
| 770 | }, |
| 771 | } |
| 772 | |
| 773 | for _, tt := range tests { |
| 774 | t.Run(tt.name, func(t *testing.T) { |
| 775 | result := m.Match(tt.input) |
| 776 | assert.Equal(t, tt.expected, result) |
| 777 | |
| 778 | if tt.expected { |
| 779 | // Test replacement when there's a match |
| 780 | replaced := m.Replace(tt.input) |
| 781 | // Verify the replacement happened somewhere in the structure |
| 782 | containsReplacement := false |
| 783 | var checkNode func(ast.Node) |
| 784 | checkNode = func(node ast.Node) { |
| 785 | switch n := node.(type) { |
| 786 | case *ast.Term: |
| 787 | if n.Key == "COMBINED_DET" { |
| 788 | containsReplacement = true |
| 789 | } |
| 790 | case *ast.Token: |
| 791 | if n.Wrap != nil { |
| 792 | checkNode(n.Wrap) |
| 793 | } |
| 794 | case *ast.TermGroup: |
| 795 | for _, op := range n.Operands { |
| 796 | checkNode(op) |
| 797 | } |
| 798 | case *ast.CatchallNode: |
| 799 | if n.Wrap != nil { |
| 800 | checkNode(n.Wrap) |
| 801 | } |
| 802 | for _, op := range n.Operands { |
| 803 | checkNode(op) |
| 804 | } |
| 805 | } |
| 806 | } |
| 807 | checkNode(replaced) |
| 808 | assert.True(t, containsReplacement, "Replacement should be found in the result") |
| 809 | } |
| 810 | }) |
| 811 | } |
| 812 | } |