blob: 5c0cc56c05c5567bf9aa2db0ebfe31cdc025479d [file] [log] [blame]
Akron22322ec2025-05-21 11:17:30 +02001package parser
2
3import (
4 "testing"
5
Akronfa55bb22025-05-26 15:10:42 +02006 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akron22322ec2025-05-21 11:17:30 +02007 "github.com/stretchr/testify/assert"
8 "github.com/stretchr/testify/require"
9)
10
11func TestGrammarParserSimpleTerm(t *testing.T) {
12 tests := []struct {
13 name string
14 input string
15 defaultFoundry string
16 defaultLayer string
17 expected *SimpleTerm
18 expectError bool
19 }{
20 {
21 name: "Foundry layer key value",
22 input: "[opennlp/p=PIDAT:new]",
23 defaultFoundry: "opennlp",
24 defaultLayer: "p",
25 expected: &SimpleTerm{
26 WithFoundryLayer: &FoundryLayerTerm{
27 Foundry: "opennlp",
28 Layer: "p",
29 Key: "PIDAT",
30 Value: "new",
31 },
32 },
33 },
34 {
35 name: "Foundry layer key",
36 input: "[opennlp/p=PIDAT]",
37 defaultFoundry: "opennlp",
38 defaultLayer: "p",
39 expected: &SimpleTerm{
40 WithFoundryLayer: &FoundryLayerTerm{
41 Foundry: "opennlp",
42 Layer: "p",
43 Key: "PIDAT",
44 },
45 },
46 },
47 {
48 name: "Layer key",
49 input: "[p=PIDAT]",
50 defaultFoundry: "opennlp",
51 defaultLayer: "p",
52 expected: &SimpleTerm{
53 WithLayer: &LayerTerm{
54 Layer: "p",
55 Key: "PIDAT",
56 },
57 },
58 },
59 {
60 name: "Simple key",
61 input: "[PIDAT]",
62 defaultFoundry: "opennlp",
63 defaultLayer: "p",
64 expected: &SimpleTerm{
65 SimpleKey: &KeyTerm{
66 Key: "PIDAT",
67 },
68 },
69 },
Akron121c66e2025-06-02 16:34:05 +020070 {
71 name: "Special symbol",
72 input: "[$\\(]",
73 defaultFoundry: "opennlp",
74 defaultLayer: "p",
75 expected: &SimpleTerm{
76 SimpleKey: &KeyTerm{
77 Key: "$(",
78 },
79 },
80 },
81 {
82 name: "Multiple escaped characters",
83 input: "[\\&\\|\\=]",
84 defaultFoundry: "opennlp",
85 defaultLayer: "p",
86 expected: &SimpleTerm{
87 SimpleKey: &KeyTerm{
88 Key: "&|=",
89 },
90 },
91 },
Akroncc25e932025-06-02 19:39:43 +020092 {
93 name: "Foundry wildcard key",
94 input: "[opennlp/*=PIDAT]",
95 defaultFoundry: "opennlp",
96 defaultLayer: "p",
97 expected: &SimpleTerm{
98 WithFoundryWildcard: &FoundryWildcardTerm{
99 Foundry: "opennlp",
100 Key: "PIDAT",
101 },
102 },
103 },
Akron22322ec2025-05-21 11:17:30 +0200104 }
105
106 for _, tt := range tests {
107 t.Run(tt.name, func(t *testing.T) {
108 parser, err := NewGrammarParser(tt.defaultFoundry, tt.defaultLayer)
109 require.NoError(t, err)
110
Akronbb5065f2025-05-21 12:44:05 +0200111 grammar, err := parser.tokenParser.ParseString("", tt.input)
Akron22322ec2025-05-21 11:17:30 +0200112 if tt.expectError {
113 assert.Error(t, err)
114 return
115 }
116 require.NoError(t, err)
Akronbb5065f2025-05-21 12:44:05 +0200117 require.NotNil(t, grammar.Token, "Expected token expression")
Akron121c66e2025-06-02 16:34:05 +0200118
119 // For testing purposes, unescape the key in the simple term
120 if grammar.Token.Expr.First.Simple.SimpleKey != nil {
121 grammar.Token.Expr.First.Simple.SimpleKey.Key = unescapeString(grammar.Token.Expr.First.Simple.SimpleKey.Key)
122 }
123
Akron22322ec2025-05-21 11:17:30 +0200124 assert.Equal(t, tt.expected, grammar.Token.Expr.First.Simple)
125 })
126 }
127}
128
129func TestGrammarParser(t *testing.T) {
130 tests := []struct {
131 name string
132 input string
133 defaultFoundry string
134 defaultLayer string
135 expected ast.Node
136 expectError bool
137 }{
138 {
139 name: "Simple term with foundry and layer",
140 input: "[opennlp/p=PIDAT]",
141 defaultFoundry: "opennlp",
142 defaultLayer: "p",
143 expected: &ast.Token{
144 Wrap: &ast.Term{
145 Foundry: "opennlp",
146 Key: "PIDAT",
147 Layer: "p",
148 Match: ast.MatchEqual,
149 },
150 },
151 },
152 {
153 name: "Term group with and relation",
154 input: "[opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
155 defaultFoundry: "opennlp",
156 defaultLayer: "p",
157 expected: &ast.Token{
158 Wrap: &ast.TermGroup{
159 Operands: []ast.Node{
160 &ast.Term{
161 Foundry: "opennlp",
162 Key: "PIDAT",
163 Layer: "p",
164 Match: ast.MatchEqual,
165 },
166 &ast.Term{
167 Foundry: "opennlp",
168 Key: "AdjType",
169 Layer: "p",
170 Match: ast.MatchEqual,
171 Value: "Pdt",
172 },
173 },
174 Relation: ast.AndRelation,
175 },
176 },
177 },
178 {
179 name: "Term group with or relation",
180 input: "[opennlp/p=PronType:Ind | opennlp/p=PronType:Neg]",
181 defaultFoundry: "opennlp",
182 defaultLayer: "p",
183 expected: &ast.Token{
184 Wrap: &ast.TermGroup{
185 Operands: []ast.Node{
186 &ast.Term{
187 Foundry: "opennlp",
188 Key: "PronType",
189 Layer: "p",
190 Match: ast.MatchEqual,
191 Value: "Ind",
192 },
193 &ast.Term{
194 Foundry: "opennlp",
195 Key: "PronType",
196 Layer: "p",
197 Match: ast.MatchEqual,
198 Value: "Neg",
199 },
200 },
201 Relation: ast.OrRelation,
202 },
203 },
204 },
205 {
206 name: "Complex term group",
207 input: "[opennlp/p=PIDAT & (opennlp/p=PronType:Ind | opennlp/p=PronType:Neg)]",
208 defaultFoundry: "opennlp",
209 defaultLayer: "p",
210 expected: &ast.Token{
211 Wrap: &ast.TermGroup{
212 Operands: []ast.Node{
213 &ast.Term{
214 Foundry: "opennlp",
215 Key: "PIDAT",
216 Layer: "p",
217 Match: ast.MatchEqual,
218 },
219 &ast.TermGroup{
220 Operands: []ast.Node{
221 &ast.Term{
222 Foundry: "opennlp",
223 Key: "PronType",
224 Layer: "p",
225 Match: ast.MatchEqual,
226 Value: "Ind",
227 },
228 &ast.Term{
229 Foundry: "opennlp",
230 Key: "PronType",
231 Layer: "p",
232 Match: ast.MatchEqual,
233 Value: "Neg",
234 },
235 },
236 Relation: ast.OrRelation,
237 },
238 },
239 Relation: ast.AndRelation,
240 },
241 },
242 },
Akroncc25e932025-06-02 19:39:43 +0200243 {
244 name: "Wildcard pattern",
245 input: "[opennlp/*=PIDAT]",
246 defaultFoundry: "opennlp",
247 defaultLayer: "p",
248 expected: &ast.Token{
249 Wrap: &ast.Term{
250 Foundry: "opennlp",
251 Key: "PIDAT",
252 Layer: "p",
253 Match: ast.MatchEqual,
254 },
255 },
256 },
Akron22322ec2025-05-21 11:17:30 +0200257 }
258
259 for _, tt := range tests {
260 t.Run(tt.name, func(t *testing.T) {
261 parser, err := NewGrammarParser(tt.defaultFoundry, tt.defaultLayer)
262 require.NoError(t, err)
263
264 result, err := parser.Parse(tt.input)
265 if tt.expectError {
266 assert.Error(t, err)
267 return
268 }
269 require.NoError(t, err)
270 assert.Equal(t, tt.expected, result)
271 })
272 }
273}
Akronbb5065f2025-05-21 12:44:05 +0200274
275func TestMappingRules(t *testing.T) {
276 tests := []struct {
277 name string
278 input string
279 expected *MappingResult
280 wantErr bool
281 }{
282 {
283 name: "Simple PIDAT mapping",
284 input: "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
285 expected: &MappingResult{
286 Upper: &ast.Token{
287 Wrap: &ast.Term{
288 Key: "PIDAT",
289 Match: ast.MatchEqual,
290 },
291 },
292 Lower: &ast.Token{
293 Wrap: &ast.TermGroup{
294 Relation: ast.AndRelation,
295 Operands: []ast.Node{
296 &ast.Term{
297 Foundry: "opennlp",
298 Layer: "p",
299 Key: "PIDAT",
300 Match: ast.MatchEqual,
301 },
302 &ast.Term{
303 Foundry: "opennlp",
304 Layer: "p",
305 Key: "AdjType",
306 Value: "Pdt",
307 Match: ast.MatchEqual,
308 },
309 },
310 },
311 },
312 },
313 },
314 {
315 name: "PAV mapping",
316 input: "[PAV] <> [ADV & PronType:Dem]",
317 expected: &MappingResult{
318 Upper: &ast.Token{
319 Wrap: &ast.Term{
320 Key: "PAV",
321 Match: ast.MatchEqual,
322 },
323 },
324 Lower: &ast.Token{
325 Wrap: &ast.TermGroup{
326 Relation: ast.AndRelation,
327 Operands: []ast.Node{
328 &ast.Term{
329 Key: "ADV",
330 Match: ast.MatchEqual,
331 },
332 &ast.Term{
333 Key: "PronType",
334 Value: "Dem",
335 Match: ast.MatchEqual,
336 },
337 },
338 },
339 },
340 },
341 },
342 {
Akron76b87972025-06-02 16:59:59 +0200343 name: "PAV mapping with special character",
344 input: "[$\\(] <> [ADV & PronType:Dem]",
345 expected: &MappingResult{
346 Upper: &ast.Token{
347 Wrap: &ast.Term{
348 Key: "$(",
349 Match: ast.MatchEqual,
350 },
351 },
352 Lower: &ast.Token{
353 Wrap: &ast.TermGroup{
354 Relation: ast.AndRelation,
355 Operands: []ast.Node{
356 &ast.Term{
357 Key: "ADV",
358 Match: ast.MatchEqual,
359 },
360 &ast.Term{
361 Key: "PronType",
362 Value: "Dem",
363 Match: ast.MatchEqual,
364 },
365 },
366 },
367 },
368 },
369 },
370 {
Akronbb5065f2025-05-21 12:44:05 +0200371 name: "Invalid mapping syntax",
372 input: "[PAV] -> [ADV]",
373 wantErr: true,
374 },
375 {
376 name: "Missing closing bracket",
377 input: "[PAV <> [ADV]",
378 wantErr: true,
379 },
380 }
381
382 parser, err := NewGrammarParser("", "")
383 assert.NoError(t, err)
384
385 for _, tt := range tests {
386 t.Run(tt.name, func(t *testing.T) {
387 result, err := parser.ParseMapping(tt.input)
388 if tt.wantErr {
389 assert.Error(t, err)
390 return
391 }
Akroncc25e932025-06-02 19:39:43 +0200392 assert.NoError(t, err, "Input: %s", tt.input)
Akronbb5065f2025-05-21 12:44:05 +0200393 assert.Equal(t, tt.expected, result)
394 })
395 }
396}