blob: d5d4020ec76e08ec79a544d4b4c188e81d551ba6 [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
5 "os"
6 "path/filepath"
7 "testing"
8
Akronfa55bb22025-05-26 15:10:42 +02009 "github.com/KorAP/KoralPipe-TermMapper/ast"
10 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020011 "github.com/stretchr/testify/assert"
12 "github.com/stretchr/testify/require"
13)
14
15func TestMapper(t *testing.T) {
16 // Create a temporary config file
17 tmpDir := t.TempDir()
18 configFile := filepath.Join(tmpDir, "test-config.yaml")
19
20 configContent := `- id: test-mapper
21 foundryA: opennlp
22 layerA: p
23 foundryB: upos
24 layerB: p
25 mappings:
26 - "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]"
27 - "[DET] <> [opennlp/p=DET]"`
28
29 err := os.WriteFile(configFile, []byte(configContent), 0644)
30 require.NoError(t, err)
31
32 // Create a new mapper
33 m, err := NewMapper(configFile)
34 require.NoError(t, err)
35
36 tests := []struct {
37 name string
38 mappingID string
39 opts MappingOptions
40 input string
41 expected string
42 expectError bool
43 }{
44 {
45 name: "Simple A to B mapping",
46 mappingID: "test-mapper",
47 opts: MappingOptions{
48 Direction: AtoB,
49 },
50 input: `{
51 "@type": "koral:token",
52 "wrap": {
53 "@type": "koral:term",
54 "foundry": "opennlp",
55 "key": "PIDAT",
56 "layer": "p",
57 "match": "match:eq"
58 }
59 }`,
60 expected: `{
61 "@type": "koral:token",
62 "wrap": {
63 "@type": "koral:termGroup",
64 "operands": [
65 {
66 "@type": "koral:term",
67 "foundry": "opennlp",
68 "key": "PIDAT",
69 "layer": "p",
70 "match": "match:eq"
71 },
72 {
73 "@type": "koral:term",
74 "foundry": "opennlp",
75 "key": "AdjType",
76 "layer": "p",
77 "match": "match:eq",
78 "value": "Pdt"
79 }
80 ],
81 "relation": "relation:and"
82 }
83 }`,
84 },
85 {
86 name: "B to A mapping",
87 mappingID: "test-mapper",
88 opts: MappingOptions{
89 Direction: BtoA,
90 },
91 input: `{
92 "@type": "koral:token",
93 "wrap": {
94 "@type": "koral:termGroup",
95 "operands": [
96 {
97 "@type": "koral:term",
98 "foundry": "opennlp",
99 "key": "PIDAT",
100 "layer": "p",
101 "match": "match:eq"
102 },
103 {
104 "@type": "koral:term",
105 "foundry": "opennlp",
106 "key": "AdjType",
107 "layer": "p",
108 "match": "match:eq",
109 "value": "Pdt"
110 }
111 ],
112 "relation": "relation:and"
113 }
114 }`,
115 expected: `{
116 "@type": "koral:token",
117 "wrap": {
118 "@type": "koral:term",
119 "foundry": "opennlp",
120 "key": "PIDAT",
121 "layer": "p",
122 "match": "match:eq"
123 }
124 }`,
125 },
126 {
127 name: "Mapping with foundry override",
128 mappingID: "test-mapper",
129 opts: MappingOptions{
130 Direction: AtoB,
131 FoundryB: "custom",
132 },
133 input: `{
134 "@type": "koral:token",
135 "wrap": {
136 "@type": "koral:term",
137 "foundry": "opennlp",
138 "key": "PIDAT",
139 "layer": "p",
140 "match": "match:eq"
141 }
142 }`,
143 expected: `{
144 "@type": "koral:token",
145 "wrap": {
146 "@type": "koral:termGroup",
147 "operands": [
148 {
149 "@type": "koral:term",
150 "foundry": "custom",
151 "key": "PIDAT",
152 "layer": "p",
153 "match": "match:eq"
154 },
155 {
156 "@type": "koral:term",
157 "foundry": "custom",
158 "key": "AdjType",
159 "layer": "p",
160 "match": "match:eq",
161 "value": "Pdt"
162 }
163 ],
164 "relation": "relation:and"
165 }
166 }`,
167 },
168 {
169 name: "Invalid mapping ID",
170 mappingID: "nonexistent",
171 opts: MappingOptions{
172 Direction: AtoB,
173 },
174 input: `{
175 "@type": "koral:token",
176 "wrap": {
177 "@type": "koral:term",
178 "foundry": "opennlp",
179 "key": "PIDAT",
180 "layer": "p",
181 "match": "match:eq"
182 }
183 }`,
184 expectError: true,
185 },
186 {
187 name: "Invalid direction",
188 mappingID: "test-mapper",
189 opts: MappingOptions{
190 Direction: "invalid",
191 },
192 input: `{
193 "@type": "koral:token",
194 "wrap": {
195 "@type": "koral:term",
196 "foundry": "opennlp",
197 "key": "PIDAT",
198 "layer": "p",
199 "match": "match:eq"
200 }
201 }`,
202 expectError: true,
203 },
204 }
205
206 for _, tt := range tests {
207 t.Run(tt.name, func(t *testing.T) {
208 // Parse input JSON
209 var inputData interface{}
210 err := json.Unmarshal([]byte(tt.input), &inputData)
211 require.NoError(t, err)
212
213 // Apply mappings
214 result, err := m.ApplyMappings(tt.mappingID, tt.opts, inputData)
215 if tt.expectError {
216 assert.Error(t, err)
217 return
218 }
219 require.NoError(t, err)
220
221 // Parse expected JSON
222 var expectedData interface{}
223 err = json.Unmarshal([]byte(tt.expected), &expectedData)
224 require.NoError(t, err)
225
226 // Compare results
227 assert.Equal(t, expectedData, result)
228 })
229 }
230}
Akrond5850f82025-05-23 16:44:44 +0200231
232func TestMatchComplexPatterns(t *testing.T) {
233 tests := []struct {
234 name string
235 pattern ast.Pattern
236 replacement ast.Replacement
237 input ast.Node
238 expected ast.Node
239 }{
240 {
241 name: "Deep nested pattern with mixed operators",
242 pattern: ast.Pattern{
243 Root: &ast.TermGroup{
244 Operands: []ast.Node{
245 &ast.Term{
246 Key: "A",
247 Match: ast.MatchEqual,
248 },
249 &ast.TermGroup{
250 Operands: []ast.Node{
251 &ast.Term{
252 Key: "B",
253 Match: ast.MatchEqual,
254 },
255 &ast.TermGroup{
256 Operands: []ast.Node{
257 &ast.Term{
258 Key: "C",
259 Match: ast.MatchEqual,
260 },
261 &ast.Term{
262 Key: "D",
263 Match: ast.MatchEqual,
264 },
265 },
266 Relation: ast.AndRelation,
267 },
268 },
269 Relation: ast.OrRelation,
270 },
271 },
272 Relation: ast.AndRelation,
273 },
274 },
275 replacement: ast.Replacement{
276 Root: &ast.Term{
277 Key: "RESULT",
278 Match: ast.MatchEqual,
279 },
280 },
281 input: &ast.TermGroup{
282 Operands: []ast.Node{
283 &ast.Term{
284 Key: "A",
285 Match: ast.MatchEqual,
286 },
287 &ast.TermGroup{
288 Operands: []ast.Node{
289 &ast.Term{
290 Key: "C",
291 Match: ast.MatchEqual,
292 },
293 &ast.Term{
294 Key: "D",
295 Match: ast.MatchEqual,
296 },
297 },
298 Relation: ast.AndRelation,
299 },
300 },
301 Relation: ast.AndRelation,
302 },
303 expected: &ast.Term{
304 Key: "RESULT",
305 Match: ast.MatchEqual,
306 },
307 },
308 }
309
310 for _, tt := range tests {
311 t.Run(tt.name, func(t *testing.T) {
312 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
313 require.NoError(t, err)
314 result := m.Replace(tt.input)
315 assert.Equal(t, tt.expected, result)
316 })
317 }
318}
319
320func TestInvalidPatternReplacement(t *testing.T) {
321 // Create a temporary config file
322 tmpDir := t.TempDir()
323 configFile := filepath.Join(tmpDir, "test-config.yaml")
324
325 configContent := `- id: test-mapper
326 foundryA: opennlp
327 layerA: p
328 foundryB: upos
329 layerB: p
330 mappings:
331 - "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]"`
332
333 err := os.WriteFile(configFile, []byte(configContent), 0644)
334 require.NoError(t, err)
335
336 // Create a new mapper
337 m, err := NewMapper(configFile)
338 require.NoError(t, err)
339
340 tests := []struct {
341 name string
342 input string
343 expectError bool
344 errorMsg string
345 }{
346 {
347 name: "Invalid input - empty term group",
348 input: `{
349 "@type": "koral:token",
350 "wrap": {
351 "@type": "koral:termGroup",
352 "operands": [],
353 "relation": "relation:and"
354 }
355 }`,
356 expectError: true,
357 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
358 },
359 }
360
361 for _, tt := range tests {
362 t.Run(tt.name, func(t *testing.T) {
363 var inputData any
364 err := json.Unmarshal([]byte(tt.input), &inputData)
365 require.NoError(t, err)
366
367 result, err := m.ApplyMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
368 if tt.expectError {
369 assert.Error(t, err)
370 assert.Equal(t, tt.errorMsg, err.Error())
371 assert.Nil(t, result)
372 } else {
373 assert.NoError(t, err)
374 assert.NotNil(t, result)
375 }
376 })
377 }
378}