blob: 0dc2b03bb1c69e3cd92fbf71f06e3a1b1db02be7 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// This package parses a JSON document into AST nodes and serializes AST
// nodes back to JSON.
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
Akron87948e82025-05-26 18:19:51 +020011 "maps"
12
Akronfa55bb22025-05-26 15:10:42 +020013 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +020014)
15
16// rawNode represents the raw JSON structure
17type rawNode struct {
18 Type string `json:"@type"`
19 Wrap json.RawMessage `json:"wrap,omitempty"`
20 Operands []rawNode `json:"operands,omitempty"`
21 Relation string `json:"relation,omitempty"`
22 Foundry string `json:"foundry,omitempty"`
23 Key string `json:"key,omitempty"`
24 Layer string `json:"layer,omitempty"`
25 Match string `json:"match,omitempty"`
26 Value string `json:"value,omitempty"`
Akron8f1970f2025-05-30 12:52:03 +020027 Rewrites []ast.Rewrite `json:"-"` // Handle manually
Akron56e09e72025-05-22 15:38:35 +020028 // Store any additional fields
Akron87948e82025-05-26 18:19:51 +020029 Extra map[string]any `json:"-"`
Akron56e09e72025-05-22 15:38:35 +020030}
31
32// UnmarshalJSON implements the json.Unmarshaler interface
33func (r *rawNode) UnmarshalJSON(data []byte) error {
34 // First unmarshal into a map to capture all fields
Akron87948e82025-05-26 18:19:51 +020035 var raw map[string]any
Akron56e09e72025-05-22 15:38:35 +020036 if err := json.Unmarshal(data, &raw); err != nil {
37 return err
38 }
39
Akron8f1970f2025-05-30 12:52:03 +020040 // Create a temporary struct without the problematic fields
41 type tempNode struct {
42 Type string `json:"@type"`
43 Wrap json.RawMessage `json:"wrap,omitempty"`
44 Operands []rawNode `json:"operands,omitempty"`
45 Relation string `json:"relation,omitempty"`
46 Foundry string `json:"foundry,omitempty"`
47 Key string `json:"key,omitempty"`
48 Layer string `json:"layer,omitempty"`
49 Match string `json:"match,omitempty"`
50 Value string `json:"value,omitempty"`
51 }
52
Akron56e09e72025-05-22 15:38:35 +020053 var temp tempNode
54 if err := json.Unmarshal(data, &temp); err != nil {
55 return err
56 }
Akron8f1970f2025-05-30 12:52:03 +020057
58 // Copy the fields
59 r.Type = temp.Type
60 r.Wrap = temp.Wrap
61 r.Operands = temp.Operands
62 r.Relation = temp.Relation
63 r.Foundry = temp.Foundry
64 r.Key = temp.Key
65 r.Layer = temp.Layer
66 r.Match = temp.Match
67 r.Value = temp.Value
68
69 // Handle rewrites manually
70 if rewritesData, exists := raw["rewrites"]; exists && rewritesData != nil {
71 if rewritesList, ok := rewritesData.([]any); ok {
72 r.Rewrites = make([]ast.Rewrite, len(rewritesList))
73 for i, rewriteData := range rewritesList {
74 rewriteBytes, err := json.Marshal(rewriteData)
75 if err != nil {
76 return fmt.Errorf("failed to marshal rewrite %d: %w", i, err)
77 }
78 var rewrite ast.Rewrite
79 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
80 return fmt.Errorf("failed to unmarshal rewrite %d: %w", i, err)
81 }
82 r.Rewrites[i] = rewrite
83 }
84 }
85 }
Akron56e09e72025-05-22 15:38:35 +020086
87 // Store any fields not in the struct in Extra
Akron87948e82025-05-26 18:19:51 +020088 r.Extra = make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +020089 for k, v := range raw {
90 switch k {
Akron1a5fccd2025-05-27 09:54:09 +020091 case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value", "rewrites":
Akron56e09e72025-05-22 15:38:35 +020092 continue
93 default:
94 r.Extra[k] = v
95 }
96 }
97
98 return nil
99}
100
101// MarshalJSON implements the json.Marshaler interface
102func (r rawNode) MarshalJSON() ([]byte, error) {
103 // Create a map with all fields
Akron87948e82025-05-26 18:19:51 +0200104 raw := make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +0200105
106 // Add the known fields if they're not empty
107 raw["@type"] = r.Type
108 if r.Wrap != nil {
109 raw["wrap"] = r.Wrap
110 }
111 if len(r.Operands) > 0 {
112 raw["operands"] = r.Operands
113 }
114 if r.Relation != "" {
115 raw["relation"] = r.Relation
116 }
117 if r.Foundry != "" {
118 raw["foundry"] = r.Foundry
119 }
120 if r.Key != "" {
121 raw["key"] = r.Key
122 }
123 if r.Layer != "" {
124 raw["layer"] = r.Layer
125 }
126 if r.Match != "" {
127 raw["match"] = r.Match
128 }
129 if r.Value != "" {
130 raw["value"] = r.Value
131 }
Akron1a5fccd2025-05-27 09:54:09 +0200132 if len(r.Rewrites) > 0 {
133 raw["rewrites"] = r.Rewrites
134 }
Akron56e09e72025-05-22 15:38:35 +0200135
136 // Add any extra fields
Akron87948e82025-05-26 18:19:51 +0200137 maps.Copy(raw, r.Extra)
Akron56e09e72025-05-22 15:38:35 +0200138
139 return json.Marshal(raw)
Akronb7e1f352025-05-16 15:45:23 +0200140}
141
142// ParseJSON parses a JSON string into our AST representation
143func ParseJSON(data []byte) (ast.Node, error) {
144 var raw rawNode
145 if err := json.Unmarshal(data, &raw); err != nil {
146 return nil, fmt.Errorf("failed to parse JSON: %w", err)
147 }
Akron32958422025-05-16 16:33:05 +0200148 if raw.Type == "" {
Akron56e09e72025-05-22 15:38:35 +0200149 return nil, fmt.Errorf("missing required field '@type' in JSON")
Akron32958422025-05-16 16:33:05 +0200150 }
Akronb7e1f352025-05-16 15:45:23 +0200151 return parseNode(raw)
152}
153
154// parseNode converts a raw node into an AST node
155func parseNode(raw rawNode) (ast.Node, error) {
156 switch raw.Type {
157 case "koral:token":
158 if raw.Wrap == nil {
Akron56e09e72025-05-22 15:38:35 +0200159 return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
Akronb7e1f352025-05-16 15:45:23 +0200160 }
161 var wrapRaw rawNode
162 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200163 return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200164 }
165 wrap, err := parseNode(wrapRaw)
166 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200167 return nil, fmt.Errorf("error parsing wrapped node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200168 }
Akron1a5fccd2025-05-27 09:54:09 +0200169 return &ast.Token{Wrap: wrap, Rewrites: raw.Rewrites}, nil
Akronb7e1f352025-05-16 15:45:23 +0200170
171 case "koral:termGroup":
Akron56e09e72025-05-22 15:38:35 +0200172 if len(raw.Operands) == 0 {
173 return nil, fmt.Errorf("term group must have at least one operand")
174 }
175
Akronb7e1f352025-05-16 15:45:23 +0200176 operands := make([]ast.Node, len(raw.Operands))
177 for i, op := range raw.Operands {
178 node, err := parseNode(op)
179 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200180 return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
Akronb7e1f352025-05-16 15:45:23 +0200181 }
182 operands[i] = node
183 }
184
Akron56e09e72025-05-22 15:38:35 +0200185 if raw.Relation == "" {
186 return nil, fmt.Errorf("term group must have a 'relation' field")
187 }
188
Akronb7e1f352025-05-16 15:45:23 +0200189 relation := ast.AndRelation
190 if strings.HasSuffix(raw.Relation, "or") {
191 relation = ast.OrRelation
Akron56e09e72025-05-22 15:38:35 +0200192 } else if !strings.HasSuffix(raw.Relation, "and") {
193 return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
Akronb7e1f352025-05-16 15:45:23 +0200194 }
195
196 return &ast.TermGroup{
197 Operands: operands,
198 Relation: relation,
Akron1a5fccd2025-05-27 09:54:09 +0200199 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200200 }, nil
201
202 case "koral:term":
Akron56e09e72025-05-22 15:38:35 +0200203 if raw.Key == "" {
204 return nil, fmt.Errorf("term must have a 'key' field")
205 }
206
Akronb7e1f352025-05-16 15:45:23 +0200207 match := ast.MatchEqual
Akron56e09e72025-05-22 15:38:35 +0200208 if raw.Match != "" {
209 if strings.HasSuffix(raw.Match, "ne") {
210 match = ast.MatchNotEqual
211 } else if !strings.HasSuffix(raw.Match, "eq") {
212 return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
213 }
Akronb7e1f352025-05-16 15:45:23 +0200214 }
215
216 return &ast.Term{
Akron1a5fccd2025-05-27 09:54:09 +0200217 Foundry: raw.Foundry,
218 Key: raw.Key,
219 Layer: raw.Layer,
220 Match: match,
221 Value: raw.Value,
222 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200223 }, nil
224
225 default:
Akron32958422025-05-16 16:33:05 +0200226 // Store the original JSON content
227 rawContent, err := json.Marshal(raw)
228 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200229 return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200230 }
231
232 // Create a catchall node
233 catchall := &ast.CatchallNode{
234 NodeType: raw.Type,
235 RawContent: rawContent,
236 }
237
238 // Parse wrap if present
239 if raw.Wrap != nil {
240 var wrapRaw rawNode
241 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200242 return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200243 }
Akron56e09e72025-05-22 15:38:35 +0200244
245 // Check if the wrapped node is a known type
246 if wrapRaw.Type == "koral:term" || wrapRaw.Type == "koral:token" || wrapRaw.Type == "koral:termGroup" {
247 wrap, err := parseNode(wrapRaw)
248 if err != nil {
249 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
250 }
251 catchall.Wrap = wrap
252 } else {
253 // For unknown types, recursively parse
254 wrap, err := parseNode(wrapRaw)
255 if err != nil {
256 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
257 }
258 catchall.Wrap = wrap
Akron32958422025-05-16 16:33:05 +0200259 }
Akron32958422025-05-16 16:33:05 +0200260 }
261
262 // Parse operands if present
263 if len(raw.Operands) > 0 {
264 operands := make([]ast.Node, len(raw.Operands))
265 for i, op := range raw.Operands {
Akron56e09e72025-05-22 15:38:35 +0200266 // Check if the operand is a known type
267 if op.Type == "koral:term" || op.Type == "koral:token" || op.Type == "koral:termGroup" {
268 node, err := parseNode(op)
269 if err != nil {
270 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
271 }
272 operands[i] = node
273 } else {
274 // For unknown types, recursively parse
275 node, err := parseNode(op)
276 if err != nil {
277 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
278 }
279 operands[i] = node
Akron32958422025-05-16 16:33:05 +0200280 }
Akron32958422025-05-16 16:33:05 +0200281 }
282 catchall.Operands = operands
283 }
284
285 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200286 }
287}
288
289// SerializeToJSON converts an AST node back to JSON
290func SerializeToJSON(node ast.Node) ([]byte, error) {
Akron87948e82025-05-26 18:19:51 +0200291 return json.MarshalIndent(nodeToRaw(node), "", " ")
Akronb7e1f352025-05-16 15:45:23 +0200292}
293
294// nodeToRaw converts an AST node to a raw node for JSON serialization
295func nodeToRaw(node ast.Node) rawNode {
296 switch n := node.(type) {
297 case *ast.Token:
Akron56e09e72025-05-22 15:38:35 +0200298 if n.Wrap == nil {
299 return rawNode{
300 Type: "koral:token",
301 }
302 }
Akronb7e1f352025-05-16 15:45:23 +0200303 return rawNode{
304 Type: "koral:token",
305 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
306 }
307
308 case *ast.TermGroup:
309 operands := make([]rawNode, len(n.Operands))
310 for i, op := range n.Operands {
311 operands[i] = nodeToRaw(op)
312 }
313 return rawNode{
314 Type: "koral:termGroup",
315 Operands: operands,
316 Relation: "relation:" + string(n.Relation),
317 }
318
319 case *ast.Term:
Akron56e09e72025-05-22 15:38:35 +0200320 raw := rawNode{
321 Type: "koral:term",
322 Key: n.Key,
323 Match: "match:" + string(n.Match),
Akronb7e1f352025-05-16 15:45:23 +0200324 }
Akron56e09e72025-05-22 15:38:35 +0200325 if n.Foundry != "" {
326 raw.Foundry = n.Foundry
327 }
328 if n.Layer != "" {
329 raw.Layer = n.Layer
330 }
331 if n.Value != "" {
332 raw.Value = n.Value
333 }
334 return raw
Akronb7e1f352025-05-16 15:45:23 +0200335
Akron32958422025-05-16 16:33:05 +0200336 case *ast.CatchallNode:
Akron56e09e72025-05-22 15:38:35 +0200337 // For catchall nodes, use the stored raw content if available
Akron32958422025-05-16 16:33:05 +0200338 if n.RawContent != nil {
Akron56e09e72025-05-22 15:38:35 +0200339 var raw rawNode
340 if err := json.Unmarshal(n.RawContent, &raw); err == nil {
341 // Ensure we preserve the node type
342 raw.Type = n.NodeType
Akron32958422025-05-16 16:33:05 +0200343
Akron56e09e72025-05-22 15:38:35 +0200344 // Handle wrap and operands if present
Akron32958422025-05-16 16:33:05 +0200345 if n.Wrap != nil {
346 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
347 }
Akron56e09e72025-05-22 15:38:35 +0200348 if len(n.Operands) > 0 {
349 operands := make([]rawNode, len(n.Operands))
350 for i, op := range n.Operands {
351 operands[i] = nodeToRaw(op)
352 }
353 raw.Operands = operands
354 }
Akron32958422025-05-16 16:33:05 +0200355 return raw
356 }
Akron32958422025-05-16 16:33:05 +0200357 }
Akron32958422025-05-16 16:33:05 +0200358
Akron56e09e72025-05-22 15:38:35 +0200359 // If RawContent is nil or invalid, create a minimal raw node
360 raw := rawNode{
361 Type: n.NodeType,
362 }
363 if n.Wrap != nil {
364 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
365 }
366 if len(n.Operands) > 0 {
367 operands := make([]rawNode, len(n.Operands))
368 for i, op := range n.Operands {
369 operands[i] = nodeToRaw(op)
370 }
371 raw.Operands = operands
372 }
373 return raw
374 }
375
376 // Return a minimal raw node for unknown types
377 return rawNode{
378 Type: "koral:unknown",
Akronb7e1f352025-05-16 15:45:23 +0200379 }
380}
381
382// toJSON converts a raw node to JSON bytes
383func (r rawNode) toJSON() []byte {
Akron56e09e72025-05-22 15:38:35 +0200384 data, err := json.Marshal(r)
385 if err != nil {
386 // Return a minimal valid JSON object if marshaling fails
387 return []byte(`{"@type":"koral:unknown"}`)
388 }
Akronb7e1f352025-05-16 15:45:23 +0200389 return data
390}