blob: e122edc430ab4f8e310951c8179399213e747f7b [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// This package parses JSON into AST nodes (ParseJSON) and serializes AST
// nodes back to JSON (SerializeToJSON).
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
Akron87948e82025-05-26 18:19:51 +020011 "maps"
12
Akronfa55bb22025-05-26 15:10:42 +020013 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +020014)
15
16// rawNode represents the raw JSON structure
17type rawNode struct {
18 Type string `json:"@type"`
19 Wrap json.RawMessage `json:"wrap,omitempty"`
20 Operands []rawNode `json:"operands,omitempty"`
21 Relation string `json:"relation,omitempty"`
22 Foundry string `json:"foundry,omitempty"`
23 Key string `json:"key,omitempty"`
24 Layer string `json:"layer,omitempty"`
25 Match string `json:"match,omitempty"`
26 Value string `json:"value,omitempty"`
Akron8f1970f2025-05-30 12:52:03 +020027 Rewrites []ast.Rewrite `json:"-"` // Handle manually
Akron56e09e72025-05-22 15:38:35 +020028 // Store any additional fields
Akron87948e82025-05-26 18:19:51 +020029 Extra map[string]any `json:"-"`
Akron56e09e72025-05-22 15:38:35 +020030}
31
32// UnmarshalJSON implements the json.Unmarshaler interface
33func (r *rawNode) UnmarshalJSON(data []byte) error {
34 // First unmarshal into a map to capture all fields
Akron87948e82025-05-26 18:19:51 +020035 var raw map[string]any
Akron56e09e72025-05-22 15:38:35 +020036 if err := json.Unmarshal(data, &raw); err != nil {
37 return err
38 }
39
Akron8f1970f2025-05-30 12:52:03 +020040 // Create a temporary struct without the problematic fields
41 type tempNode struct {
42 Type string `json:"@type"`
43 Wrap json.RawMessage `json:"wrap,omitempty"`
44 Operands []rawNode `json:"operands,omitempty"`
45 Relation string `json:"relation,omitempty"`
46 Foundry string `json:"foundry,omitempty"`
47 Key string `json:"key,omitempty"`
48 Layer string `json:"layer,omitempty"`
49 Match string `json:"match,omitempty"`
50 Value string `json:"value,omitempty"`
51 }
52
Akron56e09e72025-05-22 15:38:35 +020053 var temp tempNode
54 if err := json.Unmarshal(data, &temp); err != nil {
55 return err
56 }
Akron8f1970f2025-05-30 12:52:03 +020057
58 // Copy the fields
59 r.Type = temp.Type
60 r.Wrap = temp.Wrap
61 r.Operands = temp.Operands
62 r.Relation = temp.Relation
63 r.Foundry = temp.Foundry
64 r.Key = temp.Key
65 r.Layer = temp.Layer
66 r.Match = temp.Match
67 r.Value = temp.Value
68
69 // Handle rewrites manually
70 if rewritesData, exists := raw["rewrites"]; exists && rewritesData != nil {
71 if rewritesList, ok := rewritesData.([]any); ok {
72 r.Rewrites = make([]ast.Rewrite, len(rewritesList))
73 for i, rewriteData := range rewritesList {
74 rewriteBytes, err := json.Marshal(rewriteData)
75 if err != nil {
76 return fmt.Errorf("failed to marshal rewrite %d: %w", i, err)
77 }
78 var rewrite ast.Rewrite
79 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
80 return fmt.Errorf("failed to unmarshal rewrite %d: %w", i, err)
81 }
82 r.Rewrites[i] = rewrite
83 }
84 }
85 }
Akron56e09e72025-05-22 15:38:35 +020086
87 // Store any fields not in the struct in Extra
Akron87948e82025-05-26 18:19:51 +020088 r.Extra = make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +020089 for k, v := range raw {
90 switch k {
Akron1a5fccd2025-05-27 09:54:09 +020091 case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value", "rewrites":
Akron56e09e72025-05-22 15:38:35 +020092 continue
93 default:
94 r.Extra[k] = v
95 }
96 }
97
98 return nil
99}
100
101// MarshalJSON implements the json.Marshaler interface
102func (r rawNode) MarshalJSON() ([]byte, error) {
103 // Create a map with all fields
Akron87948e82025-05-26 18:19:51 +0200104 raw := make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +0200105
106 // Add the known fields if they're not empty
107 raw["@type"] = r.Type
108 if r.Wrap != nil {
109 raw["wrap"] = r.Wrap
110 }
111 if len(r.Operands) > 0 {
112 raw["operands"] = r.Operands
113 }
114 if r.Relation != "" {
115 raw["relation"] = r.Relation
116 }
117 if r.Foundry != "" {
118 raw["foundry"] = r.Foundry
119 }
120 if r.Key != "" {
121 raw["key"] = r.Key
122 }
123 if r.Layer != "" {
124 raw["layer"] = r.Layer
125 }
126 if r.Match != "" {
127 raw["match"] = r.Match
128 }
129 if r.Value != "" {
130 raw["value"] = r.Value
131 }
Akron1a5fccd2025-05-27 09:54:09 +0200132 if len(r.Rewrites) > 0 {
133 raw["rewrites"] = r.Rewrites
134 }
Akron56e09e72025-05-22 15:38:35 +0200135
136 // Add any extra fields
Akron87948e82025-05-26 18:19:51 +0200137 maps.Copy(raw, r.Extra)
Akron56e09e72025-05-22 15:38:35 +0200138
139 return json.Marshal(raw)
Akronb7e1f352025-05-16 15:45:23 +0200140}
141
142// ParseJSON parses a JSON string into our AST representation
143func ParseJSON(data []byte) (ast.Node, error) {
144 var raw rawNode
145 if err := json.Unmarshal(data, &raw); err != nil {
146 return nil, fmt.Errorf("failed to parse JSON: %w", err)
147 }
Akron32958422025-05-16 16:33:05 +0200148 if raw.Type == "" {
Akron56e09e72025-05-22 15:38:35 +0200149 return nil, fmt.Errorf("missing required field '@type' in JSON")
Akron32958422025-05-16 16:33:05 +0200150 }
Akronb7e1f352025-05-16 15:45:23 +0200151 return parseNode(raw)
152}
153
154// parseNode converts a raw node into an AST node
155func parseNode(raw rawNode) (ast.Node, error) {
156 switch raw.Type {
157 case "koral:token":
158 if raw.Wrap == nil {
Akron56e09e72025-05-22 15:38:35 +0200159 return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
Akronb7e1f352025-05-16 15:45:23 +0200160 }
161 var wrapRaw rawNode
162 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200163 return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200164 }
165 wrap, err := parseNode(wrapRaw)
166 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200167 return nil, fmt.Errorf("error parsing wrapped node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200168 }
Akron1a5fccd2025-05-27 09:54:09 +0200169 return &ast.Token{Wrap: wrap, Rewrites: raw.Rewrites}, nil
Akronb7e1f352025-05-16 15:45:23 +0200170
171 case "koral:termGroup":
Akron56e09e72025-05-22 15:38:35 +0200172 if len(raw.Operands) == 0 {
173 return nil, fmt.Errorf("term group must have at least one operand")
174 }
175
Akronb7e1f352025-05-16 15:45:23 +0200176 operands := make([]ast.Node, len(raw.Operands))
177 for i, op := range raw.Operands {
178 node, err := parseNode(op)
179 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200180 return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
Akronb7e1f352025-05-16 15:45:23 +0200181 }
182 operands[i] = node
183 }
184
Akron56e09e72025-05-22 15:38:35 +0200185 if raw.Relation == "" {
186 return nil, fmt.Errorf("term group must have a 'relation' field")
187 }
188
Akronb7e1f352025-05-16 15:45:23 +0200189 relation := ast.AndRelation
190 if strings.HasSuffix(raw.Relation, "or") {
191 relation = ast.OrRelation
Akron56e09e72025-05-22 15:38:35 +0200192 } else if !strings.HasSuffix(raw.Relation, "and") {
193 return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
Akronb7e1f352025-05-16 15:45:23 +0200194 }
195
196 return &ast.TermGroup{
197 Operands: operands,
198 Relation: relation,
Akron1a5fccd2025-05-27 09:54:09 +0200199 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200200 }, nil
201
202 case "koral:term":
Akron56e09e72025-05-22 15:38:35 +0200203 if raw.Key == "" {
204 return nil, fmt.Errorf("term must have a 'key' field")
205 }
206
Akronb7e1f352025-05-16 15:45:23 +0200207 match := ast.MatchEqual
Akron56e09e72025-05-22 15:38:35 +0200208 if raw.Match != "" {
209 if strings.HasSuffix(raw.Match, "ne") {
210 match = ast.MatchNotEqual
211 } else if !strings.HasSuffix(raw.Match, "eq") {
212 return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
213 }
Akronb7e1f352025-05-16 15:45:23 +0200214 }
215
216 return &ast.Term{
Akron1a5fccd2025-05-27 09:54:09 +0200217 Foundry: raw.Foundry,
218 Key: raw.Key,
219 Layer: raw.Layer,
220 Match: match,
221 Value: raw.Value,
222 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200223 }, nil
224
225 default:
Akron32958422025-05-16 16:33:05 +0200226 // Store the original JSON content
227 rawContent, err := json.Marshal(raw)
228 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200229 return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200230 }
231
232 // Create a catchall node
233 catchall := &ast.CatchallNode{
234 NodeType: raw.Type,
235 RawContent: rawContent,
236 }
237
238 // Parse wrap if present
239 if raw.Wrap != nil {
240 var wrapRaw rawNode
241 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200242 return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200243 }
Akron56e09e72025-05-22 15:38:35 +0200244
Akron6b4c9eb2025-07-03 14:31:58 +0200245 wrap, err := parseNode(wrapRaw)
246 if err != nil {
247 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200248 }
Akron6b4c9eb2025-07-03 14:31:58 +0200249 catchall.Wrap = wrap
Akron32958422025-05-16 16:33:05 +0200250 }
251
252 // Parse operands if present
253 if len(raw.Operands) > 0 {
254 operands := make([]ast.Node, len(raw.Operands))
255 for i, op := range raw.Operands {
Akron6b4c9eb2025-07-03 14:31:58 +0200256 node, err := parseNode(op)
257 if err != nil {
258 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200259 }
Akron6b4c9eb2025-07-03 14:31:58 +0200260 operands[i] = node
Akron32958422025-05-16 16:33:05 +0200261 }
262 catchall.Operands = operands
263 }
264
265 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200266 }
267}
268
269// SerializeToJSON converts an AST node back to JSON
270func SerializeToJSON(node ast.Node) ([]byte, error) {
Akron87948e82025-05-26 18:19:51 +0200271 return json.MarshalIndent(nodeToRaw(node), "", " ")
Akronb7e1f352025-05-16 15:45:23 +0200272}
273
274// nodeToRaw converts an AST node to a raw node for JSON serialization
275func nodeToRaw(node ast.Node) rawNode {
276 switch n := node.(type) {
277 case *ast.Token:
Akron56e09e72025-05-22 15:38:35 +0200278 if n.Wrap == nil {
279 return rawNode{
280 Type: "koral:token",
281 }
282 }
Akronb7e1f352025-05-16 15:45:23 +0200283 return rawNode{
284 Type: "koral:token",
285 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
286 }
287
288 case *ast.TermGroup:
289 operands := make([]rawNode, len(n.Operands))
290 for i, op := range n.Operands {
291 operands[i] = nodeToRaw(op)
292 }
293 return rawNode{
294 Type: "koral:termGroup",
295 Operands: operands,
296 Relation: "relation:" + string(n.Relation),
297 }
298
299 case *ast.Term:
Akron56e09e72025-05-22 15:38:35 +0200300 raw := rawNode{
301 Type: "koral:term",
302 Key: n.Key,
303 Match: "match:" + string(n.Match),
Akronb7e1f352025-05-16 15:45:23 +0200304 }
Akron56e09e72025-05-22 15:38:35 +0200305 if n.Foundry != "" {
306 raw.Foundry = n.Foundry
307 }
308 if n.Layer != "" {
309 raw.Layer = n.Layer
310 }
311 if n.Value != "" {
312 raw.Value = n.Value
313 }
314 return raw
Akronb7e1f352025-05-16 15:45:23 +0200315
Akron32958422025-05-16 16:33:05 +0200316 case *ast.CatchallNode:
Akron56e09e72025-05-22 15:38:35 +0200317 // For catchall nodes, use the stored raw content if available
Akron32958422025-05-16 16:33:05 +0200318 if n.RawContent != nil {
Akron56e09e72025-05-22 15:38:35 +0200319 var raw rawNode
320 if err := json.Unmarshal(n.RawContent, &raw); err == nil {
321 // Ensure we preserve the node type
322 raw.Type = n.NodeType
Akron32958422025-05-16 16:33:05 +0200323
Akron56e09e72025-05-22 15:38:35 +0200324 // Handle wrap and operands if present
Akron32958422025-05-16 16:33:05 +0200325 if n.Wrap != nil {
326 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
327 }
Akron56e09e72025-05-22 15:38:35 +0200328 if len(n.Operands) > 0 {
329 operands := make([]rawNode, len(n.Operands))
330 for i, op := range n.Operands {
331 operands[i] = nodeToRaw(op)
332 }
333 raw.Operands = operands
334 }
Akron32958422025-05-16 16:33:05 +0200335 return raw
336 }
Akron32958422025-05-16 16:33:05 +0200337 }
Akron32958422025-05-16 16:33:05 +0200338
Akron56e09e72025-05-22 15:38:35 +0200339 // If RawContent is nil or invalid, create a minimal raw node
340 raw := rawNode{
341 Type: n.NodeType,
342 }
343 if n.Wrap != nil {
344 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
345 }
346 if len(n.Operands) > 0 {
347 operands := make([]rawNode, len(n.Operands))
348 for i, op := range n.Operands {
349 operands[i] = nodeToRaw(op)
350 }
351 raw.Operands = operands
352 }
353 return raw
354 }
355
356 // Return a minimal raw node for unknown types
357 return rawNode{
358 Type: "koral:unknown",
Akronb7e1f352025-05-16 15:45:23 +0200359 }
360}
361
362// toJSON converts a raw node to JSON bytes
363func (r rawNode) toJSON() []byte {
Akron56e09e72025-05-22 15:38:35 +0200364 data, err := json.Marshal(r)
365 if err != nil {
366 // Return a minimal valid JSON object if marshaling fails
367 return []byte(`{"@type":"koral:unknown"}`)
368 }
Akronb7e1f352025-05-16 15:45:23 +0200369 return data
370}