blob: 6c3c7505d9395b22ce74919df7884e793c13bc38 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// Package parser parses JSON into AST nodes and serializes AST nodes back
// into JSON.
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
Akron87948e82025-05-26 18:19:51 +020011 "maps"
12
Akronfa55bb22025-05-26 15:10:42 +020013 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +020014)
15
// rawNode represents the raw JSON structure of a single node. It is the
// intermediate form between the JSON text and the AST: parseNode turns a
// rawNode into an ast.Node and nodeToRaw does the reverse.
type rawNode struct {
	// Type is the value of the "@type" key; parseNode dispatches on it.
	Type string `json:"@type"`
	// Wrap is the still-encoded JSON of a wrapped child node. It is decoded
	// into another rawNode only when parseNode needs it.
	Wrap json.RawMessage `json:"wrap,omitempty"`
	// Operands are the child nodes of the node.
	Operands []rawNode `json:"operands,omitempty"`
	// Relation is the raw "relation" string of a term group.
	Relation string `json:"relation,omitempty"`
	// Foundry, Key, Layer, Match and Value are the raw string attributes
	// that parseNode copies into an ast.Term.
	Foundry string `json:"foundry,omitempty"`
	Key     string `json:"key,omitempty"`
	Layer   string `json:"layer,omitempty"`
	Match   string `json:"match,omitempty"`
	Value   string `json:"value,omitempty"`
	// Rewrites are the rewrite entries attached to the node.
	Rewrites []ast.Rewrite `json:"rewrites,omitempty"`
	// Store any additional fields. Extra is excluded from the default
	// encoding ("-"); UnmarshalJSON fills it with every key not listed
	// above and MarshalJSON writes its entries back out.
	Extra map[string]any `json:"-"`
}
31
32// UnmarshalJSON implements the json.Unmarshaler interface
33func (r *rawNode) UnmarshalJSON(data []byte) error {
34 // First unmarshal into a map to capture all fields
Akron87948e82025-05-26 18:19:51 +020035 var raw map[string]any
Akron56e09e72025-05-22 15:38:35 +020036 if err := json.Unmarshal(data, &raw); err != nil {
37 return err
38 }
39
40 // Create a temporary struct to unmarshal known fields
41 type tempNode rawNode
42 var temp tempNode
43 if err := json.Unmarshal(data, &temp); err != nil {
44 return err
45 }
46 *r = rawNode(temp)
47
48 // Store any fields not in the struct in Extra
Akron87948e82025-05-26 18:19:51 +020049 r.Extra = make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +020050 for k, v := range raw {
51 switch k {
Akron1a5fccd2025-05-27 09:54:09 +020052 case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value", "rewrites":
Akron56e09e72025-05-22 15:38:35 +020053 continue
54 default:
55 r.Extra[k] = v
56 }
57 }
58
59 return nil
60}
61
62// MarshalJSON implements the json.Marshaler interface
63func (r rawNode) MarshalJSON() ([]byte, error) {
64 // Create a map with all fields
Akron87948e82025-05-26 18:19:51 +020065 raw := make(map[string]any)
Akron56e09e72025-05-22 15:38:35 +020066
67 // Add the known fields if they're not empty
68 raw["@type"] = r.Type
69 if r.Wrap != nil {
70 raw["wrap"] = r.Wrap
71 }
72 if len(r.Operands) > 0 {
73 raw["operands"] = r.Operands
74 }
75 if r.Relation != "" {
76 raw["relation"] = r.Relation
77 }
78 if r.Foundry != "" {
79 raw["foundry"] = r.Foundry
80 }
81 if r.Key != "" {
82 raw["key"] = r.Key
83 }
84 if r.Layer != "" {
85 raw["layer"] = r.Layer
86 }
87 if r.Match != "" {
88 raw["match"] = r.Match
89 }
90 if r.Value != "" {
91 raw["value"] = r.Value
92 }
Akron1a5fccd2025-05-27 09:54:09 +020093 if len(r.Rewrites) > 0 {
94 raw["rewrites"] = r.Rewrites
95 }
Akron56e09e72025-05-22 15:38:35 +020096
97 // Add any extra fields
Akron87948e82025-05-26 18:19:51 +020098 maps.Copy(raw, r.Extra)
Akron56e09e72025-05-22 15:38:35 +020099
100 return json.Marshal(raw)
Akronb7e1f352025-05-16 15:45:23 +0200101}
102
103// ParseJSON parses a JSON string into our AST representation
104func ParseJSON(data []byte) (ast.Node, error) {
105 var raw rawNode
106 if err := json.Unmarshal(data, &raw); err != nil {
107 return nil, fmt.Errorf("failed to parse JSON: %w", err)
108 }
Akron32958422025-05-16 16:33:05 +0200109 if raw.Type == "" {
Akron56e09e72025-05-22 15:38:35 +0200110 return nil, fmt.Errorf("missing required field '@type' in JSON")
Akron32958422025-05-16 16:33:05 +0200111 }
Akronb7e1f352025-05-16 15:45:23 +0200112 return parseNode(raw)
113}
114
115// parseNode converts a raw node into an AST node
116func parseNode(raw rawNode) (ast.Node, error) {
117 switch raw.Type {
118 case "koral:token":
119 if raw.Wrap == nil {
Akron56e09e72025-05-22 15:38:35 +0200120 return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
Akronb7e1f352025-05-16 15:45:23 +0200121 }
122 var wrapRaw rawNode
123 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200124 return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200125 }
126 wrap, err := parseNode(wrapRaw)
127 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200128 return nil, fmt.Errorf("error parsing wrapped node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200129 }
Akron1a5fccd2025-05-27 09:54:09 +0200130 return &ast.Token{Wrap: wrap, Rewrites: raw.Rewrites}, nil
Akronb7e1f352025-05-16 15:45:23 +0200131
132 case "koral:termGroup":
Akron56e09e72025-05-22 15:38:35 +0200133 if len(raw.Operands) == 0 {
134 return nil, fmt.Errorf("term group must have at least one operand")
135 }
136
Akronb7e1f352025-05-16 15:45:23 +0200137 operands := make([]ast.Node, len(raw.Operands))
138 for i, op := range raw.Operands {
139 node, err := parseNode(op)
140 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200141 return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
Akronb7e1f352025-05-16 15:45:23 +0200142 }
143 operands[i] = node
144 }
145
Akron56e09e72025-05-22 15:38:35 +0200146 if raw.Relation == "" {
147 return nil, fmt.Errorf("term group must have a 'relation' field")
148 }
149
Akronb7e1f352025-05-16 15:45:23 +0200150 relation := ast.AndRelation
151 if strings.HasSuffix(raw.Relation, "or") {
152 relation = ast.OrRelation
Akron56e09e72025-05-22 15:38:35 +0200153 } else if !strings.HasSuffix(raw.Relation, "and") {
154 return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
Akronb7e1f352025-05-16 15:45:23 +0200155 }
156
157 return &ast.TermGroup{
158 Operands: operands,
159 Relation: relation,
Akron1a5fccd2025-05-27 09:54:09 +0200160 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200161 }, nil
162
163 case "koral:term":
Akron56e09e72025-05-22 15:38:35 +0200164 if raw.Key == "" {
165 return nil, fmt.Errorf("term must have a 'key' field")
166 }
167
Akronb7e1f352025-05-16 15:45:23 +0200168 match := ast.MatchEqual
Akron56e09e72025-05-22 15:38:35 +0200169 if raw.Match != "" {
170 if strings.HasSuffix(raw.Match, "ne") {
171 match = ast.MatchNotEqual
172 } else if !strings.HasSuffix(raw.Match, "eq") {
173 return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
174 }
Akronb7e1f352025-05-16 15:45:23 +0200175 }
176
177 return &ast.Term{
Akron1a5fccd2025-05-27 09:54:09 +0200178 Foundry: raw.Foundry,
179 Key: raw.Key,
180 Layer: raw.Layer,
181 Match: match,
182 Value: raw.Value,
183 Rewrites: raw.Rewrites,
Akronb7e1f352025-05-16 15:45:23 +0200184 }, nil
185
186 default:
Akron32958422025-05-16 16:33:05 +0200187 // Store the original JSON content
188 rawContent, err := json.Marshal(raw)
189 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200190 return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200191 }
192
193 // Create a catchall node
194 catchall := &ast.CatchallNode{
195 NodeType: raw.Type,
196 RawContent: rawContent,
197 }
198
199 // Parse wrap if present
200 if raw.Wrap != nil {
201 var wrapRaw rawNode
202 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200203 return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200204 }
Akron56e09e72025-05-22 15:38:35 +0200205
206 // Check if the wrapped node is a known type
207 if wrapRaw.Type == "koral:term" || wrapRaw.Type == "koral:token" || wrapRaw.Type == "koral:termGroup" {
208 wrap, err := parseNode(wrapRaw)
209 if err != nil {
210 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
211 }
212 catchall.Wrap = wrap
213 } else {
214 // For unknown types, recursively parse
215 wrap, err := parseNode(wrapRaw)
216 if err != nil {
217 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
218 }
219 catchall.Wrap = wrap
Akron32958422025-05-16 16:33:05 +0200220 }
Akron32958422025-05-16 16:33:05 +0200221 }
222
223 // Parse operands if present
224 if len(raw.Operands) > 0 {
225 operands := make([]ast.Node, len(raw.Operands))
226 for i, op := range raw.Operands {
Akron56e09e72025-05-22 15:38:35 +0200227 // Check if the operand is a known type
228 if op.Type == "koral:term" || op.Type == "koral:token" || op.Type == "koral:termGroup" {
229 node, err := parseNode(op)
230 if err != nil {
231 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
232 }
233 operands[i] = node
234 } else {
235 // For unknown types, recursively parse
236 node, err := parseNode(op)
237 if err != nil {
238 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
239 }
240 operands[i] = node
Akron32958422025-05-16 16:33:05 +0200241 }
Akron32958422025-05-16 16:33:05 +0200242 }
243 catchall.Operands = operands
244 }
245
246 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200247 }
248}
249
250// SerializeToJSON converts an AST node back to JSON
251func SerializeToJSON(node ast.Node) ([]byte, error) {
Akron87948e82025-05-26 18:19:51 +0200252 return json.MarshalIndent(nodeToRaw(node), "", " ")
Akronb7e1f352025-05-16 15:45:23 +0200253}
254
255// nodeToRaw converts an AST node to a raw node for JSON serialization
256func nodeToRaw(node ast.Node) rawNode {
257 switch n := node.(type) {
258 case *ast.Token:
Akron56e09e72025-05-22 15:38:35 +0200259 if n.Wrap == nil {
260 return rawNode{
261 Type: "koral:token",
262 }
263 }
Akronb7e1f352025-05-16 15:45:23 +0200264 return rawNode{
265 Type: "koral:token",
266 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
267 }
268
269 case *ast.TermGroup:
270 operands := make([]rawNode, len(n.Operands))
271 for i, op := range n.Operands {
272 operands[i] = nodeToRaw(op)
273 }
274 return rawNode{
275 Type: "koral:termGroup",
276 Operands: operands,
277 Relation: "relation:" + string(n.Relation),
278 }
279
280 case *ast.Term:
Akron56e09e72025-05-22 15:38:35 +0200281 raw := rawNode{
282 Type: "koral:term",
283 Key: n.Key,
284 Match: "match:" + string(n.Match),
Akronb7e1f352025-05-16 15:45:23 +0200285 }
Akron56e09e72025-05-22 15:38:35 +0200286 if n.Foundry != "" {
287 raw.Foundry = n.Foundry
288 }
289 if n.Layer != "" {
290 raw.Layer = n.Layer
291 }
292 if n.Value != "" {
293 raw.Value = n.Value
294 }
295 return raw
Akronb7e1f352025-05-16 15:45:23 +0200296
Akron32958422025-05-16 16:33:05 +0200297 case *ast.CatchallNode:
Akron56e09e72025-05-22 15:38:35 +0200298 // For catchall nodes, use the stored raw content if available
Akron32958422025-05-16 16:33:05 +0200299 if n.RawContent != nil {
Akron56e09e72025-05-22 15:38:35 +0200300 var raw rawNode
301 if err := json.Unmarshal(n.RawContent, &raw); err == nil {
302 // Ensure we preserve the node type
303 raw.Type = n.NodeType
Akron32958422025-05-16 16:33:05 +0200304
Akron56e09e72025-05-22 15:38:35 +0200305 // Handle wrap and operands if present
Akron32958422025-05-16 16:33:05 +0200306 if n.Wrap != nil {
307 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
308 }
Akron56e09e72025-05-22 15:38:35 +0200309 if len(n.Operands) > 0 {
310 operands := make([]rawNode, len(n.Operands))
311 for i, op := range n.Operands {
312 operands[i] = nodeToRaw(op)
313 }
314 raw.Operands = operands
315 }
Akron32958422025-05-16 16:33:05 +0200316 return raw
317 }
Akron32958422025-05-16 16:33:05 +0200318 }
Akron32958422025-05-16 16:33:05 +0200319
Akron56e09e72025-05-22 15:38:35 +0200320 // If RawContent is nil or invalid, create a minimal raw node
321 raw := rawNode{
322 Type: n.NodeType,
323 }
324 if n.Wrap != nil {
325 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
326 }
327 if len(n.Operands) > 0 {
328 operands := make([]rawNode, len(n.Operands))
329 for i, op := range n.Operands {
330 operands[i] = nodeToRaw(op)
331 }
332 raw.Operands = operands
333 }
334 return raw
335 }
336
337 // Return a minimal raw node for unknown types
338 return rawNode{
339 Type: "koral:unknown",
Akronb7e1f352025-05-16 15:45:23 +0200340 }
341}
342
343// toJSON converts a raw node to JSON bytes
344func (r rawNode) toJSON() []byte {
Akron56e09e72025-05-22 15:38:35 +0200345 data, err := json.Marshal(r)
346 if err != nil {
347 // Return a minimal valid JSON object if marshaling fails
348 return []byte(`{"@type":"koral:unknown"}`)
349 }
Akronb7e1f352025-05-16 15:45:23 +0200350 return data
351}