blob: e9b1102d2de4016c810783ecf7d2eab1b15eb2a3 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// Package parser converts JSON documents into AST nodes (see ParseJSON)
// and serializes AST nodes back to JSON (see SerializeToJSON).
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
Akron87948e82025-05-26 18:19:51 +020011 "maps"
12
Akronfa55bb22025-05-26 15:10:42 +020013 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +020014)
15
// rawNode is the wire-level representation of a single JSON node. The named
// fields cover the keys this package interprets; every other key of the JSON
// object is kept in Extra so it survives a decode/encode round trip.
type rawNode struct {
	Type     string          `json:"@type"`
	Wrap     json.RawMessage `json:"wrap,omitempty"`
	Operands []rawNode       `json:"operands,omitempty"`
	Relation string          `json:"relation,omitempty"`
	Foundry  string          `json:"foundry,omitempty"`
	Key      string          `json:"key,omitempty"`
	Layer    string          `json:"layer,omitempty"`
	Match    string          `json:"match,omitempty"`
	Value    string          `json:"value,omitempty"`
	// Extra holds every key without a dedicated field above. It is filled by
	// UnmarshalJSON and emitted by MarshalJSON, never through struct tags.
	Extra map[string]any `json:"-"`
}

// UnmarshalJSON implements the json.Unmarshaler interface.
//
// Known keys are decoded into the struct fields; all remaining keys are
// decoded generically and stored in Extra. The receiver is only modified when
// the whole payload decodes successfully. Extra is always a non-nil map
// afterwards.
func (r *rawNode) UnmarshalJSON(data []byte) error {
	// Split the object into its members without decoding the values yet, so
	// that potentially large known members (wrap, operands) are not expanded
	// into generic maps only to be thrown away.
	var members map[string]json.RawMessage
	if err := json.Unmarshal(data, &members); err != nil {
		return err
	}

	// The alias type has the same fields but no methods, which prevents
	// json.Unmarshal from recursing back into this method.
	type plainNode rawNode
	var known plainNode
	if err := json.Unmarshal(data, &known); err != nil {
		return err
	}

	extra := make(map[string]any)
	for name, encoded := range members {
		switch name {
		case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value":
			// Already captured by a dedicated struct field.
			continue
		}
		var value any
		if err := json.Unmarshal(encoded, &value); err != nil {
			return err
		}
		extra[name] = value
	}

	*r = rawNode(known)
	r.Extra = extra
	return nil
}

// MarshalJSON implements the json.Marshaler interface.
//
// "@type" is always written. The other known fields are written only when
// they are non-empty, and the entries of Extra are merged in last.
func (r rawNode) MarshalJSON() ([]byte, error) {
	out := make(map[string]any, 9+len(r.Extra))

	out["@type"] = r.Type
	// A zero-length RawMessage is not valid JSON and would make the encoder
	// fail, so it is treated the same as an absent wrap.
	if len(r.Wrap) > 0 {
		out["wrap"] = r.Wrap
	}
	if len(r.Operands) > 0 {
		out["operands"] = r.Operands
	}
	if r.Relation != "" {
		out["relation"] = r.Relation
	}
	if r.Foundry != "" {
		out["foundry"] = r.Foundry
	}
	if r.Key != "" {
		out["key"] = r.Key
	}
	if r.Layer != "" {
		out["layer"] = r.Layer
	}
	if r.Match != "" {
		out["match"] = r.Match
	}
	if r.Value != "" {
		out["value"] = r.Value
	}

	// Add any extra fields.
	maps.Copy(out, r.Extra)

	return json.Marshal(out)
}
98
99// ParseJSON parses a JSON string into our AST representation
100func ParseJSON(data []byte) (ast.Node, error) {
101 var raw rawNode
102 if err := json.Unmarshal(data, &raw); err != nil {
103 return nil, fmt.Errorf("failed to parse JSON: %w", err)
104 }
Akron32958422025-05-16 16:33:05 +0200105 if raw.Type == "" {
Akron56e09e72025-05-22 15:38:35 +0200106 return nil, fmt.Errorf("missing required field '@type' in JSON")
Akron32958422025-05-16 16:33:05 +0200107 }
Akronb7e1f352025-05-16 15:45:23 +0200108 return parseNode(raw)
109}
110
111// parseNode converts a raw node into an AST node
112func parseNode(raw rawNode) (ast.Node, error) {
113 switch raw.Type {
114 case "koral:token":
115 if raw.Wrap == nil {
Akron56e09e72025-05-22 15:38:35 +0200116 return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
Akronb7e1f352025-05-16 15:45:23 +0200117 }
118 var wrapRaw rawNode
119 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200120 return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200121 }
122 wrap, err := parseNode(wrapRaw)
123 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200124 return nil, fmt.Errorf("error parsing wrapped node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200125 }
126 return &ast.Token{Wrap: wrap}, nil
127
128 case "koral:termGroup":
Akron56e09e72025-05-22 15:38:35 +0200129 if len(raw.Operands) == 0 {
130 return nil, fmt.Errorf("term group must have at least one operand")
131 }
132
Akronb7e1f352025-05-16 15:45:23 +0200133 operands := make([]ast.Node, len(raw.Operands))
134 for i, op := range raw.Operands {
135 node, err := parseNode(op)
136 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200137 return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
Akronb7e1f352025-05-16 15:45:23 +0200138 }
139 operands[i] = node
140 }
141
Akron56e09e72025-05-22 15:38:35 +0200142 if raw.Relation == "" {
143 return nil, fmt.Errorf("term group must have a 'relation' field")
144 }
145
Akronb7e1f352025-05-16 15:45:23 +0200146 relation := ast.AndRelation
147 if strings.HasSuffix(raw.Relation, "or") {
148 relation = ast.OrRelation
Akron56e09e72025-05-22 15:38:35 +0200149 } else if !strings.HasSuffix(raw.Relation, "and") {
150 return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
Akronb7e1f352025-05-16 15:45:23 +0200151 }
152
153 return &ast.TermGroup{
154 Operands: operands,
155 Relation: relation,
156 }, nil
157
158 case "koral:term":
Akron56e09e72025-05-22 15:38:35 +0200159 if raw.Key == "" {
160 return nil, fmt.Errorf("term must have a 'key' field")
161 }
162
Akronb7e1f352025-05-16 15:45:23 +0200163 match := ast.MatchEqual
Akron56e09e72025-05-22 15:38:35 +0200164 if raw.Match != "" {
165 if strings.HasSuffix(raw.Match, "ne") {
166 match = ast.MatchNotEqual
167 } else if !strings.HasSuffix(raw.Match, "eq") {
168 return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
169 }
Akronb7e1f352025-05-16 15:45:23 +0200170 }
171
172 return &ast.Term{
173 Foundry: raw.Foundry,
174 Key: raw.Key,
175 Layer: raw.Layer,
176 Match: match,
177 Value: raw.Value,
178 }, nil
179
180 default:
Akron32958422025-05-16 16:33:05 +0200181 // Store the original JSON content
182 rawContent, err := json.Marshal(raw)
183 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200184 return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200185 }
186
187 // Create a catchall node
188 catchall := &ast.CatchallNode{
189 NodeType: raw.Type,
190 RawContent: rawContent,
191 }
192
193 // Parse wrap if present
194 if raw.Wrap != nil {
195 var wrapRaw rawNode
196 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200197 return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200198 }
Akron56e09e72025-05-22 15:38:35 +0200199
200 // Check if the wrapped node is a known type
201 if wrapRaw.Type == "koral:term" || wrapRaw.Type == "koral:token" || wrapRaw.Type == "koral:termGroup" {
202 wrap, err := parseNode(wrapRaw)
203 if err != nil {
204 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
205 }
206 catchall.Wrap = wrap
207 } else {
208 // For unknown types, recursively parse
209 wrap, err := parseNode(wrapRaw)
210 if err != nil {
211 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
212 }
213 catchall.Wrap = wrap
Akron32958422025-05-16 16:33:05 +0200214 }
Akron32958422025-05-16 16:33:05 +0200215 }
216
217 // Parse operands if present
218 if len(raw.Operands) > 0 {
219 operands := make([]ast.Node, len(raw.Operands))
220 for i, op := range raw.Operands {
Akron56e09e72025-05-22 15:38:35 +0200221 // Check if the operand is a known type
222 if op.Type == "koral:term" || op.Type == "koral:token" || op.Type == "koral:termGroup" {
223 node, err := parseNode(op)
224 if err != nil {
225 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
226 }
227 operands[i] = node
228 } else {
229 // For unknown types, recursively parse
230 node, err := parseNode(op)
231 if err != nil {
232 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
233 }
234 operands[i] = node
Akron32958422025-05-16 16:33:05 +0200235 }
Akron32958422025-05-16 16:33:05 +0200236 }
237 catchall.Operands = operands
238 }
239
240 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200241 }
242}
243
244// SerializeToJSON converts an AST node back to JSON
245func SerializeToJSON(node ast.Node) ([]byte, error) {
Akron87948e82025-05-26 18:19:51 +0200246 return json.MarshalIndent(nodeToRaw(node), "", " ")
Akronb7e1f352025-05-16 15:45:23 +0200247}
248
249// nodeToRaw converts an AST node to a raw node for JSON serialization
250func nodeToRaw(node ast.Node) rawNode {
251 switch n := node.(type) {
252 case *ast.Token:
Akron56e09e72025-05-22 15:38:35 +0200253 if n.Wrap == nil {
254 return rawNode{
255 Type: "koral:token",
256 }
257 }
Akronb7e1f352025-05-16 15:45:23 +0200258 return rawNode{
259 Type: "koral:token",
260 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
261 }
262
263 case *ast.TermGroup:
264 operands := make([]rawNode, len(n.Operands))
265 for i, op := range n.Operands {
266 operands[i] = nodeToRaw(op)
267 }
268 return rawNode{
269 Type: "koral:termGroup",
270 Operands: operands,
271 Relation: "relation:" + string(n.Relation),
272 }
273
274 case *ast.Term:
Akron56e09e72025-05-22 15:38:35 +0200275 raw := rawNode{
276 Type: "koral:term",
277 Key: n.Key,
278 Match: "match:" + string(n.Match),
Akronb7e1f352025-05-16 15:45:23 +0200279 }
Akron56e09e72025-05-22 15:38:35 +0200280 if n.Foundry != "" {
281 raw.Foundry = n.Foundry
282 }
283 if n.Layer != "" {
284 raw.Layer = n.Layer
285 }
286 if n.Value != "" {
287 raw.Value = n.Value
288 }
289 return raw
Akronb7e1f352025-05-16 15:45:23 +0200290
Akron32958422025-05-16 16:33:05 +0200291 case *ast.CatchallNode:
Akron56e09e72025-05-22 15:38:35 +0200292 // For catchall nodes, use the stored raw content if available
Akron32958422025-05-16 16:33:05 +0200293 if n.RawContent != nil {
Akron56e09e72025-05-22 15:38:35 +0200294 var raw rawNode
295 if err := json.Unmarshal(n.RawContent, &raw); err == nil {
296 // Ensure we preserve the node type
297 raw.Type = n.NodeType
Akron32958422025-05-16 16:33:05 +0200298
Akron56e09e72025-05-22 15:38:35 +0200299 // Handle wrap and operands if present
Akron32958422025-05-16 16:33:05 +0200300 if n.Wrap != nil {
301 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
302 }
Akron56e09e72025-05-22 15:38:35 +0200303 if len(n.Operands) > 0 {
304 operands := make([]rawNode, len(n.Operands))
305 for i, op := range n.Operands {
306 operands[i] = nodeToRaw(op)
307 }
308 raw.Operands = operands
309 }
Akron32958422025-05-16 16:33:05 +0200310 return raw
311 }
Akron32958422025-05-16 16:33:05 +0200312 }
Akron32958422025-05-16 16:33:05 +0200313
Akron56e09e72025-05-22 15:38:35 +0200314 // If RawContent is nil or invalid, create a minimal raw node
315 raw := rawNode{
316 Type: n.NodeType,
317 }
318 if n.Wrap != nil {
319 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
320 }
321 if len(n.Operands) > 0 {
322 operands := make([]rawNode, len(n.Operands))
323 for i, op := range n.Operands {
324 operands[i] = nodeToRaw(op)
325 }
326 raw.Operands = operands
327 }
328 return raw
329 }
330
331 // Return a minimal raw node for unknown types
332 return rawNode{
333 Type: "koral:unknown",
Akronb7e1f352025-05-16 15:45:23 +0200334 }
335}
336
337// toJSON converts a raw node to JSON bytes
338func (r rawNode) toJSON() []byte {
Akron56e09e72025-05-22 15:38:35 +0200339 data, err := json.Marshal(r)
340 if err != nil {
341 // Return a minimal valid JSON object if marshaling fails
342 return []byte(`{"@type":"koral:unknown"}`)
343 }
Akronb7e1f352025-05-16 15:45:23 +0200344 return data
345}