blob: 4f8c3bcc1ab55ff4c6691fa7ddc6458d5d1784e8 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// Package parser parses JSON documents into AST nodes and serializes AST
// nodes back to JSON.
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
Akronfa55bb22025-05-26 15:10:42 +020011 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +020012)
13
// rawNode is the intermediate representation used when decoding and encoding
// the JSON form of a node. The named fields mirror the JSON keys this package
// interprets; every other key of the object is kept in Extra so that it
// survives a decode/encode round trip.
type rawNode struct {
	Type     string          `json:"@type"`
	Wrap     json.RawMessage `json:"wrap,omitempty"` // kept undecoded; parsed on demand
	Operands []rawNode       `json:"operands,omitempty"`
	Relation string          `json:"relation,omitempty"`
	Foundry  string          `json:"foundry,omitempty"`
	Key      string          `json:"key,omitempty"`
	Layer    string          `json:"layer,omitempty"`
	Match    string          `json:"match,omitempty"`
	Value    string          `json:"value,omitempty"`
	// Extra holds every JSON key that has no dedicated field above.
	// Numbers inside Extra are stored as json.Number so that their textual
	// form is preserved exactly.
	Extra map[string]interface{} `json:"-"`
}

// UnmarshalJSON implements the json.Unmarshaler interface.
//
// The object is decoded twice: once into a generic map to see every key, and
// once into the known fields. Keys without a dedicated field end up in Extra;
// Extra is always a non-nil map afterwards. Any decoding error is returned
// unchanged.
func (r *rawNode) UnmarshalJSON(data []byte) error {
	// Decode the generic view with UseNumber: the default float64 decoding
	// would silently alter integers above 2^53 in unknown fields, which
	// are supposed to be passed through untouched.
	var fields map[string]interface{}
	dec := json.NewDecoder(strings.NewReader(string(data)))
	dec.UseNumber()
	if err := dec.Decode(&fields); err != nil {
		return err
	}

	// plainNode has the same layout as rawNode but none of its methods, so
	// decoding into it does not recurse into this function.
	type plainNode rawNode
	var known plainNode
	if err := json.Unmarshal(data, &known); err != nil {
		return err
	}
	*r = rawNode(known)

	r.Extra = make(map[string]interface{})
	for name, value := range fields {
		switch name {
		case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value":
			// Already captured by a dedicated field.
		default:
			r.Extra[name] = value
		}
	}

	return nil
}

// MarshalJSON implements the json.Marshaler interface.
//
// "@type" is always written. "wrap" is written when non-nil, "operands" when
// non-empty, and the string fields when non-empty. Entries of Extra are added
// last, so an Extra key that equals a known key replaces that key's value.
func (r rawNode) MarshalJSON() ([]byte, error) {
	out := make(map[string]interface{}, 9+len(r.Extra))

	out["@type"] = r.Type
	if r.Wrap != nil {
		out["wrap"] = r.Wrap
	}
	if len(r.Operands) > 0 {
		out["operands"] = r.Operands
	}

	optional := []struct{ name, value string }{
		{"relation", r.Relation},
		{"foundry", r.Foundry},
		{"key", r.Key},
		{"layer", r.Layer},
		{"match", r.Match},
		{"value", r.Value},
	}
	for _, f := range optional {
		if f.value != "" {
			out[f.name] = f.value
		}
	}

	for name, value := range r.Extra {
		out[name] = value
	}

	return json.Marshal(out)
}
98
99// ParseJSON parses a JSON string into our AST representation
100func ParseJSON(data []byte) (ast.Node, error) {
101 var raw rawNode
102 if err := json.Unmarshal(data, &raw); err != nil {
103 return nil, fmt.Errorf("failed to parse JSON: %w", err)
104 }
Akron32958422025-05-16 16:33:05 +0200105 if raw.Type == "" {
Akron56e09e72025-05-22 15:38:35 +0200106 return nil, fmt.Errorf("missing required field '@type' in JSON")
Akron32958422025-05-16 16:33:05 +0200107 }
Akronb7e1f352025-05-16 15:45:23 +0200108 return parseNode(raw)
109}
110
111// parseNode converts a raw node into an AST node
112func parseNode(raw rawNode) (ast.Node, error) {
113 switch raw.Type {
114 case "koral:token":
115 if raw.Wrap == nil {
Akron56e09e72025-05-22 15:38:35 +0200116 return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
Akronb7e1f352025-05-16 15:45:23 +0200117 }
118 var wrapRaw rawNode
119 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200120 return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200121 }
122 wrap, err := parseNode(wrapRaw)
123 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200124 return nil, fmt.Errorf("error parsing wrapped node: %w", err)
Akronb7e1f352025-05-16 15:45:23 +0200125 }
126 return &ast.Token{Wrap: wrap}, nil
127
128 case "koral:termGroup":
Akron56e09e72025-05-22 15:38:35 +0200129 if len(raw.Operands) == 0 {
130 return nil, fmt.Errorf("term group must have at least one operand")
131 }
132
Akronb7e1f352025-05-16 15:45:23 +0200133 operands := make([]ast.Node, len(raw.Operands))
134 for i, op := range raw.Operands {
135 node, err := parseNode(op)
136 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200137 return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
Akronb7e1f352025-05-16 15:45:23 +0200138 }
139 operands[i] = node
140 }
141
Akron56e09e72025-05-22 15:38:35 +0200142 if raw.Relation == "" {
143 return nil, fmt.Errorf("term group must have a 'relation' field")
144 }
145
Akronb7e1f352025-05-16 15:45:23 +0200146 relation := ast.AndRelation
147 if strings.HasSuffix(raw.Relation, "or") {
148 relation = ast.OrRelation
Akron56e09e72025-05-22 15:38:35 +0200149 } else if !strings.HasSuffix(raw.Relation, "and") {
150 return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
Akronb7e1f352025-05-16 15:45:23 +0200151 }
152
153 return &ast.TermGroup{
154 Operands: operands,
155 Relation: relation,
156 }, nil
157
158 case "koral:term":
Akron56e09e72025-05-22 15:38:35 +0200159 if raw.Key == "" {
160 return nil, fmt.Errorf("term must have a 'key' field")
161 }
162
Akronb7e1f352025-05-16 15:45:23 +0200163 match := ast.MatchEqual
Akron56e09e72025-05-22 15:38:35 +0200164 if raw.Match != "" {
165 if strings.HasSuffix(raw.Match, "ne") {
166 match = ast.MatchNotEqual
167 } else if !strings.HasSuffix(raw.Match, "eq") {
168 return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
169 }
Akronb7e1f352025-05-16 15:45:23 +0200170 }
171
172 return &ast.Term{
173 Foundry: raw.Foundry,
174 Key: raw.Key,
175 Layer: raw.Layer,
176 Match: match,
177 Value: raw.Value,
178 }, nil
179
180 default:
Akron32958422025-05-16 16:33:05 +0200181 // Store the original JSON content
182 rawContent, err := json.Marshal(raw)
183 if err != nil {
Akron56e09e72025-05-22 15:38:35 +0200184 return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200185 }
186
187 // Create a catchall node
188 catchall := &ast.CatchallNode{
189 NodeType: raw.Type,
190 RawContent: rawContent,
191 }
192
193 // Parse wrap if present
194 if raw.Wrap != nil {
195 var wrapRaw rawNode
196 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
Akron56e09e72025-05-22 15:38:35 +0200197 return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
Akron32958422025-05-16 16:33:05 +0200198 }
Akron56e09e72025-05-22 15:38:35 +0200199
200 // Check if the wrapped node is a known type
201 if wrapRaw.Type == "koral:term" || wrapRaw.Type == "koral:token" || wrapRaw.Type == "koral:termGroup" {
202 wrap, err := parseNode(wrapRaw)
203 if err != nil {
204 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
205 }
206 catchall.Wrap = wrap
207 } else {
208 // For unknown types, recursively parse
209 wrap, err := parseNode(wrapRaw)
210 if err != nil {
211 return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
212 }
213 catchall.Wrap = wrap
Akron32958422025-05-16 16:33:05 +0200214 }
Akron32958422025-05-16 16:33:05 +0200215 }
216
217 // Parse operands if present
218 if len(raw.Operands) > 0 {
219 operands := make([]ast.Node, len(raw.Operands))
220 for i, op := range raw.Operands {
Akron56e09e72025-05-22 15:38:35 +0200221 // Check if the operand is a known type
222 if op.Type == "koral:term" || op.Type == "koral:token" || op.Type == "koral:termGroup" {
223 node, err := parseNode(op)
224 if err != nil {
225 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
226 }
227 operands[i] = node
228 } else {
229 // For unknown types, recursively parse
230 node, err := parseNode(op)
231 if err != nil {
232 return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
233 }
234 operands[i] = node
Akron32958422025-05-16 16:33:05 +0200235 }
Akron32958422025-05-16 16:33:05 +0200236 }
237 catchall.Operands = operands
238 }
239
240 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200241 }
242}
243
244// SerializeToJSON converts an AST node back to JSON
245func SerializeToJSON(node ast.Node) ([]byte, error) {
246 raw := nodeToRaw(node)
247 return json.MarshalIndent(raw, "", " ")
248}
249
250// nodeToRaw converts an AST node to a raw node for JSON serialization
251func nodeToRaw(node ast.Node) rawNode {
252 switch n := node.(type) {
253 case *ast.Token:
Akron56e09e72025-05-22 15:38:35 +0200254 if n.Wrap == nil {
255 return rawNode{
256 Type: "koral:token",
257 }
258 }
Akronb7e1f352025-05-16 15:45:23 +0200259 return rawNode{
260 Type: "koral:token",
261 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
262 }
263
264 case *ast.TermGroup:
265 operands := make([]rawNode, len(n.Operands))
266 for i, op := range n.Operands {
267 operands[i] = nodeToRaw(op)
268 }
269 return rawNode{
270 Type: "koral:termGroup",
271 Operands: operands,
272 Relation: "relation:" + string(n.Relation),
273 }
274
275 case *ast.Term:
Akron56e09e72025-05-22 15:38:35 +0200276 raw := rawNode{
277 Type: "koral:term",
278 Key: n.Key,
279 Match: "match:" + string(n.Match),
Akronb7e1f352025-05-16 15:45:23 +0200280 }
Akron56e09e72025-05-22 15:38:35 +0200281 if n.Foundry != "" {
282 raw.Foundry = n.Foundry
283 }
284 if n.Layer != "" {
285 raw.Layer = n.Layer
286 }
287 if n.Value != "" {
288 raw.Value = n.Value
289 }
290 return raw
Akronb7e1f352025-05-16 15:45:23 +0200291
Akron32958422025-05-16 16:33:05 +0200292 case *ast.CatchallNode:
Akron56e09e72025-05-22 15:38:35 +0200293 // For catchall nodes, use the stored raw content if available
Akron32958422025-05-16 16:33:05 +0200294 if n.RawContent != nil {
Akron56e09e72025-05-22 15:38:35 +0200295 var raw rawNode
296 if err := json.Unmarshal(n.RawContent, &raw); err == nil {
297 // Ensure we preserve the node type
298 raw.Type = n.NodeType
Akron32958422025-05-16 16:33:05 +0200299
Akron56e09e72025-05-22 15:38:35 +0200300 // Handle wrap and operands if present
Akron32958422025-05-16 16:33:05 +0200301 if n.Wrap != nil {
302 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
303 }
Akron56e09e72025-05-22 15:38:35 +0200304 if len(n.Operands) > 0 {
305 operands := make([]rawNode, len(n.Operands))
306 for i, op := range n.Operands {
307 operands[i] = nodeToRaw(op)
308 }
309 raw.Operands = operands
310 }
Akron32958422025-05-16 16:33:05 +0200311 return raw
312 }
Akron32958422025-05-16 16:33:05 +0200313 }
Akron32958422025-05-16 16:33:05 +0200314
Akron56e09e72025-05-22 15:38:35 +0200315 // If RawContent is nil or invalid, create a minimal raw node
316 raw := rawNode{
317 Type: n.NodeType,
318 }
319 if n.Wrap != nil {
320 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
321 }
322 if len(n.Operands) > 0 {
323 operands := make([]rawNode, len(n.Operands))
324 for i, op := range n.Operands {
325 operands[i] = nodeToRaw(op)
326 }
327 raw.Operands = operands
328 }
329 return raw
330 }
331
332 // Return a minimal raw node for unknown types
333 return rawNode{
334 Type: "koral:unknown",
Akronb7e1f352025-05-16 15:45:23 +0200335 }
336}
337
338// toJSON converts a raw node to JSON bytes
339func (r rawNode) toJSON() []byte {
Akron56e09e72025-05-22 15:38:35 +0200340 data, err := json.Marshal(r)
341 if err != nil {
342 // Return a minimal valid JSON object if marshaling fails
343 return []byte(`{"@type":"koral:unknown"}`)
344 }
Akronb7e1f352025-05-16 15:45:23 +0200345 return data
346}