blob: db20bc844b4341164da578f74642a702530d167a [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
// The parser package decodes a JSON document into an AST node (ParseJSON)
// and encodes an AST node back into JSON (SerializeToJSON).
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
7 "encoding/json"
8 "fmt"
9 "strings"
10
11 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
12)
13
// rawNode mirrors the JSON shape of a single query node. It is the
// intermediate form used on both the decoding and the encoding path.
// Every field except Type is omitted from the output when empty.
type rawNode struct {
	// Type holds the "@type" discriminator, e.g. "koral:token".
	Type string `json:"@type"`
	// Wrap keeps the nested node undecoded so it can be parsed on demand.
	Wrap json.RawMessage `json:"wrap,omitempty"`
	// Operands lists the child nodes of a group-like node.
	Operands []rawNode `json:"operands,omitempty"`
	// Relation is the group relation string, e.g. one ending in "or".
	Relation string `json:"relation,omitempty"`

	// Term attributes.
	Foundry string `json:"foundry,omitempty"`
	Key     string `json:"key,omitempty"`
	Layer   string `json:"layer,omitempty"`
	Match   string `json:"match,omitempty"`
	Value   string `json:"value,omitempty"`
}
26
27// ParseJSON parses a JSON string into our AST representation
28func ParseJSON(data []byte) (ast.Node, error) {
29 var raw rawNode
30 if err := json.Unmarshal(data, &raw); err != nil {
31 return nil, fmt.Errorf("failed to parse JSON: %w", err)
32 }
Akron32958422025-05-16 16:33:05 +020033 if raw.Type == "" {
34 return nil, fmt.Errorf("missing @type field")
35 }
Akronb7e1f352025-05-16 15:45:23 +020036 return parseNode(raw)
37}
38
39// parseNode converts a raw node into an AST node
40func parseNode(raw rawNode) (ast.Node, error) {
41 switch raw.Type {
42 case "koral:token":
43 if raw.Wrap == nil {
44 return nil, fmt.Errorf("token node missing wrap field")
45 }
46 var wrapRaw rawNode
47 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
48 return nil, fmt.Errorf("failed to parse wrap: %w", err)
49 }
50 wrap, err := parseNode(wrapRaw)
51 if err != nil {
52 return nil, err
53 }
54 return &ast.Token{Wrap: wrap}, nil
55
56 case "koral:termGroup":
57 operands := make([]ast.Node, len(raw.Operands))
58 for i, op := range raw.Operands {
59 node, err := parseNode(op)
60 if err != nil {
61 return nil, err
62 }
63 operands[i] = node
64 }
65
66 relation := ast.AndRelation
67 if strings.HasSuffix(raw.Relation, "or") {
68 relation = ast.OrRelation
69 }
70
71 return &ast.TermGroup{
72 Operands: operands,
73 Relation: relation,
74 }, nil
75
76 case "koral:term":
77 match := ast.MatchEqual
78 if strings.HasSuffix(raw.Match, "ne") {
79 match = ast.MatchNotEqual
80 }
81
82 return &ast.Term{
83 Foundry: raw.Foundry,
84 Key: raw.Key,
85 Layer: raw.Layer,
86 Match: match,
87 Value: raw.Value,
88 }, nil
89
90 default:
Akron32958422025-05-16 16:33:05 +020091 // Store the original JSON content
92 rawContent, err := json.Marshal(raw)
93 if err != nil {
94 return nil, fmt.Errorf("failed to marshal unknown node: %w", err)
95 }
96
97 // Create a catchall node
98 catchall := &ast.CatchallNode{
99 NodeType: raw.Type,
100 RawContent: rawContent,
101 }
102
103 // Parse wrap if present
104 if raw.Wrap != nil {
105 var wrapRaw rawNode
106 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
107 return nil, fmt.Errorf("failed to parse wrap in unknown node: %w", err)
108 }
109 wrap, err := parseNode(wrapRaw)
110 if err != nil {
111 return nil, err
112 }
113 catchall.Wrap = wrap
114 }
115
116 // Parse operands if present
117 if len(raw.Operands) > 0 {
118 operands := make([]ast.Node, len(raw.Operands))
119 for i, op := range raw.Operands {
120 node, err := parseNode(op)
121 if err != nil {
122 return nil, err
123 }
124 operands[i] = node
125 }
126 catchall.Operands = operands
127 }
128
129 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200130 }
131}
132
133// SerializeToJSON converts an AST node back to JSON
134func SerializeToJSON(node ast.Node) ([]byte, error) {
135 raw := nodeToRaw(node)
136 return json.MarshalIndent(raw, "", " ")
137}
138
139// nodeToRaw converts an AST node to a raw node for JSON serialization
140func nodeToRaw(node ast.Node) rawNode {
141 switch n := node.(type) {
142 case *ast.Token:
143 return rawNode{
144 Type: "koral:token",
145 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
146 }
147
148 case *ast.TermGroup:
149 operands := make([]rawNode, len(n.Operands))
150 for i, op := range n.Operands {
151 operands[i] = nodeToRaw(op)
152 }
153 return rawNode{
154 Type: "koral:termGroup",
155 Operands: operands,
156 Relation: "relation:" + string(n.Relation),
157 }
158
159 case *ast.Term:
160 return rawNode{
161 Type: "koral:term",
162 Foundry: n.Foundry,
163 Key: n.Key,
164 Layer: n.Layer,
165 Match: "match:" + string(n.Match),
166 Value: n.Value,
167 }
168
Akron32958422025-05-16 16:33:05 +0200169 case *ast.CatchallNode:
170 // For catchall nodes, use the stored raw content
171 if n.RawContent != nil {
172 // If we have operands or wrap that were modified, we need to update the raw content
173 if len(n.Operands) > 0 || n.Wrap != nil {
174 var raw rawNode
175 if err := json.Unmarshal(n.RawContent, &raw); err != nil {
176 return rawNode{}
177 }
178
179 // Update operands if present
180 if len(n.Operands) > 0 {
181 raw.Operands = make([]rawNode, len(n.Operands))
182 for i, op := range n.Operands {
183 raw.Operands[i] = nodeToRaw(op)
184 }
185 }
186
187 // Update wrap if present
188 if n.Wrap != nil {
189 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
190 }
191
192 return raw
193 }
194 // If no modifications, return the original content as is
195 var raw rawNode
196 _ = json.Unmarshal(n.RawContent, &raw)
197 return raw
198 }
199 return rawNode{}
200
Akronb7e1f352025-05-16 15:45:23 +0200201 default:
202 return rawNode{}
203 }
204}
205
206// toJSON converts a raw node to JSON bytes
207func (r rawNode) toJSON() []byte {
208 data, _ := json.Marshal(r)
209 return data
210}