blob: d0ff222f31100c5fd937437ac43c8c15dd145d11 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
3import (
4 "encoding/json"
5 "fmt"
6 "strings"
7
8 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
9)
10
11// rawNode represents the raw JSON structure
12type rawNode struct {
13 Type string `json:"@type"`
14 Wrap json.RawMessage `json:"wrap,omitempty"`
15 Operands []rawNode `json:"operands,omitempty"`
16 Relation string `json:"relation,omitempty"`
17 Foundry string `json:"foundry,omitempty"`
18 Key string `json:"key,omitempty"`
19 Layer string `json:"layer,omitempty"`
20 Match string `json:"match,omitempty"`
21 Value string `json:"value,omitempty"`
22}
23
24// ParseJSON parses a JSON string into our AST representation
25func ParseJSON(data []byte) (ast.Node, error) {
26 var raw rawNode
27 if err := json.Unmarshal(data, &raw); err != nil {
28 return nil, fmt.Errorf("failed to parse JSON: %w", err)
29 }
Akron32958422025-05-16 16:33:05 +020030 if raw.Type == "" {
31 return nil, fmt.Errorf("missing @type field")
32 }
Akronb7e1f352025-05-16 15:45:23 +020033 return parseNode(raw)
34}
35
36// parseNode converts a raw node into an AST node
37func parseNode(raw rawNode) (ast.Node, error) {
38 switch raw.Type {
39 case "koral:token":
40 if raw.Wrap == nil {
41 return nil, fmt.Errorf("token node missing wrap field")
42 }
43 var wrapRaw rawNode
44 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
45 return nil, fmt.Errorf("failed to parse wrap: %w", err)
46 }
47 wrap, err := parseNode(wrapRaw)
48 if err != nil {
49 return nil, err
50 }
51 return &ast.Token{Wrap: wrap}, nil
52
53 case "koral:termGroup":
54 operands := make([]ast.Node, len(raw.Operands))
55 for i, op := range raw.Operands {
56 node, err := parseNode(op)
57 if err != nil {
58 return nil, err
59 }
60 operands[i] = node
61 }
62
63 relation := ast.AndRelation
64 if strings.HasSuffix(raw.Relation, "or") {
65 relation = ast.OrRelation
66 }
67
68 return &ast.TermGroup{
69 Operands: operands,
70 Relation: relation,
71 }, nil
72
73 case "koral:term":
74 match := ast.MatchEqual
75 if strings.HasSuffix(raw.Match, "ne") {
76 match = ast.MatchNotEqual
77 }
78
79 return &ast.Term{
80 Foundry: raw.Foundry,
81 Key: raw.Key,
82 Layer: raw.Layer,
83 Match: match,
84 Value: raw.Value,
85 }, nil
86
87 default:
Akron32958422025-05-16 16:33:05 +020088 // Store the original JSON content
89 rawContent, err := json.Marshal(raw)
90 if err != nil {
91 return nil, fmt.Errorf("failed to marshal unknown node: %w", err)
92 }
93
94 // Create a catchall node
95 catchall := &ast.CatchallNode{
96 NodeType: raw.Type,
97 RawContent: rawContent,
98 }
99
100 // Parse wrap if present
101 if raw.Wrap != nil {
102 var wrapRaw rawNode
103 if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
104 return nil, fmt.Errorf("failed to parse wrap in unknown node: %w", err)
105 }
106 wrap, err := parseNode(wrapRaw)
107 if err != nil {
108 return nil, err
109 }
110 catchall.Wrap = wrap
111 }
112
113 // Parse operands if present
114 if len(raw.Operands) > 0 {
115 operands := make([]ast.Node, len(raw.Operands))
116 for i, op := range raw.Operands {
117 node, err := parseNode(op)
118 if err != nil {
119 return nil, err
120 }
121 operands[i] = node
122 }
123 catchall.Operands = operands
124 }
125
126 return catchall, nil
Akronb7e1f352025-05-16 15:45:23 +0200127 }
128}
129
130// SerializeToJSON converts an AST node back to JSON
131func SerializeToJSON(node ast.Node) ([]byte, error) {
132 raw := nodeToRaw(node)
133 return json.MarshalIndent(raw, "", " ")
134}
135
136// nodeToRaw converts an AST node to a raw node for JSON serialization
137func nodeToRaw(node ast.Node) rawNode {
138 switch n := node.(type) {
139 case *ast.Token:
140 return rawNode{
141 Type: "koral:token",
142 Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
143 }
144
145 case *ast.TermGroup:
146 operands := make([]rawNode, len(n.Operands))
147 for i, op := range n.Operands {
148 operands[i] = nodeToRaw(op)
149 }
150 return rawNode{
151 Type: "koral:termGroup",
152 Operands: operands,
153 Relation: "relation:" + string(n.Relation),
154 }
155
156 case *ast.Term:
157 return rawNode{
158 Type: "koral:term",
159 Foundry: n.Foundry,
160 Key: n.Key,
161 Layer: n.Layer,
162 Match: "match:" + string(n.Match),
163 Value: n.Value,
164 }
165
Akron32958422025-05-16 16:33:05 +0200166 case *ast.CatchallNode:
167 // For catchall nodes, use the stored raw content
168 if n.RawContent != nil {
169 // If we have operands or wrap that were modified, we need to update the raw content
170 if len(n.Operands) > 0 || n.Wrap != nil {
171 var raw rawNode
172 if err := json.Unmarshal(n.RawContent, &raw); err != nil {
173 return rawNode{}
174 }
175
176 // Update operands if present
177 if len(n.Operands) > 0 {
178 raw.Operands = make([]rawNode, len(n.Operands))
179 for i, op := range n.Operands {
180 raw.Operands[i] = nodeToRaw(op)
181 }
182 }
183
184 // Update wrap if present
185 if n.Wrap != nil {
186 raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
187 }
188
189 return raw
190 }
191 // If no modifications, return the original content as is
192 var raw rawNode
193 _ = json.Unmarshal(n.RawContent, &raw)
194 return raw
195 }
196 return rawNode{}
197
Akronb7e1f352025-05-16 15:45:23 +0200198 default:
199 return rawNode{}
200 }
201}
202
203// toJSON converts a raw node to JSON bytes
204func (r rawNode) toJSON() []byte {
205 data, _ := json.Marshal(r)
206 return data
207}