Test new approach with AST (AI assisted)
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
new file mode 100644
index 0000000..be11da9
--- /dev/null
+++ b/pkg/parser/parser.go
@@ -0,0 +1,134 @@
+package parser
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+)
+
+// rawNode mirrors the JSON shape of a single node for decoding and encoding; one struct covers token, termGroup and term nodes, and unused optional fields are dropped on output via omitempty.
+type rawNode struct {
+ Type string `json:"@type"` // node discriminator such as "koral:token"; always emitted because it has no omitempty
+ Wrap json.RawMessage `json:"wrap,omitempty"` // kept as raw JSON and decoded in a second step by parseNode
+ Operands []rawNode `json:"operands,omitempty"` // child nodes, read for "koral:termGroup"
+ Relation string `json:"relation,omitempty"` // e.g. "relation:and" / "relation:or"
+ Foundry string `json:"foundry,omitempty"`
+ Key string `json:"key,omitempty"`
+ Layer string `json:"layer,omitempty"`
+ Match string `json:"match,omitempty"` // e.g. "match:eq" / "match:ne"
+ Value string `json:"value,omitempty"`
+}
+
+// ParseJSON decodes data as one JSON node object and converts it to an ast.Node via parseNode; malformed JSON yields a wrapped "failed to parse JSON" error.
+func ParseJSON(data []byte) (ast.Node, error) {
+ var raw rawNode
+ if err := json.Unmarshal(data, &raw); err != nil {
+ return nil, fmt.Errorf("failed to parse JSON: %w", err)
+ }
+ return parseNode(raw) // a missing or unknown "@type" is rejected here, not by the JSON decoder
+}
+
+// parseNode converts a decoded rawNode into the matching AST node (*ast.Token, *ast.TermGroup or *ast.Term), recursing into wrap and operands; it returns an error for an unrecognized "@type" or when any child fails to parse.
+func parseNode(raw rawNode) (ast.Node, error) {
+ switch raw.Type {
+ case "koral:token":
+ if raw.Wrap == nil { // no wrap payload was decoded, e.g. the "wrap" key is absent
+ return nil, fmt.Errorf("token node missing wrap field")
+ }
+ var wrapRaw rawNode
+ if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil { // Wrap is held as raw JSON, so it is decoded separately here
+ return nil, fmt.Errorf("failed to parse wrap: %w", err)
+ }
+ wrap, err := parseNode(wrapRaw)
+ if err != nil {
+ return nil, err
+ }
+ return &ast.Token{Wrap: wrap}, nil
+
+ case "koral:termGroup":
+ operands := make([]ast.Node, len(raw.Operands)) // sized up front and filled by index below
+ for i, op := range raw.Operands {
+ node, err := parseNode(op)
+ if err != nil {
+ return nil, err
+ }
+ operands[i] = node
+ }
+
+ relation := ast.AndRelation // default whenever the relation string does not end in "or" (including when it is empty)
+ if strings.HasSuffix(raw.Relation, "or") { // suffix match: "relation:or" and any other string ending in "or" select OR
+ relation = ast.OrRelation
+ }
+
+ return &ast.TermGroup{
+ Operands: operands,
+ Relation: relation,
+ }, nil
+
+ case "koral:term":
+ match := ast.MatchEqual // default whenever the match string does not end in "ne" (including when it is empty)
+ if strings.HasSuffix(raw.Match, "ne") { // suffix match: "match:ne" selects not-equal
+ match = ast.MatchNotEqual
+ }
+
+ return &ast.Term{
+ Foundry: raw.Foundry,
+ Key: raw.Key,
+ Layer: raw.Layer,
+ Match: match,
+ Value: raw.Value,
+ }, nil
+
+ default:
+ return nil, fmt.Errorf("unknown node type: %s", raw.Type)
+ }
+}
+
+// SerializeToJSON converts node to its rawNode form with nodeToRaw and returns the indented JSON produced by json.MarshalIndent.
+func SerializeToJSON(node ast.Node) ([]byte, error) {
+ raw := nodeToRaw(node) // NOTE(review): nodeToRaw reports no errors, so an unsupported node is serialized with an empty "@type"
+ return json.MarshalIndent(raw, "", " ")
+}
+
+// nodeToRaw recursively converts an AST node into the rawNode used for JSON encoding; node types other than *ast.Token, *ast.TermGroup and *ast.Term produce a zero rawNode.
+func nodeToRaw(node ast.Node) rawNode {
+ switch n := node.(type) {
+ case *ast.Token:
+ return rawNode{
+ Type: "koral:token",
+ Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()), // the wrapped node is encoded to JSON bytes right away because Wrap is a json.RawMessage
+ }
+
+ case *ast.TermGroup:
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ return rawNode{
+ Type: "koral:termGroup",
+ Operands: operands,
+ Relation: "relation:" + string(n.Relation), // presumably the ast relation constants hold the bare suffix (e.g. "and") — confirm in pkg/ast
+ }
+
+ case *ast.Term:
+ return rawNode{
+ Type: "koral:term",
+ Foundry: n.Foundry,
+ Key: n.Key,
+ Layer: n.Layer,
+ Match: "match:" + string(n.Match), // presumably the ast match constants hold the bare suffix (e.g. "eq") — confirm in pkg/ast
+ Value: n.Value,
+ }
+
+ default:
+ return rawNode{} // NOTE(review): also reached for a nil node; no error is reported to the caller
+ }
+}
+
+// toJSON returns r encoded with json.Marshal; nodeToRaw uses it to fill the Wrap field of a token.
+func (r rawNode) toJSON() []byte {
+ data, _ := json.Marshal(r) // NOTE(review): the marshal error is discarded, so callers cannot detect a failure
+ return data
+}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
new file mode 100644
index 0000000..464c497
--- /dev/null
+++ b/pkg/parser/parser_test.go
@@ -0,0 +1,340 @@
+package parser
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestParseJSON(t *testing.T) { // table-driven test of ParseJSON: terms, term groups and tokens that must parse, plus inputs that must fail
+ tests := []struct {
+ name string
+ input string
+ expected ast.Node // not checked when wantErr is true
+ wantErr bool // true when ParseJSON must return an error
+ }{
+ {
+ name: "Parse simple term",
+ input: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }`,
+ expected: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse term group with AND relation",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }`,
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse token with wrapped term",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse complex nested structure",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PronType",
+ "layer": "m",
+ "match": "match:ne",
+ "value": "Neg"
+ }
+ ],
+ "relation": "relation:or"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchNotEqual,
+ Value: "Neg",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Invalid JSON",
+ input: `{"invalid": json`,
+ wantErr: true,
+ },
+ {
+ name: "Empty JSON", // valid JSON but no "@type", so parseNode rejects it as an unknown node type
+ input: `{}`,
+ wantErr: true,
+ },
+ {
+ name: "Invalid node type",
+ input: `{
+ "@type": "koral:unknown",
+ "key": "value"
+ }`,
+ wantErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := ParseJSON([]byte(tt.input))
+ if tt.wantErr { // error cases only check that an error occurred, not its message
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err) // abort the subtest on an unexpected error before comparing the result
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestSerializeToJSON(t *testing.T) { // table-driven test of SerializeToJSON for a single term and a two-operand term group
+ tests := []struct {
+ name string
+ input ast.Node
+ expected string
+ wantErr bool // no case in this table sets it to true
+ }{
+ {
+ name: "Serialize simple term",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+}`,
+ wantErr: false,
+ },
+ {
+ name: "Serialize term group",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+}`,
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := SerializeToJSON(tt.input)
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ // Compare the decoded JSON values rather than the raw strings, so indentation and other formatting differences do not matter
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(tt.expected), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(result, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+ })
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ // Parsing a token that wraps a term group and serializing it again must yield JSON that is structurally equal to the input
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ // Parse JSON to AST; every term in the input carries an explicit "match", which the serializer always writes back
+ node, err := ParseJSON([]byte(input))
+ require.NoError(t, err)
+
+ // Serialize the AST back to JSON bytes
+ output, err := SerializeToJSON(node)
+ require.NoError(t, err)
+
+ // Decode both documents into generic values so the comparison ignores whitespace and formatting
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(input), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(output, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+}