Improve parser validation, error messages, and test coverage
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
index db20bc8..3730683 100644
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -22,6 +22,78 @@
Layer string `json:"layer,omitempty"`
Match string `json:"match,omitempty"`
Value string `json:"value,omitempty"`
+ // Store any additional fields
+ Extra map[string]interface{} `json:"-"`
+}
+
+// UnmarshalJSON implements json.Unmarshaler: known fields are decoded into r and every other top-level key is kept in r.Extra.
+func (r *rawNode) UnmarshalJSON(data []byte) error {
+ // Decode into a generic map first so keys without a struct field are not lost
+ var raw map[string]interface{}
+ if err := json.Unmarshal(data, &raw); err != nil {
+ return err
+ }
+
+ // tempNode has rawNode's fields but none of its methods, so this decode does not re-enter this method for the same object
+ type tempNode rawNode
+ var temp tempNode
+ if err := json.Unmarshal(data, &temp); err != nil {
+ return err
+ }
+ *r = rawNode(temp) // replaces the whole receiver, including any previous Extra
+
+ // Rebuild Extra from the keys not listed below; it is non-nil (possibly empty) after a successful decode
+ r.Extra = make(map[string]interface{})
+ for k, v := range raw {
+ switch k {
+ case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value": // NOTE(review): presumably mirrors rawNode's json tags — keep in sync
+ continue
+ default:
+ r.Extra[k] = v
+ }
+ }
+
+ return nil
+}
+
+// MarshalJSON implements json.Marshaler: it writes "@type", the non-empty known fields, and all entries of r.Extra into one JSON object.
+func (r rawNode) MarshalJSON() ([]byte, error) {
+ // Build the output as a map so known and extra fields share one object
+ raw := make(map[string]interface{})
+
+ // "@type" is always written; every other known field is skipped when empty
+ raw["@type"] = r.Type
+ if r.Wrap != nil {
+ raw["wrap"] = r.Wrap
+ }
+ if len(r.Operands) > 0 {
+ raw["operands"] = r.Operands
+ }
+ if r.Relation != "" {
+ raw["relation"] = r.Relation
+ }
+ if r.Foundry != "" {
+ raw["foundry"] = r.Foundry
+ }
+ if r.Key != "" {
+ raw["key"] = r.Key
+ }
+ if r.Layer != "" {
+ raw["layer"] = r.Layer
+ }
+ if r.Match != "" {
+ raw["match"] = r.Match
+ }
+ if r.Value != "" {
+ raw["value"] = r.Value
+ }
+
+ // Extra is copied last, so an Extra key equal to a known field name replaces that field's value
+ for k, v := range r.Extra {
+ raw[k] = v
+ }
+
+ return json.Marshal(raw)
}
// ParseJSON parses a JSON string into our AST representation
@@ -31,7 +103,7 @@
return nil, fmt.Errorf("failed to parse JSON: %w", err)
}
if raw.Type == "" {
- return nil, fmt.Errorf("missing @type field")
+ return nil, fmt.Errorf("missing required field '@type' in JSON")
}
return parseNode(raw)
}
@@ -41,31 +113,41 @@
switch raw.Type {
case "koral:token":
if raw.Wrap == nil {
- return nil, fmt.Errorf("token node missing wrap field")
+ return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
}
var wrapRaw rawNode
if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
- return nil, fmt.Errorf("failed to parse wrap: %w", err)
+ return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
}
wrap, err := parseNode(wrapRaw)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("error parsing wrapped node: %w", err)
}
return &ast.Token{Wrap: wrap}, nil
case "koral:termGroup":
+ if len(raw.Operands) == 0 {
+ return nil, fmt.Errorf("term group must have at least one operand")
+ }
+
operands := make([]ast.Node, len(raw.Operands))
for i, op := range raw.Operands {
node, err := parseNode(op)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
}
operands[i] = node
}
+ if raw.Relation == "" {
+ return nil, fmt.Errorf("term group must have a 'relation' field")
+ }
+
relation := ast.AndRelation
if strings.HasSuffix(raw.Relation, "or") {
relation = ast.OrRelation
+ } else if !strings.HasSuffix(raw.Relation, "and") {
+ return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
}
return &ast.TermGroup{
@@ -74,9 +156,17 @@
}, nil
case "koral:term":
+ if raw.Key == "" {
+ return nil, fmt.Errorf("term must have a 'key' field")
+ }
+
match := ast.MatchEqual
- if strings.HasSuffix(raw.Match, "ne") {
- match = ast.MatchNotEqual
+ if raw.Match != "" {
+ if strings.HasSuffix(raw.Match, "ne") {
+ match = ast.MatchNotEqual
+ } else if !strings.HasSuffix(raw.Match, "eq") {
+ return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
+ }
}
return &ast.Term{
@@ -91,7 +181,7 @@
// Store the original JSON content
rawContent, err := json.Marshal(raw)
if err != nil {
- return nil, fmt.Errorf("failed to marshal unknown node: %w", err)
+ return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
}
// Create a catchall node
@@ -104,24 +194,45 @@
if raw.Wrap != nil {
var wrapRaw rawNode
if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
- return nil, fmt.Errorf("failed to parse wrap in unknown node: %w", err)
+ return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
}
- wrap, err := parseNode(wrapRaw)
- if err != nil {
- return nil, err
+
+ // Check if the wrapped node is a known type
+ if wrapRaw.Type == "koral:term" || wrapRaw.Type == "koral:token" || wrapRaw.Type == "koral:termGroup" {
+ wrap, err := parseNode(wrapRaw)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
+ }
+ catchall.Wrap = wrap
+ } else {
+ // For unknown types, recursively parse
+ wrap, err := parseNode(wrapRaw)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
+ }
+ catchall.Wrap = wrap
}
- catchall.Wrap = wrap
}
// Parse operands if present
if len(raw.Operands) > 0 {
operands := make([]ast.Node, len(raw.Operands))
for i, op := range raw.Operands {
- node, err := parseNode(op)
- if err != nil {
- return nil, err
+ // Check if the operand is a known type
+ if op.Type == "koral:term" || op.Type == "koral:token" || op.Type == "koral:termGroup" {
+ node, err := parseNode(op)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
+ }
+ operands[i] = node
+ } else {
+ // For unknown types, recursively parse
+ node, err := parseNode(op)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
+ }
+ operands[i] = node
}
- operands[i] = node
}
catchall.Operands = operands
}
@@ -140,6 +251,11 @@
func nodeToRaw(node ast.Node) rawNode {
switch n := node.(type) {
case *ast.Token:
+ if n.Wrap == nil {
+ return rawNode{
+ Type: "koral:token",
+ }
+ }
return rawNode{
Type: "koral:token",
Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
@@ -157,54 +273,74 @@
}
case *ast.Term:
- return rawNode{
- Type: "koral:term",
- Foundry: n.Foundry,
- Key: n.Key,
- Layer: n.Layer,
- Match: "match:" + string(n.Match),
- Value: n.Value,
+ raw := rawNode{
+ Type: "koral:term",
+ Key: n.Key,
+ Match: "match:" + string(n.Match),
}
+ if n.Foundry != "" {
+ raw.Foundry = n.Foundry
+ }
+ if n.Layer != "" {
+ raw.Layer = n.Layer
+ }
+ if n.Value != "" {
+ raw.Value = n.Value
+ }
+ return raw
case *ast.CatchallNode:
- // For catchall nodes, use the stored raw content
+ // For catchall nodes, use the stored raw content if available
if n.RawContent != nil {
- // If we have operands or wrap that were modified, we need to update the raw content
- if len(n.Operands) > 0 || n.Wrap != nil {
- var raw rawNode
- if err := json.Unmarshal(n.RawContent, &raw); err != nil {
- return rawNode{}
- }
+ var raw rawNode
+ if err := json.Unmarshal(n.RawContent, &raw); err == nil {
+ // Ensure we preserve the node type
+ raw.Type = n.NodeType
- // Update operands if present
- if len(n.Operands) > 0 {
- raw.Operands = make([]rawNode, len(n.Operands))
- for i, op := range n.Operands {
- raw.Operands[i] = nodeToRaw(op)
- }
- }
-
- // Update wrap if present
+ // Handle wrap and operands if present
if n.Wrap != nil {
raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
}
-
+ if len(n.Operands) > 0 {
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ raw.Operands = operands
+ }
return raw
}
- // If no modifications, return the original content as is
- var raw rawNode
- _ = json.Unmarshal(n.RawContent, &raw)
- return raw
}
- return rawNode{}
- default:
- return rawNode{}
+ // If RawContent is nil or invalid, create a minimal raw node
+ raw := rawNode{
+ Type: n.NodeType,
+ }
+ if n.Wrap != nil {
+ raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
+ }
+ if len(n.Operands) > 0 {
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ raw.Operands = operands
+ }
+ return raw
+ }
+
+ // Return a minimal raw node for unknown types
+ return rawNode{
+ Type: "koral:unknown",
}
}
// toJSON converts a raw node to JSON bytes
func (r rawNode) toJSON() []byte {
- data, _ := json.Marshal(r)
+ data, err := json.Marshal(r)
+ if err != nil {
+ // The marshal error is dropped here; a placeholder object is returned so callers still receive valid JSON
+ return []byte(`{"@type":"koral:unknown"}`)
+ }
return data
}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
index d2964ce..0a6ad27 100644
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go
@@ -9,6 +9,145 @@
"github.com/stretchr/testify/require"
)
+// normalizeJSON decodes data into a generic value and re-encodes it; the test fails immediately if either step errors.
+func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
+ var v interface{}
+ err := json.Unmarshal(data, &v) // numbers decode as float64 when the target is interface{}
+ require.NoError(t, err)
+
+ // Re-encoding drops insignificant whitespace and writes object keys in sorted order
+ normalized, err := json.Marshal(v)
+ require.NoError(t, err)
+ return normalized
+}
+
+// compareJSON reports whether expected and actual decode to the same JSON value; whitespace and object key order are ignored.
+func compareJSON(t *testing.T, expected, actual string) bool {
+ // Decode both inputs; a malformed document fails the test through require
+ var expectedObj, actualObj interface{}
+ err := json.Unmarshal([]byte(expected), &expectedObj)
+ require.NoError(t, err, "Failed to parse expected JSON")
+ err = json.Unmarshal([]byte(actual), &actualObj)
+ require.NoError(t, err, "Failed to parse actual JSON")
+
+ // Re-encode both values so that equal documents produce identical bytes
+ expectedBytes, err := json.Marshal(expectedObj)
+ require.NoError(t, err)
+ actualBytes, err := json.Marshal(actualObj)
+ require.NoError(t, err)
+
+ // Byte-for-byte comparison of the re-encoded forms
+ return string(expectedBytes) == string(actualBytes)
+}
+
+// compareNodes reports whether expected and actual match; it recurses into Wrap and Operands and compares CatchallNode RawContent after JSON normalization.
+func compareNodes(t *testing.T, expected, actual ast.Node) bool {
+ // Both CatchallNodes: check NodeType, RawContent, Operands and Wrap in that order, stopping at the first mismatch
+ if expectedCatchall, ok := expected.(*ast.CatchallNode); ok {
+ if actualCatchall, ok := actual.(*ast.CatchallNode); ok {
+ // NodeType must be equal
+ if !assert.Equal(t, expectedCatchall.NodeType, actualCatchall.NodeType) {
+ t.Logf("NodeType mismatch: expected '%s', got '%s'", expectedCatchall.NodeType, actualCatchall.NodeType)
+ return false
+ }
+
+ // RawContent is compared after normalization when both are set; otherwise both must be nil
+ if expectedCatchall.RawContent != nil && actualCatchall.RawContent != nil {
+ expectedNorm := normalizeJSON(t, expectedCatchall.RawContent)
+ actualNorm := normalizeJSON(t, actualCatchall.RawContent)
+ if !assert.Equal(t, string(expectedNorm), string(actualNorm)) {
+ t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", expectedNorm, actualNorm)
+ return false
+ }
+ } else if !assert.Equal(t, expectedCatchall.RawContent == nil, actualCatchall.RawContent == nil) {
+ t.Log("One node has RawContent while the other doesn't")
+ return false
+ }
+
+ // Operands must have the same length and match pairwise, in order
+ if !assert.Equal(t, len(expectedCatchall.Operands), len(actualCatchall.Operands)) {
+ t.Logf("Operands length mismatch: expected %d, got %d", len(expectedCatchall.Operands), len(actualCatchall.Operands))
+ return false
+ }
+ for i := range expectedCatchall.Operands {
+ if !compareNodes(t, expectedCatchall.Operands[i], actualCatchall.Operands[i]) {
+ t.Logf("Operand %d mismatch", i)
+ return false
+ }
+ }
+
+ // Wrap must be present on both or on neither; when present it is compared recursively
+ if expectedCatchall.Wrap != nil || actualCatchall.Wrap != nil {
+ if !assert.Equal(t, expectedCatchall.Wrap != nil, actualCatchall.Wrap != nil) {
+ t.Log("One node has Wrap while the other doesn't")
+ return false
+ }
+ if expectedCatchall.Wrap != nil {
+ if !compareNodes(t, expectedCatchall.Wrap, actualCatchall.Wrap) {
+ t.Log("Wrap node mismatch")
+ return false
+ }
+ }
+ }
+
+ return true
+ }
+ }
+
+ // Both Tokens: only Wrap is compared; if either Wrap is nil, both must be nil
+ if expectedToken, ok := expected.(*ast.Token); ok {
+ if actualToken, ok := actual.(*ast.Token); ok {
+ if expectedToken.Wrap == nil || actualToken.Wrap == nil {
+ return assert.Equal(t, expectedToken.Wrap == nil, actualToken.Wrap == nil)
+ }
+ return compareNodes(t, expectedToken.Wrap, actualToken.Wrap)
+ }
+ }
+
+ // Both TermGroups: compare Relation, then the operands pairwise in order
+ if expectedGroup, ok := expected.(*ast.TermGroup); ok {
+ if actualGroup, ok := actual.(*ast.TermGroup); ok {
+ if !assert.Equal(t, expectedGroup.Relation, actualGroup.Relation) {
+ t.Logf("Relation mismatch: expected '%s', got '%s'", expectedGroup.Relation, actualGroup.Relation)
+ return false
+ }
+ if !assert.Equal(t, len(expectedGroup.Operands), len(actualGroup.Operands)) {
+ t.Logf("Operands length mismatch: expected %d, got %d", len(expectedGroup.Operands), len(actualGroup.Operands))
+ return false
+ }
+ for i := range expectedGroup.Operands {
+ if !compareNodes(t, expectedGroup.Operands[i], actualGroup.Operands[i]) {
+ t.Logf("Operand %d mismatch", i)
+ return false
+ }
+ }
+ return true
+ }
+ }
+
+ // Both Terms: compare Foundry, Key, Layer, Match and Value; && stops at the first field that differs
+ if expectedTerm, ok := expected.(*ast.Term); ok {
+ if actualTerm, ok := actual.(*ast.Term); ok {
+ equal := assert.Equal(t, expectedTerm.Foundry, actualTerm.Foundry) &&
+ assert.Equal(t, expectedTerm.Key, actualTerm.Key) &&
+ assert.Equal(t, expectedTerm.Layer, actualTerm.Layer) &&
+ assert.Equal(t, expectedTerm.Match, actualTerm.Match) &&
+ assert.Equal(t, expectedTerm.Value, actualTerm.Value)
+ if !equal {
+ t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", expectedTerm, actualTerm)
+ }
+ return equal
+ }
+ }
+
+ // Anything else (node types not handled above, or expected and actual of different types) falls back to assert.Equal
+ equal := assert.Equal(t, expected, actual)
+ if !equal {
+ t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
+ }
+ return equal
+}
+
func TestParseJSON(t *testing.T) {
tests := []struct {
name string
@@ -304,7 +443,7 @@
require.NoError(t, err)
// Compare JSON objects instead of raw strings to avoid whitespace issues
- var expected, actual interface{}
+ var expected, actual any
err = json.Unmarshal([]byte(tt.expected), &expected)
require.NoError(t, err)
err = json.Unmarshal(result, &actual)
@@ -407,3 +546,258 @@
require.NoError(t, err)
assert.Equal(t, expected, actual)
}
+// TestParseJSONEdgeCases feeds ParseJSON unknown node types, nested and mixed nodes, invalid match/relation values, empty operands and null fields.
+func TestParseJSONEdgeCases(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected ast.Node
+ wantErr bool
+ }{
+ {
+ name: "Unknown node type",
+ input: `{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Unknown node with operands",
+ input: `{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Key: "NOUN",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Deeply nested unknown nodes",
+ input: `{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:outer",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:middle",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:inner",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Mixed known and unknown nodes",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Invalid match type",
+ input: `{
+ "@type": "koral:term",
+ "key": "DET",
+ "match": "match:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Invalid relation type",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Empty operands in term group",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [],
+ "relation": "relation:and"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Null values in term", // the null fields are expected to yield a Term with only Key and the default Match set
+ input: `{
+ "@type": "koral:term",
+ "foundry": null,
+ "key": "DET",
+ "layer": null,
+ "match": null,
+ "value": null
+ }`,
+ expected: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ wantErr: false,
+ },
+ }
+ // Error cases only check that ParseJSON fails; success cases are compared structurally with compareNodes
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := ParseJSON([]byte(tt.input))
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ compareNodes(t, tt.expected, result) // return value unused: mismatches are already recorded on t by the assert calls inside
+ })
+ }
+}