Improve parser testing
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
index d2964ce..0a6ad27 100644
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go
@@ -9,6 +9,145 @@
"github.com/stretchr/testify/require"
)
+// normalizeJSON decodes data into a generic value and re-encodes it, so that
+// inputs differing only in whitespace or object key order yield equal bytes.
+func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
+	var decoded any
+	require.NoError(t, json.Unmarshal(data, &decoded))
+
+	// json.Marshal emits object keys in sorted order with no extra whitespace.
+	canonical, err := json.Marshal(decoded)
+	require.NoError(t, err)
+	return canonical
+}
+
+// compareJSON reports whether expected and actual encode the same JSON value.
+// Both documents are decoded and re-encoded first, so whitespace and object
+// key order do not affect the result; parse errors fail the test via require.
+func compareJSON(t *testing.T, expected, actual string) bool {
+	var wantVal, gotVal any
+	require.NoError(t, json.Unmarshal([]byte(expected), &wantVal), "Failed to parse expected JSON")
+	require.NoError(t, json.Unmarshal([]byte(actual), &gotVal), "Failed to parse actual JSON")
+
+	// encode re-marshals a decoded value; json.Marshal sorts object keys.
+	encode := func(v any) string {
+		out, err := json.Marshal(v)
+		require.NoError(t, err)
+		return string(out)
+	}
+	wantCanon := encode(wantVal)
+	gotCanon := encode(gotVal)
+	return wantCanon == gotCanon
+}
+
+// compareNodes reports whether actual matches expected, recording any
+// difference on t through assert. CatchallNodes are matched on NodeType,
+// normalized RawContent, Operands and Wrap; Tokens on Wrap only; TermGroups
+// on Relation and Operands; Terms field by field. Any other pairing,
+// including mismatched concrete types, is handed to assert.Equal.
+func compareNodes(t *testing.T, expected, actual ast.Node) bool {
+	// sameOperands checks two operand lists pairwise after matching lengths.
+	sameOperands := func(want, got []ast.Node) bool {
+		if !assert.Equal(t, len(want), len(got)) {
+			t.Logf("Operands length mismatch: expected %d, got %d", len(want), len(got))
+			return false
+		}
+		for idx := range want {
+			if !compareNodes(t, want[idx], got[idx]) {
+				t.Logf("Operand %d mismatch", idx)
+				return false
+			}
+		}
+		return true
+	}
+
+	switch want := expected.(type) {
+	case *ast.CatchallNode:
+		got, isCatchall := actual.(*ast.CatchallNode)
+		if !isCatchall {
+			break
+		}
+		if !assert.Equal(t, want.NodeType, got.NodeType) {
+			t.Logf("NodeType mismatch: expected '%s', got '%s'", want.NodeType, got.NodeType)
+			return false
+		}
+		// RawContent is compared in normalized form when both sides carry it;
+		// otherwise the two sides must agree on whether it is present at all.
+		wantRaw, gotRaw := want.RawContent != nil, got.RawContent != nil
+		if wantRaw && gotRaw {
+			wantNorm := normalizeJSON(t, want.RawContent)
+			gotNorm := normalizeJSON(t, got.RawContent)
+			if !assert.Equal(t, string(wantNorm), string(gotNorm)) {
+				t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", wantNorm, gotNorm)
+				return false
+			}
+		} else if !assert.Equal(t, !wantRaw, !gotRaw) {
+			t.Log("One node has RawContent while the other doesn't")
+			return false
+		}
+		if !sameOperands(want.Operands, got.Operands) {
+			return false
+		}
+		// Wrap is only examined when at least one side has it set.
+		wantWrap, gotWrap := want.Wrap != nil, got.Wrap != nil
+		if wantWrap || gotWrap {
+			if !assert.Equal(t, wantWrap, gotWrap) {
+				t.Log("One node has Wrap while the other doesn't")
+				return false
+			}
+			if wantWrap && !compareNodes(t, want.Wrap, got.Wrap) {
+				t.Log("Wrap node mismatch")
+				return false
+			}
+		}
+		return true
+
+	case *ast.Token:
+		got, isToken := actual.(*ast.Token)
+		if !isToken {
+			break
+		}
+		// With a missing Wrap on either side, both sides must be missing it.
+		if want.Wrap == nil || got.Wrap == nil {
+			return assert.Equal(t, want.Wrap == nil, got.Wrap == nil)
+		}
+		return compareNodes(t, want.Wrap, got.Wrap)
+
+	case *ast.TermGroup:
+		got, isGroup := actual.(*ast.TermGroup)
+		if !isGroup {
+			break
+		}
+		if !assert.Equal(t, want.Relation, got.Relation) {
+			t.Logf("Relation mismatch: expected '%s', got '%s'", want.Relation, got.Relation)
+			return false
+		}
+		return sameOperands(want.Operands, got.Operands)
+
+	case *ast.Term:
+		got, isTerm := actual.(*ast.Term)
+		if !isTerm {
+			break
+		}
+		matched := assert.Equal(t, want.Foundry, got.Foundry) &&
+			assert.Equal(t, want.Key, got.Key) &&
+			assert.Equal(t, want.Layer, got.Layer) &&
+			assert.Equal(t, want.Match, got.Match) &&
+			assert.Equal(t, want.Value, got.Value)
+		if !matched {
+			t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", want, got)
+		}
+		return matched
+	}
+
+	// Unhandled node kinds and mismatched concrete types end up here.
+	same := assert.Equal(t, expected, actual)
+	if !same {
+		t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
+	}
+	return same
+}
+
func TestParseJSON(t *testing.T) {
tests := []struct {
name string
@@ -304,7 +443,7 @@
require.NoError(t, err)
// Compare JSON objects instead of raw strings to avoid whitespace issues
- var expected, actual interface{}
+ var expected, actual any
err = json.Unmarshal([]byte(tt.expected), &expected)
require.NoError(t, err)
err = json.Unmarshal(result, &actual)
@@ -407,3 +546,258 @@
require.NoError(t, err)
assert.Equal(t, expected, actual)
}
+
+// TestParseJSONEdgeCases runs ParseJSON over unknown "@type" nodes, invalid
+// match/relation values, an empty operand list and null fields. Cases that leave
+// wantErr unset must parse without error and match expected via compareNodes.
+func TestParseJSONEdgeCases(t *testing.T) {
+ cases := []struct {
+ name string
+ input string
+ expected ast.Node
+ wantErr bool
+ }{
+ {
+ name: "Unknown node type",
+ input: `{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ {
+ name: "Unknown node with operands",
+ input: `{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Key: "NOUN",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ {
+ name: "Deeply nested unknown nodes",
+ input: `{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:outer",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:middle",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:inner",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "Mixed known and unknown nodes",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "Invalid match type",
+ input: `{
+ "@type": "koral:term",
+ "key": "DET",
+ "match": "match:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Invalid relation type",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Empty operands in term group",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [],
+ "relation": "relation:and"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Null values in term",
+ input: `{
+ "@type": "koral:term",
+ "foundry": null,
+ "key": "DET",
+ "layer": null,
+ "match": null,
+ "value": null
+ }`,
+ expected: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ got, err := ParseJSON([]byte(tc.input))
+ if tc.wantErr {
+ // Failing inputs only need to produce an error; got is not inspected.
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ compareNodes(t, tc.expected, got)
+ })
+ }
+}