Improve parser testing

commit: 56e09e7f746648165fb19a2b783fccf52da25f59 [log] [tgz]
author: Akron <nils@diewald-online.de> Thu May 22 15:38:35 2025 +0200
committer: Akron <nils@diewald-online.de> Thu May 22 15:38:35 2025 +0200
tree: 8b1e754fa6541e0428d7089eaffc4e79f4efbbc8
parent: a5d88143f945b6371fb98c8e16cbfac358113230 [diff] [blame]
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
index d2964ce..0a6ad27 100644
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go

@@ -9,6 +9,145 @@
 	"github.com/stretchr/testify/require"
 )
 
+// normalizeJSON returns a canonical encoding of data by unmarshaling it into
+// a generic value and marshaling it again. The re-encoded output carries no
+// insignificant whitespace and object keys are emitted in sorted order, so
+// two documents that differ only in formatting or key order normalize to the
+// same bytes. The test is aborted (via require) if data is not valid JSON.
+func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	var v any
+	err := json.Unmarshal(data, &v)
+	require.NoError(t, err)
+
+	// Convert to canonical form (sorted keys, no whitespace)
+	normalized, err := json.Marshal(v)
+	require.NoError(t, err)
+	return normalized
+}
+
+// compareJSON reports whether expected and actual are the same JSON document,
+// ignoring whitespace and object field order. Both inputs are decoded into
+// generic values and re-encoded, and the resulting canonical encodings are
+// compared. The test is aborted (via require) if either input is not valid
+// JSON; a plain mismatch is only signalled through the return value.
+func compareJSON(t *testing.T, expected, actual string) bool {
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	// Parse both JSON strings
+	var expectedObj, actualObj any
+	err := json.Unmarshal([]byte(expected), &expectedObj)
+	require.NoError(t, err, "Failed to parse expected JSON")
+	err = json.Unmarshal([]byte(actual), &actualObj)
+	require.NoError(t, err, "Failed to parse actual JSON")
+
+	// Convert both to canonical form
+	expectedBytes, err := json.Marshal(expectedObj)
+	require.NoError(t, err)
+	actualBytes, err := json.Marshal(actualObj)
+	require.NoError(t, err)
+
+	// Compare the canonical forms
+	return string(expectedBytes) == string(actualBytes)
+}
+
+// compareNodes reports whether expected and actual describe the same AST.
+// Mismatches are recorded on t through assert and additionally logged, so the
+// boolean result is mainly useful for the recursive calls.
+//
+// Comparison depends on the concrete type shared by both nodes:
+//   - *ast.CatchallNode: NodeType, RawContent (after JSON normalization, so
+//     whitespace and key order are ignored), Operands and Wrap.
+//   - *ast.Token: only the Wrap field.
+//   - *ast.TermGroup: Relation and Operands.
+//   - *ast.Term: Foundry, Key, Layer, Match and Value.
+//
+// Any other node type, or a pair of nodes with different concrete types,
+// falls back to assert.Equal on the nodes themselves.
+func compareNodes(t *testing.T, expected, actual ast.Node) bool {
+	// Attribute failures and log lines to the calling test case instead of
+	// this (recursive) helper.
+	t.Helper()
+
+	// If both nodes are CatchallNodes, normalize their JSON content before comparison
+	if expectedCatchall, ok := expected.(*ast.CatchallNode); ok {
+		if actualCatchall, ok := actual.(*ast.CatchallNode); ok {
+			// Compare NodeType
+			if !assert.Equal(t, expectedCatchall.NodeType, actualCatchall.NodeType) {
+				t.Logf("NodeType mismatch: expected '%s', got '%s'", expectedCatchall.NodeType, actualCatchall.NodeType)
+				return false
+			}
+
+			// Normalize and compare RawContent; when at least one side is nil,
+			// only require that both sides agree on being nil.
+			if expectedCatchall.RawContent != nil && actualCatchall.RawContent != nil {
+				expectedNorm := normalizeJSON(t, expectedCatchall.RawContent)
+				actualNorm := normalizeJSON(t, actualCatchall.RawContent)
+				if !assert.Equal(t, string(expectedNorm), string(actualNorm)) {
+					t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", expectedNorm, actualNorm)
+					return false
+				}
+			} else if !assert.Equal(t, expectedCatchall.RawContent == nil, actualCatchall.RawContent == nil) {
+				t.Log("One node has RawContent while the other doesn't")
+				return false
+			}
+
+			// Compare Operands pairwise; the length check guards the indexing below.
+			if !assert.Equal(t, len(expectedCatchall.Operands), len(actualCatchall.Operands)) {
+				t.Logf("Operands length mismatch: expected %d, got %d", len(expectedCatchall.Operands), len(actualCatchall.Operands))
+				return false
+			}
+			for i := range expectedCatchall.Operands {
+				if !compareNodes(t, expectedCatchall.Operands[i], actualCatchall.Operands[i]) {
+					t.Logf("Operand %d mismatch", i)
+					return false
+				}
+			}
+
+			// Compare Wrap: both sides must agree on presence, then on content.
+			if expectedCatchall.Wrap != nil || actualCatchall.Wrap != nil {
+				if !assert.Equal(t, expectedCatchall.Wrap != nil, actualCatchall.Wrap != nil) {
+					t.Log("One node has Wrap while the other doesn't")
+					return false
+				}
+				if expectedCatchall.Wrap != nil {
+					if !compareNodes(t, expectedCatchall.Wrap, actualCatchall.Wrap) {
+						t.Log("Wrap node mismatch")
+						return false
+					}
+				}
+			}
+
+			return true
+		}
+	}
+
+	// For Token nodes, compare their Wrap fields using compareNodes
+	if expectedToken, ok := expected.(*ast.Token); ok {
+		if actualToken, ok := actual.(*ast.Token); ok {
+			// With a nil Wrap on either side there is nothing to recurse into;
+			// the tokens match only if both Wraps are nil.
+			if expectedToken.Wrap == nil || actualToken.Wrap == nil {
+				return assert.Equal(t, expectedToken.Wrap == nil, actualToken.Wrap == nil)
+			}
+			return compareNodes(t, expectedToken.Wrap, actualToken.Wrap)
+		}
+	}
+
+	// For TermGroup nodes, compare relation and operands
+	if expectedGroup, ok := expected.(*ast.TermGroup); ok {
+		if actualGroup, ok := actual.(*ast.TermGroup); ok {
+			if !assert.Equal(t, expectedGroup.Relation, actualGroup.Relation) {
+				t.Logf("Relation mismatch: expected '%s', got '%s'", expectedGroup.Relation, actualGroup.Relation)
+				return false
+			}
+			if !assert.Equal(t, len(expectedGroup.Operands), len(actualGroup.Operands)) {
+				t.Logf("Operands length mismatch: expected %d, got %d", len(expectedGroup.Operands), len(actualGroup.Operands))
+				return false
+			}
+			for i := range expectedGroup.Operands {
+				if !compareNodes(t, expectedGroup.Operands[i], actualGroup.Operands[i]) {
+					t.Logf("Operand %d mismatch", i)
+					return false
+				}
+			}
+			return true
+		}
+	}
+
+	// For Term nodes, compare all fields. The && chain short-circuits, so only
+	// the first differing field is reported by assert; the log line below
+	// prints both terms in full.
+	if expectedTerm, ok := expected.(*ast.Term); ok {
+		if actualTerm, ok := actual.(*ast.Term); ok {
+			equal := assert.Equal(t, expectedTerm.Foundry, actualTerm.Foundry) &&
+				assert.Equal(t, expectedTerm.Key, actualTerm.Key) &&
+				assert.Equal(t, expectedTerm.Layer, actualTerm.Layer) &&
+				assert.Equal(t, expectedTerm.Match, actualTerm.Match) &&
+				assert.Equal(t, expectedTerm.Value, actualTerm.Value)
+			if !equal {
+				t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", expectedTerm, actualTerm)
+			}
+			return equal
+		}
+	}
+
+	// For other node types or mismatched types, use regular equality comparison
+	equal := assert.Equal(t, expected, actual)
+	if !equal {
+		t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
+	}
+	return equal
+}
+
 func TestParseJSON(t *testing.T) {
 	tests := []struct {
 		name     string
@@ -304,7 +443,7 @@
 
 			require.NoError(t, err)
 			// Compare JSON objects instead of raw strings to avoid whitespace issues
-			var expected, actual interface{}
+			var expected, actual any
 			err = json.Unmarshal([]byte(tt.expected), &expected)
 			require.NoError(t, err)
 			err = json.Unmarshal(result, &actual)
@@ -407,3 +546,258 @@
 	require.NoError(t, err)
 	assert.Equal(t, expected, actual)
 }
+
+// TestParseJSONEdgeCases runs ParseJSON against a table of less common
+// inputs: nodes with an unrecognized "@type" (expected to come back as
+// *ast.CatchallNode carrying the raw JSON), known and unknown nodes nested in
+// each other, inputs that are expected to be rejected with an error, and a
+// term whose optional fields are explicitly null. Successful results are
+// checked structurally with compareNodes.
+func TestParseJSONEdgeCases(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected ast.Node
+		wantErr  bool
+	}{
+		{
+			// The wrapped term has no "match" in the input; the expectation
+			// uses ast.MatchEqual for it.
+			name: "Unknown node type",
+			input: `{
+				"@type": "koral:unknown",
+				"customField": "value",
+				"wrap": {
+					"@type": "koral:term",
+					"key": "DET"
+				}
+			}`,
+			expected: &ast.CatchallNode{
+				NodeType: "koral:unknown",
+				RawContent: json.RawMessage(`{
+					"@type": "koral:unknown",
+					"customField": "value",
+					"wrap": {
+						"@type": "koral:term",
+						"key": "DET"
+					}
+				}`),
+				Wrap: &ast.Term{
+					Key:   "DET",
+					Match: ast.MatchEqual,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "Unknown node with operands",
+			input: `{
+				"@type": "koral:unknown",
+				"operands": [
+					{
+						"@type": "koral:term",
+						"key": "DET"
+					},
+					{
+						"@type": "koral:term",
+						"key": "NOUN"
+					}
+				]
+			}`,
+			expected: &ast.CatchallNode{
+				NodeType: "koral:unknown",
+				RawContent: json.RawMessage(`{
+					"@type": "koral:unknown",
+					"operands": [
+						{
+							"@type": "koral:term",
+							"key": "DET"
+						},
+						{
+							"@type": "koral:term",
+							"key": "NOUN"
+						}
+					]
+				}`),
+				Operands: []ast.Node{
+					&ast.Term{
+						Key:   "DET",
+						Match: ast.MatchEqual,
+					},
+					&ast.Term{
+						Key:   "NOUN",
+						Match: ast.MatchEqual,
+					},
+				},
+			},
+			wantErr: false,
+		},
+		{
+			// Each unknown level is expected to keep the raw JSON of its own
+			// subtree, down to the known term at the bottom.
+			name: "Deeply nested unknown nodes",
+			input: `{
+				"@type": "koral:outer",
+				"wrap": {
+					"@type": "koral:middle",
+					"wrap": {
+						"@type": "koral:inner",
+						"wrap": {
+							"@type": "koral:term",
+							"key": "DET"
+						}
+					}
+				}
+			}`,
+			expected: &ast.CatchallNode{
+				NodeType: "koral:outer",
+				RawContent: json.RawMessage(`{
+					"@type": "koral:outer",
+					"wrap": {
+						"@type": "koral:middle",
+						"wrap": {
+							"@type": "koral:inner",
+							"wrap": {
+								"@type": "koral:term",
+								"key": "DET"
+							}
+						}
+					}
+				}`),
+				Wrap: &ast.CatchallNode{
+					NodeType: "koral:middle",
+					RawContent: json.RawMessage(`{
+						"@type": "koral:middle",
+						"wrap": {
+							"@type": "koral:inner",
+							"wrap": {
+								"@type": "koral:term",
+								"key": "DET"
+							}
+						}
+					}`),
+					Wrap: &ast.CatchallNode{
+						NodeType: "koral:inner",
+						RawContent: json.RawMessage(`{
+							"@type": "koral:inner",
+							"wrap": {
+								"@type": "koral:term",
+								"key": "DET"
+							}
+						}`),
+						Wrap: &ast.Term{
+							Key:   "DET",
+							Match: ast.MatchEqual,
+						},
+					},
+				},
+			},
+			wantErr: false,
+		},
+		{
+			// Known token -> unknown node -> known term group.
+			name: "Mixed known and unknown nodes",
+			input: `{
+				"@type": "koral:token",
+				"wrap": {
+					"@type": "koral:custom",
+					"customField": "value",
+					"operands": [
+						{
+							"@type": "koral:termGroup",
+							"operands": [
+								{
+									"@type": "koral:term",
+									"key": "DET"
+								}
+							],
+							"relation": "relation:and"
+						}
+					]
+				}
+			}`,
+			expected: &ast.Token{
+				Wrap: &ast.CatchallNode{
+					NodeType: "koral:custom",
+					RawContent: json.RawMessage(`{
+						"@type": "koral:custom",
+						"customField": "value",
+						"operands": [
+							{
+								"@type": "koral:termGroup",
+								"operands": [
+									{
+										"@type": "koral:term",
+										"key": "DET"
+									}
+								],
+								"relation": "relation:and"
+							}
+						]
+					}`),
+					Operands: []ast.Node{
+						&ast.TermGroup{
+							Operands: []ast.Node{
+								&ast.Term{
+									Key:   "DET",
+									Match: ast.MatchEqual,
+								},
+							},
+							Relation: ast.AndRelation,
+						},
+					},
+				},
+			},
+			wantErr: false,
+		},
+		// The following three cases only assert that ParseJSON returns an
+		// error; no expected node is set.
+		{
+			name: "Invalid match type",
+			input: `{
+				"@type": "koral:term",
+				"key": "DET",
+				"match": "match:invalid"
+			}`,
+			wantErr: true,
+		},
+		{
+			name: "Invalid relation type",
+			input: `{
+				"@type": "koral:termGroup",
+				"operands": [
+					{
+						"@type": "koral:term",
+						"key": "DET"
+					}
+				],
+				"relation": "relation:invalid"
+			}`,
+			wantErr: true,
+		},
+		{
+			name: "Empty operands in term group",
+			input: `{
+				"@type": "koral:termGroup",
+				"operands": [],
+				"relation": "relation:and"
+			}`,
+			wantErr: true,
+		},
+		{
+			// Explicit nulls are expected to yield the same term as if the
+			// fields were left out: empty strings and ast.MatchEqual.
+			name: "Null values in term",
+			input: `{
+				"@type": "koral:term",
+				"foundry": null,
+				"key": "DET",
+				"layer": null,
+				"match": null,
+				"value": null
+			}`,
+			expected: &ast.Term{
+				Key:   "DET",
+				Match: ast.MatchEqual,
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := ParseJSON([]byte(tt.input))
+			if tt.wantErr {
+				// Only the presence of an error is checked, not its content.
+				assert.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			// compareNodes records any mismatch on t itself, so its boolean
+			// result is not needed here.
+			compareNodes(t, tt.expected, result)
+		})
+	}
+}
commit	56e09e7f746648165fb19a2b783fccf52da25f59	[log] [tgz]
author	Akron <nils@diewald-online.de>	Thu May 22 15:38:35 2025 +0200
committer	Akron <nils@diewald-online.de>	Thu May 22 15:38:35 2025 +0200
tree	8b1e754fa6541e0428d7089eaffc4e79f4efbbc8
parent	a5d88143f945b6371fb98c8e16cbfac358113230 [diff] [blame]