Add response endpoint
Change-Id: I37fb32253d3011a8960a17852ea611443b9f093e
diff --git a/README.md b/README.md
index 87c5461..1ebcbec 100644
--- a/README.md
+++ b/README.md
@@ -164,6 +164,40 @@
}
```
+### POST /:map/response
+
+Transform JSON response objects using the specified mapping list. This endpoint processes response snippets by applying term mappings to annotations within HTML snippet markup.
+
+Parameters:
+
+- `:map`: ID of the mapping list to use
+- `dir` (query): Direction of transformation (`atob` or `btoa`, default: `atob`)
+- `foundryA` (query): Override default foundryA from mapping list
+- `foundryB` (query): Override default foundryB from mapping list
+- `layerA` (query): Override default layerA from mapping list
+- `layerB` (query): Override default layerB from mapping list
+
+Request body: JSON object containing a `snippet` field with HTML markup
+
+Example request:
+
+```http
+POST /opennlp-mapper/response?dir=atob&foundryB=custom HTTP/1.1
+Content-Type: application/json
+
+{
+ "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
+}
+```
+
+Example response:
+
+```json
+{
+ "snippet": "<span title=\"marmot/m:gender:masc\"><span title=\"custom/p:M\" class=\"notinindex\"><span title=\"custom/m:M\" class=\"notinindex\">Der</span></span></span>"
+}
+```
+
### GET /
Serves the Kalamar plugin integration page. This HTML page includes:
diff --git a/cmd/termmapper/fuzz_test.go b/cmd/termmapper/fuzz_test.go
index 650a08d..e2ac02d 100644
--- a/cmd/termmapper/fuzz_test.go
+++ b/cmd/termmapper/fuzz_test.go
@@ -150,6 +150,125 @@
})
}
+func FuzzResponseTransformEndpoint(f *testing.F) {
+ // Create test mapping list
+ mappingList := tmconfig.MappingList{
+ ID: "test-mapper",
+ FoundryA: "marmot",
+ LayerA: "m",
+ FoundryB: "opennlp",
+ LayerB: "p",
+ Mappings: []tmconfig.MappingRule{
+ "[gender=masc] <> [p=M & m=M]",
+ },
+ }
+
+ // Create mapper
+ m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList})
+ if err != nil {
+ f.Fatal(err)
+ }
+
+ // Create mock config for testing
+ mockConfig := &tmconfig.MappingConfig{
+ Lists: []tmconfig.MappingList{mappingList},
+ }
+
+ // Create fiber app
+ app := fiber.New(fiber.Config{
+ DisableStartupMessage: true,
+ ErrorHandler: func(c *fiber.Ctx, err error) error {
+ // For body limit errors, return 413 status code
+ if err.Error() == "body size exceeds the given limit" || errors.Is(err, fiber.ErrRequestEntityTooLarge) {
+ return c.Status(fiber.StatusRequestEntityTooLarge).JSON(fiber.Map{
+ "error": fmt.Sprintf("request body too large (max %d bytes)", maxInputLength),
+ })
+ }
+ // For other errors, return 500 status code
+ return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{
+ "error": err.Error(),
+ })
+ },
+ BodyLimit: maxInputLength,
+ })
+ setupRoutes(app, m, mockConfig)
+
+ // Add seed corpus
+ f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": "<span>test</span>"}`)) // Valid minimal input
+ f.Add("test-mapper", "btoa", "custom", "", "", "", []byte(`{"snippet": "<span title=\"test\">word</span>"}`)) // Valid with foundry override
+ f.Add("", "", "", "", "", "", []byte(`{}`)) // Empty parameters
+ f.Add("nonexistent", "invalid", "!@#$", "%^&*", "()", "[]", []byte(`invalid json`)) // Invalid everything
+ f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": null}`)) // Valid JSON with null snippet
+ f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": 123}`)) // Valid JSON with non-string snippet
+ f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"}`)) // Valid response snippet
+
+ f.Fuzz(func(t *testing.T, mapID, dir, foundryA, foundryB, layerA, layerB string, body []byte) {
+
+ // Validate input first
+ if err := validateInput(mapID, dir, foundryA, foundryB, layerA, layerB, body); err != nil {
+ // Skip this test case as it's invalid
+ t.Skip(err)
+ }
+
+ // Build URL with query parameters
+ params := url.Values{}
+ if dir != "" {
+ params.Set("dir", dir)
+ }
+ if foundryA != "" {
+ params.Set("foundryA", foundryA)
+ }
+ if foundryB != "" {
+ params.Set("foundryB", foundryB)
+ }
+ if layerA != "" {
+ params.Set("layerA", layerA)
+ }
+ if layerB != "" {
+ params.Set("layerB", layerB)
+ }
+
+ url := fmt.Sprintf("/%s/response", url.PathEscape(mapID))
+ if len(params) > 0 {
+ url += "?" + params.Encode()
+ }
+
+ // Make request
+ req := httptest.NewRequest(http.MethodPost, url, bytes.NewReader(body))
+ req.Header.Set("Content-Type", "application/json")
+ resp, err := app.Test(req)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer resp.Body.Close()
+
+ // Verify that we always get a valid response
+ if resp.StatusCode != http.StatusOK &&
+ resp.StatusCode != http.StatusBadRequest &&
+ resp.StatusCode != http.StatusInternalServerError {
+ t.Errorf("unexpected status code: %d", resp.StatusCode)
+ }
+
+ // Verify that the response is valid JSON
+ var result any
+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+ t.Errorf("invalid JSON response: %v", err)
+ }
+
+ // For error responses, verify that we have an error message
+ if resp.StatusCode != http.StatusOK {
+ // For error responses, we expect a JSON object with an error field
+ if resultMap, ok := result.(map[string]any); ok {
+ if errMsg, ok := resultMap["error"].(string); !ok || errMsg == "" {
+ t.Error("error response missing error message")
+ }
+ } else {
+ t.Error("error response should be a JSON object")
+ }
+ }
+ })
+}
+
func TestLargeInput(t *testing.T) {
// Create test mapping list
mappingList := tmconfig.MappingList{
diff --git a/cmd/termmapper/main.go b/cmd/termmapper/main.go
index 4c868b5..006a009 100644
--- a/cmd/termmapper/main.go
+++ b/cmd/termmapper/main.go
@@ -163,6 +163,9 @@
// Transformation endpoint
app.Post("/:map/query", handleTransform(m))
+ // Response transformation endpoint
+ app.Post("/:map/response", handleResponseTransform(m))
+
// Kalamar plugin endpoint
app.Get("/", handleKalamarPlugin(yamlConfig))
app.Get("/:map", handleKalamarPlugin(yamlConfig))
@@ -232,6 +235,70 @@
}
}
+func handleResponseTransform(m *mapper.Mapper) fiber.Handler {
+ return func(c *fiber.Ctx) error {
+ // Get parameters
+ mapID := c.Params("map")
+ dir := c.Query("dir", "atob")
+ foundryA := c.Query("foundryA", "")
+ foundryB := c.Query("foundryB", "")
+ layerA := c.Query("layerA", "")
+ layerB := c.Query("layerB", "")
+
+ // Validate input parameters
+ if err := validateInput(mapID, dir, foundryA, foundryB, layerA, layerB, c.Body()); err != nil {
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
+ "error": err.Error(),
+ })
+ }
+
+ // Validate direction
+ if dir != "atob" && dir != "btoa" {
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
+ "error": "invalid direction, must be 'atob' or 'btoa'",
+ })
+ }
+
+ // Parse request body
+ var jsonData any
+ if err := c.BodyParser(&jsonData); err != nil {
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
+ "error": "invalid JSON in request body",
+ })
+ }
+
+ // Parse direction
+ direction, err := mapper.ParseDirection(dir)
+ if err != nil {
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
+ "error": err.Error(),
+ })
+ }
+
+ // Apply response mappings
+ result, err := m.ApplyResponseMappings(mapID, mapper.MappingOptions{
+ Direction: direction,
+ FoundryA: foundryA,
+ FoundryB: foundryB,
+ LayerA: layerA,
+ LayerB: layerB,
+ }, jsonData)
+
+ if err != nil {
+ log.Error().Err(err).
+ Str("mapID", mapID).
+ Str("direction", dir).
+ Msg("Failed to apply response mappings")
+
+ return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{
+ "error": err.Error(),
+ })
+ }
+
+ return c.JSON(result)
+ }
+}
+
// validateInput checks if the input parameters are valid
func validateInput(mapID, dir, foundryA, foundryB, layerA, layerB string, body []byte) error {
// Define parameter checks
@@ -337,6 +404,9 @@
<dt><tt><strong>POST</strong> /:map/query</tt></dt>
<dd><small>Transform JSON query objects using term mapping rules</small></dd>
+
+ <dt><tt><strong>POST</strong> /:map/response</tt></dt>
+ <dd><small>Transform JSON response objects using term mapping rules</small></dd>
</dl>
diff --git a/cmd/termmapper/main_test.go b/cmd/termmapper/main_test.go
index b248deb..8030b37 100644
--- a/cmd/termmapper/main_test.go
+++ b/cmd/termmapper/main_test.go
@@ -258,6 +258,191 @@
}
}
+func TestResponseTransformEndpoint(t *testing.T) {
+ // Create test mapping list
+ mappingList := tmconfig.MappingList{
+ ID: "test-response-mapper",
+ FoundryA: "marmot",
+ LayerA: "m",
+ FoundryB: "opennlp",
+ LayerB: "p",
+ Mappings: []tmconfig.MappingRule{
+ "[gender=masc] <> [p=M & m=M]",
+ },
+ }
+
+ // Create mapper
+ m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList})
+ require.NoError(t, err)
+
+ // Create mock config for testing
+ mockConfig := &tmconfig.MappingConfig{
+ Lists: []tmconfig.MappingList{mappingList},
+ }
+
+ // Create fiber app
+ app := fiber.New()
+ setupRoutes(app, m, mockConfig)
+
+ tests := []struct {
+ name string
+ mapID string
+ direction string
+ foundryA string
+ foundryB string
+ layerA string
+ layerB string
+ input string
+ expectedCode int
+ expectedBody string
+ expectedError string
+ }{
+ {
+ name: "Simple response mapping with snippet transformation",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ input: `{
+ "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
+ }`,
+ expectedCode: http.StatusOK,
+ expectedBody: `{
+ "snippet": "<span title=\"marmot/m:gender:masc\"><span title=\"opennlp/p:M\" class=\"notinindex\"><span title=\"opennlp/m:M\" class=\"notinindex\">Der</span></span></span>"
+ }`,
+ },
+ {
+ name: "Response with no snippet field",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ input: `{
+ "@type": "koral:response",
+ "meta": {
+ "version": "Krill-0.64.1"
+ }
+ }`,
+ expectedCode: http.StatusOK,
+ expectedBody: `{
+ "@type": "koral:response",
+ "meta": {
+ "version": "Krill-0.64.1"
+ }
+ }`,
+ },
+ {
+ name: "Response with null snippet",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ input: `{
+ "snippet": null
+ }`,
+ expectedCode: http.StatusOK,
+ expectedBody: `{
+ "snippet": null
+ }`,
+ },
+ {
+ name: "Response with non-string snippet",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ input: `{
+ "snippet": 123
+ }`,
+ expectedCode: http.StatusOK,
+ expectedBody: `{
+ "snippet": 123
+ }`,
+ },
+ {
+ name: "Response mapping with foundry override",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ foundryB: "custom",
+ input: `{
+ "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
+ }`,
+ expectedCode: http.StatusOK,
+ expectedBody: `{
+ "snippet": "<span title=\"marmot/m:gender:masc\"><span title=\"custom/p:M\" class=\"notinindex\"><span title=\"custom/m:M\" class=\"notinindex\">Der</span></span></span>"
+ }`,
+ },
+ {
+ name: "Invalid mapping ID for response",
+ mapID: "nonexistent",
+ direction: "atob",
+ input: `{"snippet": "<span>test</span>"}`,
+ expectedCode: http.StatusInternalServerError,
+ expectedError: "mapping list with ID nonexistent not found",
+ },
+ {
+ name: "Invalid direction for response",
+ mapID: "test-response-mapper",
+ direction: "invalid",
+ input: `{"snippet": "<span>test</span>"}`,
+ expectedCode: http.StatusBadRequest,
+ expectedError: "invalid direction, must be 'atob' or 'btoa'",
+ },
+ {
+ name: "Invalid JSON for response",
+ mapID: "test-response-mapper",
+ direction: "atob",
+ input: `{invalid json}`,
+ expectedCode: http.StatusBadRequest,
+ expectedError: "invalid JSON in request body",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Build URL with query parameters
+ url := "/" + tt.mapID + "/response"
+ if tt.direction != "" {
+ url += "?dir=" + tt.direction
+ }
+ if tt.foundryA != "" {
+ url += "&foundryA=" + tt.foundryA
+ }
+ if tt.foundryB != "" {
+ url += "&foundryB=" + tt.foundryB
+ }
+ if tt.layerA != "" {
+ url += "&layerA=" + tt.layerA
+ }
+ if tt.layerB != "" {
+ url += "&layerB=" + tt.layerB
+ }
+
+ // Make request
+ req := httptest.NewRequest(http.MethodPost, url, bytes.NewBufferString(tt.input))
+ req.Header.Set("Content-Type", "application/json")
+ resp, err := app.Test(req)
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ // Check status code
+ assert.Equal(t, tt.expectedCode, resp.StatusCode)
+
+ // Read response body
+ body, err := io.ReadAll(resp.Body)
+ require.NoError(t, err)
+
+ if tt.expectedError != "" {
+ // Check error message
+ var errResp fiber.Map
+ err = json.Unmarshal(body, &errResp)
+ require.NoError(t, err)
+ assert.Equal(t, tt.expectedError, errResp["error"])
+ } else {
+ // Compare JSON responses
+ var expected, actual any
+ err = json.Unmarshal([]byte(tt.expectedBody), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(body, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+ }
+ })
+ }
+}
+
func TestHealthEndpoint(t *testing.T) {
// Create test mapping list
mappingList := tmconfig.MappingList{
diff --git a/mapper/mapper.go b/mapper/mapper.go
index 0bbe39f..fe46a83 100644
--- a/mapper/mapper.go
+++ b/mapper/mapper.go
@@ -1,12 +1,9 @@
package mapper
import (
- "encoding/json"
"fmt"
- "github.com/KorAP/KoralPipe-TermMapper/ast"
"github.com/KorAP/KoralPipe-TermMapper/config"
- "github.com/KorAP/KoralPipe-TermMapper/matcher"
"github.com/KorAP/KoralPipe-TermMapper/parser"
)
@@ -81,407 +78,3 @@
Direction Direction
AddRewrites bool
}
-
-// ApplyQueryMappings applies the specified mapping rules to a JSON object
-func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
- // Validate mapping ID
- if _, exists := m.mappingLists[mappingID]; !exists {
- return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
- }
-
- // Get the parsed rules
- rules := m.parsedRules[mappingID]
-
- // Check if we have a wrapper object with a "query" field
- var queryData any
- var hasQueryWrapper bool
-
- if jsonMap, ok := jsonData.(map[string]any); ok {
- if query, exists := jsonMap["query"]; exists {
- queryData = query
- hasQueryWrapper = true
- }
- }
-
- // If no query wrapper was found, use the entire input
- if !hasQueryWrapper {
- // If the input itself is not a valid query object, return it as is
- if !isValidQueryObject(jsonData) {
- return jsonData, nil
- }
- queryData = jsonData
- } else if queryData == nil || !isValidQueryObject(queryData) {
- // If we have a query wrapper but the query is nil or not a valid object,
- // return the original data
- return jsonData, nil
- }
-
- // Store rewrites if they exist
- var oldRewrites any
- if queryMap, ok := queryData.(map[string]any); ok {
- if rewrites, exists := queryMap["rewrites"]; exists {
- oldRewrites = rewrites
- delete(queryMap, "rewrites")
- }
- }
-
- // Convert input JSON to AST
- jsonBytes, err := json.Marshal(queryData)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
- }
-
- node, err := parser.ParseJSON(jsonBytes)
- if err != nil {
- return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
- }
-
- // Store whether the input was a Token
- isToken := false
- var tokenWrap ast.Node
- if token, ok := node.(*ast.Token); ok {
- isToken = true
- tokenWrap = token.Wrap
- node = tokenWrap
- }
-
- // Store original node for rewrite if needed
- var originalNode ast.Node
- if opts.AddRewrites {
- originalNode = node.Clone()
- }
-
- // Pre-check foundry/layer overrides to optimize processing
- var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
- if opts.Direction { // true means AtoB
- patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
- replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
- } else {
- patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
- replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
- }
-
- // Create a pattern cache key for memoization
- type patternCacheKey struct {
- ruleIndex int
- foundry string
- layer string
- isReplacement bool
- }
- patternCache := make(map[patternCacheKey]ast.Node)
-
- // Apply each rule to the AST
- for i, rule := range rules {
- // Create pattern and replacement based on direction
- var pattern, replacement ast.Node
- if opts.Direction { // true means AtoB
- pattern = rule.Upper
- replacement = rule.Lower
- } else {
- pattern = rule.Lower
- replacement = rule.Upper
- }
-
- // Extract the inner nodes from the pattern and replacement tokens
- if token, ok := pattern.(*ast.Token); ok {
- pattern = token.Wrap
- }
- if token, ok := replacement.(*ast.Token); ok {
- replacement = token.Wrap
- }
-
- // First, quickly check if the pattern could match without creating a full matcher
- // This is a lightweight pre-check to avoid expensive operations
- if !m.couldPatternMatch(node, pattern) {
- continue
- }
-
- // Get or create pattern with overrides
- patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
- processedPattern, exists := patternCache[patternKey]
- if !exists {
- // Clone pattern only when needed
- processedPattern = pattern.Clone()
- // Apply foundry and layer overrides only if they're non-empty
- if patternFoundry != "" || patternLayer != "" {
- ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
- }
- patternCache[patternKey] = processedPattern
- }
-
- // Create a temporary matcher to check for actual matches
- tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
- if err != nil {
- return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
- }
-
- // Only proceed if there's an actual match
- if !tempMatcher.Match(node) {
- continue
- }
-
- // Get or create replacement with overrides (lazy evaluation)
- replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
- processedReplacement, exists := patternCache[replacementKey]
- if !exists {
- // Clone replacement only when we have a match
- processedReplacement = replacement.Clone()
- // Apply foundry and layer overrides only if they're non-empty
- if replacementFoundry != "" || replacementLayer != "" {
- ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
- }
- patternCache[replacementKey] = processedReplacement
- }
-
- // Create the actual matcher and apply replacement
- actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
- if err != nil {
- return nil, fmt.Errorf("failed to create matcher: %w", err)
- }
- node = actualMatcher.Replace(node)
- }
-
- // Wrap the result in a token if the input was a token
- var result ast.Node
- if isToken {
- result = &ast.Token{Wrap: node}
- } else {
- result = node
- }
-
- // Convert AST back to JSON
- resultBytes, err := parser.SerializeToJSON(result)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
- }
-
- // Parse the JSON string back into
- var resultData any
- if err := json.Unmarshal(resultBytes, &resultData); err != nil {
- return nil, fmt.Errorf("failed to parse result JSON: %w", err)
- }
-
- // Add rewrites if enabled and node was changed
- if opts.AddRewrites && !ast.NodesEqual(node, originalNode) {
- // Create rewrite object
- rewrite := map[string]any{
- "@type": "koral:rewrite",
- "editor": "termMapper",
- }
-
- // Check if the node types are different (structural change)
- if originalNode.Type() != node.Type() {
- // Full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- } else if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(node) {
- // Check which attributes changed
- newTerm := node.(*ast.Term)
- if term.Foundry != newTerm.Foundry {
- rewrite["scope"] = "foundry"
- rewrite["original"] = term.Foundry
- } else if term.Layer != newTerm.Layer {
- rewrite["scope"] = "layer"
- rewrite["original"] = term.Layer
- } else if term.Key != newTerm.Key {
- rewrite["scope"] = "key"
- rewrite["original"] = term.Key
- } else if term.Value != newTerm.Value {
- rewrite["scope"] = "value"
- rewrite["original"] = term.Value
- } else {
- // No specific attribute changed, use full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- }
- } else {
- // Full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- }
-
- // Add rewrite to the node
- if resultMap, ok := resultData.(map[string]any); ok {
- if wrapMap, ok := resultMap["wrap"].(map[string]any); ok {
- rewrites, exists := wrapMap["rewrites"]
- if !exists {
- rewrites = []any{}
- }
- if rewritesList, ok := rewrites.([]any); ok {
- wrapMap["rewrites"] = append(rewritesList, rewrite)
- } else {
- wrapMap["rewrites"] = []any{rewrite}
- }
- }
- }
- }
-
- // Restore rewrites if they existed
- if oldRewrites != nil {
- // Process old rewrites through AST to ensure backward compatibility
- if rewritesList, ok := oldRewrites.([]any); ok {
- processedRewrites := make([]any, len(rewritesList))
- for i, rewriteData := range rewritesList {
- // Marshal and unmarshal each rewrite to apply backward compatibility
- rewriteBytes, err := json.Marshal(rewriteData)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
- }
- var rewrite ast.Rewrite
- if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
- return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
- }
- // Marshal back to get the transformed version
- transformedBytes, err := json.Marshal(&rewrite)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
- }
- var transformedRewrite any
- if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
- return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
- }
- processedRewrites[i] = transformedRewrite
- }
- if resultMap, ok := resultData.(map[string]any); ok {
- resultMap["rewrites"] = processedRewrites
- }
- } else {
- // If it's not a list, restore as-is
- if resultMap, ok := resultData.(map[string]any); ok {
- resultMap["rewrites"] = oldRewrites
- }
- }
- }
-
- // If we had a query wrapper, put the transformed data back in it
- if hasQueryWrapper {
- if wrapper, ok := jsonData.(map[string]any); ok {
- wrapper["query"] = resultData
- return wrapper, nil
- }
- }
-
- return resultData, nil
-}
-
-// isValidQueryObject checks if the query data is a valid object that can be processed
-func isValidQueryObject(data any) bool {
- // Check if it's a map
- queryMap, ok := data.(map[string]any)
- if !ok {
- return false
- }
-
- // Check if it has the required @type field
- if _, ok := queryMap["@type"]; !ok {
- return false
- }
-
- return true
-}
-
-// couldPatternMatch performs a lightweight check to see if a pattern could potentially match a node
-// This is an optimization to avoid expensive operations when there's clearly no match possible
-func (m *Mapper) couldPatternMatch(node, pattern ast.Node) bool {
- if pattern == nil {
- return true
- }
- if node == nil {
- return false
- }
-
- // Handle Token wrappers
- if token, ok := pattern.(*ast.Token); ok {
- pattern = token.Wrap
- }
- if token, ok := node.(*ast.Token); ok {
- node = token.Wrap
- }
-
- // For simple terms, check basic compatibility
- if patternTerm, ok := pattern.(*ast.Term); ok {
- // Check if there's any term in the node structure that could match
- return m.hasMatchingTerm(node, patternTerm)
- }
-
- // For TermGroups, we need to check all possible matches
- if patternGroup, ok := pattern.(*ast.TermGroup); ok {
- if patternGroup.Relation == ast.OrRelation {
- // For OR relations, any operand could match
- for _, op := range patternGroup.Operands {
- if m.couldPatternMatch(node, op) {
- return true
- }
- }
- return false
- } else {
- // For AND relations, all operands must have potential matches
- for _, op := range patternGroup.Operands {
- if !m.couldPatternMatch(node, op) {
- return false
- }
- }
- return true
- }
- }
-
- // For other cases, assume they could match (conservative approach)
- return true
-}
-
-// hasMatchingTerm checks if there's any term in the node structure that could match the pattern term
-func (m *Mapper) hasMatchingTerm(node ast.Node, patternTerm *ast.Term) bool {
- if node == nil {
- return false
- }
-
- switch n := node.(type) {
- case *ast.Term:
- // Check if this term could match the pattern
- // We only check key as that's the most distinctive attribute
- return n.Key == patternTerm.Key
- case *ast.TermGroup:
- // Check all operands
- for _, op := range n.Operands {
- if m.hasMatchingTerm(op, patternTerm) {
- return true
- }
- }
- return false
- case *ast.Token:
- return m.hasMatchingTerm(n.Wrap, patternTerm)
- case *ast.CatchallNode:
- if n.Wrap != nil && m.hasMatchingTerm(n.Wrap, patternTerm) {
- return true
- }
- for _, op := range n.Operands {
- if m.hasMatchingTerm(op, patternTerm) {
- return true
- }
- }
- return false
- default:
- return false
- }
-}
diff --git a/mapper/query.go b/mapper/query.go
new file mode 100644
index 0000000..81fddba
--- /dev/null
+++ b/mapper/query.go
@@ -0,0 +1,414 @@
+package mapper // query.go: ApplyQueryMappings and its lightweight pattern pre-check helpers
+
+import (
+ "encoding/json"
+ "fmt"
+
+ "github.com/KorAP/KoralPipe-TermMapper/ast"
+ "github.com/KorAP/KoralPipe-TermMapper/matcher"
+ "github.com/KorAP/KoralPipe-TermMapper/parser"
+)
+
+// ApplyQueryMappings applies the rules of mapping list mappingID to jsonData (or to its "query" field, if present) and returns the transformed JSON value; data that is not an object with an "@type" field is returned unchanged, and an unknown mappingID yields an error.
+func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
+ // Reject unknown mapping IDs up front
+ if _, exists := m.mappingLists[mappingID]; !exists {
+ return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
+ }
+
+ // Parsed rules registered under this mapping ID
+ rules := m.parsedRules[mappingID]
+
+ // Check if we have a wrapper object with a "query" field
+ var queryData any
+ var hasQueryWrapper bool
+
+ if jsonMap, ok := jsonData.(map[string]any); ok {
+ if query, exists := jsonMap["query"]; exists {
+ queryData = query
+ hasQueryWrapper = true
+ }
+ }
+
+ // If no query wrapper was found, treat the entire input as the query
+ if !hasQueryWrapper {
+ // If the input itself is not a valid query object, return it as is
+ if !isValidQueryObject(jsonData) {
+ return jsonData, nil
+ }
+ queryData = jsonData
+ } else if queryData == nil || !isValidQueryObject(queryData) {
+ // If we have a query wrapper but the query is nil or not a valid object,
+ // return the original data untouched
+ return jsonData, nil
+ }
+
+ // Detach existing top-level rewrites before parsing (restored on the result below); note that this mutates the caller's map
+ var oldRewrites any
+ if queryMap, ok := queryData.(map[string]any); ok {
+ if rewrites, exists := queryMap["rewrites"]; exists {
+ oldRewrites = rewrites
+ delete(queryMap, "rewrites")
+ }
+ }
+
+ // Convert input JSON to AST (round trip through bytes, as the parser consumes serialized JSON)
+ jsonBytes, err := json.Marshal(queryData)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
+ }
+
+ node, err := parser.ParseJSON(jsonBytes)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
+ }
+
+ // Unwrap a top-level Token so the rules operate on its content; isToken triggers re-wrapping of the result
+ isToken := false
+ var tokenWrap ast.Node
+ if token, ok := node.(*ast.Token); ok {
+ isToken = true
+ tokenWrap = token.Wrap
+ node = tokenWrap
+ }
+
+ // Keep a copy of the untouched node for rewrite reporting (NOTE(review): Clone would panic on a nil interface, e.g. a Token without wrap — confirm the parser never yields that)
+ var originalNode ast.Node
+ if opts.AddRewrites {
+ originalNode = node.Clone()
+ }
+
+ // Select pattern-side and replacement-side foundry/layer overrides according to the direction
+ var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
+ if opts.Direction { // true means AtoB
+ patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
+ replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
+ } else {
+ patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
+ replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
+ }
+
+ // Per-call cache of override-applied clones, keyed by rule index and side (NOTE(review): each rule index is visited once per call, so entries look never re-read — confirm the cache is needed)
+ type patternCacheKey struct {
+ ruleIndex int
+ foundry string
+ layer string
+ isReplacement bool
+ }
+ patternCache := make(map[patternCacheKey]ast.Node)
+
+ // Apply each rule to the AST, in order; later rules see the output of earlier ones
+ for i, rule := range rules {
+ // Create pattern and replacement based on direction
+ var pattern, replacement ast.Node
+ if opts.Direction { // true means AtoB
+ pattern = rule.Upper
+ replacement = rule.Lower
+ } else {
+ pattern = rule.Lower
+ replacement = rule.Upper
+ }
+
+ // Extract the inner nodes from the pattern and replacement tokens
+ if token, ok := pattern.(*ast.Token); ok {
+ pattern = token.Wrap
+ }
+ if token, ok := replacement.(*ast.Token); ok {
+ replacement = token.Wrap
+ }
+
+ // First, quickly check if the pattern could match without creating a full matcher
+ // This is a lightweight pre-check (run before overrides are applied) to avoid expensive operations
+ if !m.couldPatternMatch(node, pattern) {
+ continue
+ }
+
+ // Get or create pattern with overrides
+ patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
+ processedPattern, exists := patternCache[patternKey]
+ if !exists {
+ // Clone so the overrides never touch the shared parsed rule
+ processedPattern = pattern.Clone()
+ // Apply foundry and layer overrides only if they're non-empty
+ if patternFoundry != "" || patternLayer != "" {
+ ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
+ }
+ patternCache[patternKey] = processedPattern
+ }
+
+ // Create a temporary matcher to check for actual matches; the empty Term is only a placeholder replacement and is never applied
+ tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
+ }
+
+ // Only proceed if there's an actual match
+ if !tempMatcher.Match(node) {
+ continue
+ }
+
+ // Get or create replacement with overrides (lazy evaluation)
+ replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
+ processedReplacement, exists := patternCache[replacementKey]
+ if !exists {
+ // Clone replacement only when we have a match
+ processedReplacement = replacement.Clone()
+ // Apply foundry and layer overrides only if they're non-empty
+ if replacementFoundry != "" || replacementLayer != "" {
+ ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
+ }
+ patternCache[replacementKey] = processedReplacement
+ }
+
+ // Create the actual matcher and apply replacement
+ actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create matcher: %w", err)
+ }
+ node = actualMatcher.Replace(node)
+ }
+
+ // Wrap the result in a token if the input was a token
+ var result ast.Node
+ if isToken {
+ result = &ast.Token{Wrap: node}
+ } else {
+ result = node
+ }
+
+ // Convert AST back to JSON
+ resultBytes, err := parser.SerializeToJSON(result)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
+ }
+
+ // Parse the serialized JSON back into a generic value so rewrites can be attached to it
+ var resultData any
+ if err := json.Unmarshal(resultBytes, &resultData); err != nil {
+ return nil, fmt.Errorf("failed to parse result JSON: %w", err)
+ }
+
+ // Add rewrites if enabled and node was changed
+ if opts.AddRewrites && !ast.NodesEqual(node, originalNode) {
+ // Create rewrite object
+ rewrite := map[string]any{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ }
+
+ // Check if the node types are different (structural change)
+ if originalNode.Type() != node.Type() {
+ // Full node replacement: record the whole original node
+ originalBytes, err := parser.SerializeToJSON(originalNode)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
+ }
+ var originalJSON any
+ if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
+ return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
+ }
+ rewrite["original"] = originalJSON
+ } else if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(node) {
+ // Term-to-term change: report only the first differing attribute (foundry, layer, key, value — in that order)
+ newTerm := node.(*ast.Term)
+ if term.Foundry != newTerm.Foundry {
+ rewrite["scope"] = "foundry"
+ rewrite["original"] = term.Foundry
+ } else if term.Layer != newTerm.Layer {
+ rewrite["scope"] = "layer"
+ rewrite["original"] = term.Layer
+ } else if term.Key != newTerm.Key {
+ rewrite["scope"] = "key"
+ rewrite["original"] = term.Key
+ } else if term.Value != newTerm.Value {
+ rewrite["scope"] = "value"
+ rewrite["original"] = term.Value
+ } else {
+ // None of the four compared attributes changed, use full node replacement
+ originalBytes, err := parser.SerializeToJSON(originalNode)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
+ }
+ var originalJSON any
+ if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
+ return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
+ }
+ rewrite["original"] = originalJSON
+ }
+ } else {
+ // Same type but not a Term pair: full node replacement
+ originalBytes, err := parser.SerializeToJSON(originalNode)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
+ }
+ var originalJSON any
+ if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
+ return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
+ }
+ rewrite["original"] = originalJSON
+ }
+
+ // Attach the rewrite to the "wrap" object of the result; a result without a "wrap" map gets no rewrite entry
+ if resultMap, ok := resultData.(map[string]any); ok {
+ if wrapMap, ok := resultMap["wrap"].(map[string]any); ok {
+ rewrites, exists := wrapMap["rewrites"]
+ if !exists {
+ rewrites = []any{}
+ }
+ if rewritesList, ok := rewrites.([]any); ok {
+ wrapMap["rewrites"] = append(rewritesList, rewrite)
+ } else {
+ wrapMap["rewrites"] = []any{rewrite}
+ }
+ }
+ }
+ }
+
+ // Restore the rewrites detached earlier onto the top level of the result
+ if oldRewrites != nil {
+ // Process old rewrites through AST to ensure backward compatibility
+ if rewritesList, ok := oldRewrites.([]any); ok {
+ processedRewrites := make([]any, len(rewritesList))
+ for i, rewriteData := range rewritesList {
+ // Marshal and unmarshal each rewrite to apply backward compatibility
+ rewriteBytes, err := json.Marshal(rewriteData)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
+ }
+ var rewrite ast.Rewrite
+ if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
+ }
+ // Marshal back to get the transformed version
+ transformedBytes, err := json.Marshal(&rewrite)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
+ }
+ var transformedRewrite any
+ if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
+ }
+ processedRewrites[i] = transformedRewrite
+ }
+ if resultMap, ok := resultData.(map[string]any); ok {
+ resultMap["rewrites"] = processedRewrites
+ }
+ } else {
+ // If it's not a list, restore as-is
+ if resultMap, ok := resultData.(map[string]any); ok {
+ resultMap["rewrites"] = oldRewrites
+ }
+ }
+ }
+
+ // If we had a query wrapper, put the transformed data back in it (the caller's wrapper map is updated in place and returned)
+ if hasQueryWrapper {
+ if wrapper, ok := jsonData.(map[string]any); ok {
+ wrapper["query"] = resultData
+ return wrapper, nil
+ }
+ }
+
+ return resultData, nil
+}
+
+// isValidQueryObject reports whether data is a JSON object (map[string]any) that carries an "@type" field.
+func isValidQueryObject(data any) bool {
+ // Must be a JSON object; any other value (including nil) is rejected
+ queryMap, ok := data.(map[string]any)
+ if !ok {
+ return false
+ }
+
+ // Must declare an "@type"; only the key's presence is checked, not its value
+ if _, ok := queryMap["@type"]; !ok {
+ return false
+ }
+
+ return true
+}
+
+// couldPatternMatch is a cheap pre-check that reports whether pattern could possibly match node.
+// A nil pattern yields true and a nil node yields false; a true result does not guarantee a real match.
+func (m *Mapper) couldPatternMatch(node, pattern ast.Node) bool {
+ if pattern == nil {
+ return true
+ }
+ if node == nil {
+ return false
+ }
+
+ // Look through Token wrappers on both sides
+ if token, ok := pattern.(*ast.Token); ok {
+ pattern = token.Wrap
+ }
+ if token, ok := node.(*ast.Token); ok {
+ node = token.Wrap
+ }
+
+ // For simple terms, check basic compatibility
+ if patternTerm, ok := pattern.(*ast.Term); ok {
+ // Check if there's any term in the node structure with the same key
+ return m.hasMatchingTerm(node, patternTerm)
+ }
+
+ // For TermGroups, recurse into the operands according to the group's relation
+ if patternGroup, ok := pattern.(*ast.TermGroup); ok {
+ if patternGroup.Relation == ast.OrRelation {
+ // For OR relations, one potentially matching operand is enough
+ for _, op := range patternGroup.Operands {
+ if m.couldPatternMatch(node, op) {
+ return true
+ }
+ }
+ return false
+ } else {
+ // For every other relation (i.e. AND), all operands must have potential matches
+ for _, op := range patternGroup.Operands {
+ if !m.couldPatternMatch(node, op) {
+ return false
+ }
+ }
+ return true
+ }
+ }
+
+ // For other pattern node types, assume they could match (conservative approach)
+ return true
+}
+
+// hasMatchingTerm reports whether any Term reachable from node (via TermGroup operands, Token wraps, and CatchallNode wrap/operands) has the same Key as patternTerm.
+func (m *Mapper) hasMatchingTerm(node ast.Node, patternTerm *ast.Term) bool {
+ if node == nil {
+ return false
+ }
+
+ switch n := node.(type) {
+ case *ast.Term:
+ // Leaf: compare against the pattern term
+ // Only the key is compared; foundry, layer and value are not consulted here
+ return n.Key == patternTerm.Key
+ case *ast.TermGroup:
+ // One operand containing a matching term is enough, regardless of the group's relation
+ for _, op := range n.Operands {
+ if m.hasMatchingTerm(op, patternTerm) {
+ return true
+ }
+ }
+ return false
+ case *ast.Token:
+ return m.hasMatchingTerm(n.Wrap, patternTerm)
+ case *ast.CatchallNode:
+ if n.Wrap != nil && m.hasMatchingTerm(n.Wrap, patternTerm) {
+ return true
+ }
+ for _, op := range n.Operands {
+ if m.hasMatchingTerm(op, patternTerm) {
+ return true
+ }
+ }
+ return false
+ default:
+ return false
+ }
+}
diff --git a/mapper/response.go b/mapper/response.go
index 99586f6..8fe1fc1 100644
--- a/mapper/response.go
+++ b/mapper/response.go
@@ -66,13 +66,18 @@
}
// If foundry/layer are empty in options, get them from the mapping list
- if replacementFoundry == "" || replacementLayer == "" {
- mappingList := m.mappingLists[mappingID]
+ mappingList := m.mappingLists[mappingID]
+ if replacementFoundry == "" {
if opts.Direction { // AtoB
replacementFoundry = mappingList.FoundryB
- replacementLayer = mappingList.LayerB
} else {
replacementFoundry = mappingList.FoundryA
+ }
+ }
+ if replacementLayer == "" {
+ if opts.Direction { // AtoB
+ replacementLayer = mappingList.LayerB
+ } else {
replacementLayer = mappingList.LayerA
}
}
diff --git a/mapper/response_test.go b/mapper/response_test.go
index b7ffbdc..67674d7 100644
--- a/mapper/response_test.go
+++ b/mapper/response_test.go
@@ -927,7 +927,8 @@
func TestResponseMappingNestedSpans(t *testing.T) {
// Snippet with deeply nested spans
responseSnippet := `{
- "snippet": "<span title=\"level1/l:outer\"><span title=\"level2/l:middle\"><span title=\"marmot/p:DET\">der</span></span></span>"
+ "snippet": "<span title=\"level1/l:outer\"><span title=\"level2/l:middle\"><span title=\"marmot/p:DET\">der</span></span></span>",
+ "author": "John Doe"
}`
// Create test mapping list
@@ -965,4 +966,7 @@
assert.Contains(t, snippet, `title="level1/l:outer"`)
assert.Contains(t, snippet, `title="level2/l:middle"`)
assert.Contains(t, snippet, `title="marmot/p:DET"`)
+
+ author := resultMap["author"].(string)
+ assert.Equal(t, "John Doe", author)
}