// blob: d1a15feb95151e712a13d4d68b70d0a1c8e4c4e5 [file] [log] [blame]
package tools
import (
"context"
"fmt"
"strings"
"github.com/korap/korap-mcp/service"
"github.com/korap/korap-mcp/validation"
"github.com/mark3labs/mcp-go/mcp"
"github.com/rs/zerolog/log"
)
// SearchTool implements the Tool interface for KorAP corpus search.
// It pairs the KorAP service client used to send search requests with the
// validator used to check and sanitize tool input and to check responses.
type SearchTool struct {
// client is the KorAP service client; Execute returns an error when it is nil.
client *service.Client
// validator validates and sanitizes request parameters and validates
// search responses.
validator *validation.Validator
}
// NewSearchTool builds a SearchTool that sends its requests through client
// and validates input with a validator created from the package-level
// zerolog logger.
func NewSearchTool(client *service.Client) *SearchTool {
	tool := new(SearchTool)
	tool.client = client
	tool.validator = validation.New(log.Logger)
	return tool
}
// Name reports the identifier under which the search tool is exposed.
func (s *SearchTool) Name() string {
	const toolName = "korap_search"
	return toolName
}
// Description reports the one-line, human-readable summary of the search tool.
func (s *SearchTool) Description() string {
	const summary = "Search for words or phrases in KorAP corpora using various query languages"
	return summary
}
// InputSchema returns the JSON Schema describing the arguments accepted by
// the search tool: the required "query" string plus the optional
// "query_language", "corpus" and "count" properties. Unknown properties are
// disallowed via "additionalProperties": false.
func (s *SearchTool) InputSchema() map[string]any {
	// The search expression itself; the only required argument.
	queryProp := map[string]any{
		"type":        "string",
		"description": "The search query. Supports different query languages like Poliqarp, CosmasII, or Annis depending on the selected query_language parameter.",
		"minLength":   1,
		"maxLength":   1000,
		"examples":    []string{"Haus", "[pos=NN]", "der /w1:5 Mann"},
	}

	// Which query language the expression is written in.
	queryLanguageProp := map[string]any{
		"type":        "string",
		"description": "Query language to use for parsing the search query. Supported languages: 'poliqarp' (default; extended Poliqarp QL), 'cosmas2' (corpus query syntax of COSMAS II), 'annis' (multi-layer annotation queries), 'cql' (corpus query language), 'cqp' (Corpus Query Processor syntax), 'fcsql' (Federated Content Search queries).",
		"enum":        []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"},
		"default":     "poliqarp",
		"examples":    []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"},
	}

	// Optional virtual-corpus filter applied to the search.
	corpusProp := map[string]any{
		"type":        "string",
		"description": "Virtual corpus query to filter search results based on metadata fields. Supports boolean operations (& | !), comparison operators (= != < > in), and regular expressions (/pattern/). Use metadata fields like corpusSigle, textClass, pubDate, textType, availability, etc. When not provided, searches all available data accessible to the user.",
		"pattern":     "^[a-zA-Z0-9._\\-\\s&|!=<>()/*\"']+$",
		"examples":    []string{"corpusSigle = \"GOE\"", "textClass = \"politics\" & pubDate in 2020", "textType = \"news\" | textType = \"blog\"", "availability = /CC.*/ & textClass != \"fiction\""},
	}

	// Upper bound on the number of returned results.
	countProp := map[string]any{
		"type":        "integer",
		"description": "Maximum number of search results to return. Higher values may increase response time. Use smaller values for faster responses when doing exploratory searches.",
		"minimum":     0,
		"maximum":     10000,
		"default":     25,
		"examples":    []any{10, 25, 50, 100},
	}

	schema := map[string]any{
		"type":        "object",
		"title":       "KorAP Search Parameters",
		"description": "Parameters for searching text corpora using KorAP's powerful query languages and filtering capabilities.",
		"properties": map[string]any{
			"query":          queryProp,
			"query_language": queryLanguageProp,
			"corpus":         corpusProp,
			"count":          countProp,
		},
		"required":             []string{"query"},
		"additionalProperties": false,
	}
	return schema
}
// Execute runs a KorAP search for the given tool call.
//
// It reads the required "query" argument and the optional "query_language",
// "corpus" and "count" arguments (defaulting to "poliqarp", "" and 25),
// validates them, sanitizes the query and any non-empty corpus value, makes
// sure a client is configured and authenticated, posts the search to the
// "search" endpoint and returns the formatted response as a text result.
//
// An error is returned when the query argument is missing, request validation
// fails, no client is configured, authentication fails or the search request
// fails. A failed response validation is only logged; the response is still
// formatted and returned.
func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	log.Debug().Str("tool", s.Name()).Msg("Executing search tool")

	// "query" is the only mandatory argument.
	q, err := request.RequireString("query")
	if err != nil {
		return nil, fmt.Errorf("query parameter is required: %w", err)
	}

	// Optional arguments fall back to their defaults.
	var (
		lang       = request.GetString("query_language", "poliqarp")
		collection = request.GetString("corpus", "")
		limit      = request.GetInt("count", 25)
	)

	// Validation sees the raw, unsanitized values.
	params := validation.SearchRequest{
		Query:         q,
		QueryLanguage: lang,
		Corpus:        collection,
		Count:         limit,
	}
	if verr := s.validator.ValidateSearchRequest(params); verr != nil {
		log.Warn().Err(verr).Interface("request", params).Msg("Search request validation failed")
		return nil, fmt.Errorf("invalid search request: %w", verr)
	}

	// Sanitize only after validation has passed; an empty corpus stays empty.
	q = s.validator.SanitizeQuery(q)
	if collection != "" {
		collection = s.validator.SanitizeCorpusID(collection)
	}

	log.Debug().
		Str("query", q).
		Str("query_language", lang).
		Str("corpus", collection).
		Int("count", limit).
		Msg("Parsed and validated search parameters")

	// A client is mandatory; authenticate on demand when it is not yet
	// authenticated.
	if s.client == nil {
		return nil, fmt.Errorf("KorAP client not configured")
	}
	if authenticated := s.client.IsAuthenticated(); !authenticated {
		log.Warn().Msg("Client not authenticated, attempting authentication")
		if aerr := s.client.AuthenticateWithClientCredentials(ctx); aerr != nil {
			return nil, fmt.Errorf("authentication failed: %w", aerr)
		}
	}

	// Send the search and decode the reply into resp.
	var resp service.SearchResponse
	payload := service.SearchRequest{
		Query:      q,
		QueryLang:  lang,
		Collection: collection,
		Count:      limit,
	}
	if err = s.client.PostJSON(ctx, "search", payload, &resp); err != nil {
		log.Error().Err(err).Str("query", q).Msg("Search request failed")
		return nil, fmt.Errorf("search failed: %w", err)
	}

	// Response validation is advisory: log the problem and keep going so a
	// partially valid response can still be shown.
	if verr := s.validator.ValidateSearchResponse(&resp); verr != nil {
		log.Warn().
			Err(verr).
			Msg("Search response validation failed, but continuing with potentially invalid data")
	}

	log.Info().
		Str("query", q).
		Int("total_results", resp.Meta.TotalResults).
		Int("returned_matches", len(resp.Matches)).
		Float64("search_time", resp.Meta.SearchTime).
		Msg("Search completed successfully")

	return mcp.NewToolResultText(s.formatSearchResults(&resp)), nil
}
// formatSearchResults renders a search response as plain text.
//
// The output consists of a header, the echoed query (plus query language and
// corpus when they are non-empty), a results summary, the list of matches and
// trailing notes when the response reports a cut-off or an exceeded time
// limit.
//
// The "Shown" line reports the range covered by the returned matches,
// treating Meta.StartIndex as a zero-based offset. When no matches were
// returned it prints "Shown: 0" instead of an inverted range such as "1-0".
//
// response must be non-nil.
func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string {
	var b strings.Builder

	b.WriteString("KorAP Search Results\n")
	b.WriteString("====================\n\n")

	// Query information
	fmt.Fprintf(&b, "Query: %s\n", response.Query.Query)
	if response.Query.QueryLang != "" {
		fmt.Fprintf(&b, "Query Language: %s\n", response.Query.QueryLang)
	}
	if response.Query.Collection != "" {
		fmt.Fprintf(&b, "Corpus: %s\n", response.Query.Collection)
	}
	b.WriteString("\n")

	// Result statistics
	matchCount := len(response.Matches)
	b.WriteString("Results Summary:\n")
	fmt.Fprintf(&b, "  Total Results: %d\n", response.Meta.TotalResults)
	if matchCount > 0 {
		fmt.Fprintf(&b, "  Shown: %d-%d\n",
			response.Meta.StartIndex+1,
			response.Meta.StartIndex+matchCount)
	} else {
		// An empty page has no meaningful range; "start+1 - start" would be
		// inverted.
		b.WriteString("  Shown: 0\n")
	}
	if response.Meta.SearchTime > 0 {
		fmt.Fprintf(&b, "  Search Time: %.3f seconds\n", response.Meta.SearchTime)
	}
	b.WriteString("\n")

	// Individual matches
	if matchCount > 0 {
		b.WriteString("Matches:\n")
		b.WriteString("--------\n")
		for i, match := range response.Matches {
			fmt.Fprintf(&b, "\n%d. Text: %s\n", i+1, match.TextSigle)
			if match.Snippet != "" {
				fmt.Fprintf(&b, "   Snippet: %s\n", match.Snippet)
			}
			if match.PubPlace != "" {
				fmt.Fprintf(&b, "   Publication: %s\n", match.PubPlace)
			}
			if match.MatchID != "" {
				fmt.Fprintf(&b, "   Match ID: %s\n", match.MatchID)
			}
			fmt.Fprintf(&b, "   Position: %d\n", match.Position)
		}
	} else {
		b.WriteString("No matches found.\n")
	}

	// Additional information
	if response.Query.CutOff {
		b.WriteString("\nNote: Results were cut off due to limits.\n")
	}
	if response.Query.TimeExceeded {
		b.WriteString("\nNote: Search time limit was exceeded.\n")
	}

	return b.String()
}