| package tools |
| |
| import ( |
| "context" |
| "fmt" |
| "strings" |
| |
| "github.com/korap/korap-mcp/service" |
| "github.com/korap/korap-mcp/validation" |
| "github.com/mark3labs/mcp-go/mcp" |
| "github.com/rs/zerolog/log" |
| ) |
| |
| // SearchTool implements the Tool interface for KorAP corpus search |
| type SearchTool struct { |
| client *service.Client |
| validator *validation.Validator |
| } |
| |
| // NewSearchTool creates a new search tool instance |
| func NewSearchTool(client *service.Client) *SearchTool { |
| return &SearchTool{ |
| client: client, |
| validator: validation.New(log.Logger), |
| } |
| } |
| |
| // Name returns the tool name |
| func (s *SearchTool) Name() string { |
| return "korap_search" |
| } |
| |
| // Description returns the tool description |
| func (s *SearchTool) Description() string { |
| return "Search for words or phrases in KorAP corpora using various query languages" |
| } |
| |
| // InputSchema returns the JSON schema for tool parameters |
| func (s *SearchTool) InputSchema() map[string]any { |
| return map[string]any{ |
| "type": "object", |
| "properties": map[string]any{ |
| "query": map[string]any{ |
| "type": "string", |
| "description": "The search query. Supports different query languages like Poliqarp, CosmasII, or Annis depending on the selected query_language parameter.", |
| "minLength": 1, |
| "maxLength": 1000, |
| "examples": []string{"Haus", "[pos=NN]", "der /w1:5 Mann"}, |
| }, |
| "query_language": map[string]any{ |
| "type": "string", |
| "description": "Query language to use for parsing the search query. Supported languages: 'poliqarp' (default; extended Poliqarp QL), 'cosmas2' (corpus query syntax of COSMAS II), 'annis' (multi-layer annotation queries), 'cql' (corpus query language), 'cqp' (Corpus Query Processor syntax), 'fcsql' (Federated Content Search queries).", |
| "enum": []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"}, |
| "default": "poliqarp", |
| "examples": []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"}, |
| }, |
| "corpus": map[string]any{ |
| "type": "string", |
| "description": "Virtual corpus query to filter search results based on metadata fields. Supports boolean operations (& | !), comparison operators (= != < > in), and regular expressions (/pattern/). Use metadata fields like corpusSigle, textClass, pubDate, textType, availability, etc. When not provided, searches all available data accessible to the user.", |
| "pattern": "^[a-zA-Z0-9._\\-\\s&|!=<>()/*\"']+$", |
| "examples": []string{"corpusSigle = \"GOE\"", "textClass = \"politics\" & pubDate in 2020", "textType = \"news\" | textType = \"blog\"", "availability = /CC.*/ & textClass != \"fiction\""}, |
| }, |
| "count": map[string]any{ |
| "type": "integer", |
| "description": "Maximum number of search results to return. Higher values may increase response time. Use smaller values for faster responses when doing exploratory searches.", |
| "minimum": 0, |
| "maximum": 10000, |
| "default": 25, |
| "examples": []any{10, 25, 50, 100}, |
| }, |
| }, |
| "required": []string{"query"}, |
| "additionalProperties": false, |
| "title": "KorAP Search Parameters", |
| "description": "Parameters for searching text corpora using KorAP's powerful query languages and filtering capabilities.", |
| } |
| } |
| |
| // Execute performs the search operation |
| func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| log.Debug(). |
| Str("tool", s.Name()). |
| Msg("Executing search tool") |
| |
| // Extract required query parameter |
| query, err := request.RequireString("query") |
| if err != nil { |
| return nil, fmt.Errorf("query parameter is required: %w", err) |
| } |
| |
| // Extract optional parameters with defaults |
| queryLang := request.GetString("query_language", "poliqarp") |
| corpus := request.GetString("corpus", "") |
| count := request.GetInt("count", 25) |
| |
| // Validate the search request using the validation package |
| searchReq := validation.SearchRequest{ |
| Query: query, |
| QueryLanguage: queryLang, |
| Corpus: corpus, |
| Count: count, |
| } |
| |
| if err := s.validator.ValidateSearchRequest(searchReq); err != nil { |
| log.Warn(). |
| Err(err). |
| Interface("request", searchReq). |
| Msg("Search request validation failed") |
| return nil, fmt.Errorf("invalid search request: %w", err) |
| } |
| |
| // Sanitize inputs |
| query = s.validator.SanitizeQuery(query) |
| if corpus != "" { |
| corpus = s.validator.SanitizeCorpusID(corpus) |
| } |
| |
| log.Debug(). |
| Str("query", query). |
| Str("query_language", queryLang). |
| Str("corpus", corpus). |
| Int("count", count). |
| Msg("Parsed and validated search parameters") |
| |
| // Check if client is available and authenticated |
| if s.client == nil { |
| return nil, fmt.Errorf("KorAP client not configured") |
| } |
| |
| if !s.client.IsAuthenticated() { |
| log.Warn().Msg("Client not authenticated, attempting authentication") |
| if err := s.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| return nil, fmt.Errorf("authentication failed: %w", err) |
| } |
| } |
| |
| // Prepare search request |
| korapSearchReq := service.SearchRequest{ |
| Query: query, |
| QueryLang: queryLang, |
| Collection: corpus, |
| Count: count, |
| } |
| |
| // Perform the search |
| var searchResp service.SearchResponse |
| err = s.client.PostJSON(ctx, "search", korapSearchReq, &searchResp) |
| if err != nil { |
| log.Error(). |
| Err(err). |
| Str("query", query). |
| Msg("Search request failed") |
| return nil, fmt.Errorf("search failed: %w", err) |
| } |
| |
| // Validate the response |
| if err := s.validator.ValidateSearchResponse(&searchResp); err != nil { |
| log.Warn(). |
| Err(err). |
| Msg("Search response validation failed, but continuing with potentially invalid data") |
| // Continue processing despite validation errors to be resilient |
| } |
| |
| log.Info(). |
| Str("query", query). |
| Int("total_results", searchResp.Meta.TotalResults). |
| Int("returned_matches", len(searchResp.Matches)). |
| Float64("search_time", searchResp.Meta.SearchTime). |
| Msg("Search completed successfully") |
| |
| // Format the response |
| result := s.formatSearchResults(&searchResp) |
| |
| return mcp.NewToolResultText(result), nil |
| } |
| |
| // formatSearchResults formats the search response into a readable text format |
| func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string { |
| var result strings.Builder |
| |
| result.WriteString("KorAP Search Results\n") |
| result.WriteString("====================\n\n") |
| |
| // Query information |
| result.WriteString(fmt.Sprintf("Query: %s\n", response.Query.Query)) |
| if response.Query.QueryLang != "" { |
| result.WriteString(fmt.Sprintf("Query Language: %s\n", response.Query.QueryLang)) |
| } |
| if response.Query.Collection != "" { |
| result.WriteString(fmt.Sprintf("Corpus: %s\n", response.Query.Collection)) |
| } |
| result.WriteString("\n") |
| |
| // Result statistics |
| result.WriteString("Results Summary:\n") |
| result.WriteString(fmt.Sprintf(" Total Results: %d\n", response.Meta.TotalResults)) |
| result.WriteString(fmt.Sprintf(" Shown: %d-%d\n", |
| response.Meta.StartIndex+1, |
| response.Meta.StartIndex+len(response.Matches))) |
| if response.Meta.SearchTime > 0 { |
| result.WriteString(fmt.Sprintf(" Search Time: %.3f seconds\n", response.Meta.SearchTime)) |
| } |
| result.WriteString("\n") |
| |
| // Individual matches |
| if len(response.Matches) > 0 { |
| result.WriteString("Matches:\n") |
| result.WriteString("--------\n") |
| |
| for i, match := range response.Matches { |
| result.WriteString(fmt.Sprintf("\n%d. Text: %s\n", i+1, match.TextSigle)) |
| if match.Snippet != "" { |
| result.WriteString(fmt.Sprintf(" Snippet: %s\n", match.Snippet)) |
| } |
| if match.PubPlace != "" { |
| result.WriteString(fmt.Sprintf(" Publication: %s\n", match.PubPlace)) |
| } |
| if match.MatchID != "" { |
| result.WriteString(fmt.Sprintf(" Match ID: %s\n", match.MatchID)) |
| } |
| result.WriteString(fmt.Sprintf(" Position: %d\n", match.Position)) |
| } |
| } else { |
| result.WriteString("No matches found.\n") |
| } |
| |
| // Additional information |
| if response.Query.CutOff { |
| result.WriteString("\nNote: Results were cut off due to limits.\n") |
| } |
| if response.Query.TimeExceeded { |
| result.WriteString("\nNote: Search time limit was exceeded.\n") |
| } |
| |
| return result.String() |
| } |