Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 1 | package tools |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| 5 | "fmt" |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 6 | "strings" |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 7 | |
| 8 | "github.com/korap/korap-mcp/service" |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 9 | "github.com/korap/korap-mcp/validation" |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 10 | "github.com/mark3labs/mcp-go/mcp" |
| 11 | "github.com/rs/zerolog/log" |
| 12 | ) |
| 13 | |
| 14 | // SearchTool implements the Tool interface for KorAP corpus search |
| 15 | type SearchTool struct { |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 16 | client *service.Client |
| 17 | validator *validation.Validator |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 18 | } |
| 19 | |
| 20 | // NewSearchTool creates a new search tool instance |
| 21 | func NewSearchTool(client *service.Client) *SearchTool { |
| 22 | return &SearchTool{ |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 23 | client: client, |
| 24 | validator: validation.New(log.Logger), |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 25 | } |
| 26 | } |
| 27 | |
| 28 | // Name returns the tool name |
| 29 | func (s *SearchTool) Name() string { |
| 30 | return "korap_search" |
| 31 | } |
| 32 | |
| 33 | // Description returns the tool description |
| 34 | func (s *SearchTool) Description() string { |
| 35 | return "Search for words or phrases in KorAP corpora using various query languages" |
| 36 | } |
| 37 | |
| 38 | // InputSchema returns the JSON schema for tool parameters |
| 39 | func (s *SearchTool) InputSchema() map[string]interface{} { |
| 40 | return map[string]interface{}{ |
| 41 | "type": "object", |
| 42 | "properties": map[string]interface{}{ |
| 43 | "query": map[string]interface{}{ |
| 44 | "type": "string", |
| 45 | "description": "The search query (word, phrase, or pattern)", |
| 46 | }, |
| 47 | "query_language": map[string]interface{}{ |
| 48 | "type": "string", |
| 49 | "description": "Query language: 'poliqarp' (default), 'cosmas2', or 'annis'", |
| 50 | "enum": []string{"poliqarp", "cosmas2", "annis"}, |
| 51 | "default": "poliqarp", |
| 52 | }, |
| 53 | "corpus": map[string]interface{}{ |
| 54 | "type": "string", |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 55 | "description": "Virtual corpus query to filter search results (optional, when not provided searches all available data)", |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 56 | }, |
| 57 | "count": map[string]interface{}{ |
| 58 | "type": "integer", |
| 59 | "description": "Number of results to return (max 100)", |
| 60 | "minimum": 1, |
| 61 | "maximum": 100, |
| 62 | "default": 25, |
| 63 | }, |
| 64 | }, |
| 65 | "required": []string{"query"}, |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | // Execute performs the search operation |
| 70 | func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| 71 | log.Debug(). |
| 72 | Str("tool", s.Name()). |
| 73 | Msg("Executing search tool") |
| 74 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 75 | // Extract required query parameter |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 76 | query, err := request.RequireString("query") |
| 77 | if err != nil { |
| 78 | return nil, fmt.Errorf("query parameter is required: %w", err) |
| 79 | } |
| 80 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 81 | // Extract optional parameters with defaults |
| 82 | queryLang := request.GetString("query_language", "poliqarp") |
| 83 | corpus := request.GetString("corpus", "") |
| 84 | count := request.GetInt("count", 25) |
| 85 | |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 86 | // Validate the search request using the validation package |
| 87 | searchReq := validation.SearchRequest{ |
| 88 | Query: query, |
| 89 | QueryLanguage: queryLang, |
| 90 | Corpus: corpus, |
| 91 | Count: count, |
| 92 | } |
| 93 | |
| 94 | if err := s.validator.ValidateSearchRequest(searchReq); err != nil { |
| 95 | log.Warn(). |
| 96 | Err(err). |
| 97 | Interface("request", searchReq). |
| 98 | Msg("Search request validation failed") |
| 99 | return nil, fmt.Errorf("invalid search request: %w", err) |
| 100 | } |
| 101 | |
| 102 | // Sanitize inputs |
| 103 | query = s.validator.SanitizeQuery(query) |
| 104 | if corpus != "" { |
| 105 | corpus = s.validator.SanitizeCorpusID(corpus) |
| 106 | } |
| 107 | |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 108 | log.Debug(). |
| 109 | Str("query", query). |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 110 | Str("query_language", queryLang). |
| 111 | Str("corpus", corpus). |
| 112 | Int("count", count). |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 113 | Msg("Parsed and validated search parameters") |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 114 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 115 | // Check if client is available and authenticated |
| 116 | if s.client == nil { |
| 117 | return nil, fmt.Errorf("KorAP client not configured") |
| 118 | } |
| 119 | |
| 120 | if !s.client.IsAuthenticated() { |
| 121 | log.Warn().Msg("Client not authenticated, attempting authentication") |
| 122 | if err := s.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| 123 | return nil, fmt.Errorf("authentication failed: %w", err) |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | // Prepare search request |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 128 | korapSearchReq := service.SearchRequest{ |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 129 | Query: query, |
| 130 | QueryLang: queryLang, |
| 131 | Collection: corpus, |
| 132 | Count: count, |
| 133 | } |
| 134 | |
| 135 | // Perform the search |
| 136 | var searchResp service.SearchResponse |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 137 | err = s.client.PostJSON(ctx, "search", korapSearchReq, &searchResp) |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 138 | if err != nil { |
| 139 | log.Error(). |
| 140 | Err(err). |
| 141 | Str("query", query). |
| 142 | Msg("Search request failed") |
| 143 | return nil, fmt.Errorf("search failed: %w", err) |
| 144 | } |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 145 | |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 146 | // Validate the response |
| 147 | if err := s.validator.ValidateSearchResponse(&searchResp); err != nil { |
| 148 | log.Warn(). |
| 149 | Err(err). |
| 150 | Msg("Search response validation failed, but continuing with potentially invalid data") |
| 151 | // Continue processing despite validation errors to be resilient |
| 152 | } |
| 153 | |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 154 | log.Info(). |
| 155 | Str("query", query). |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 156 | Int("total_results", searchResp.Meta.TotalResults). |
| 157 | Int("returned_matches", len(searchResp.Matches)). |
| 158 | Float64("search_time", searchResp.Meta.SearchTime). |
| 159 | Msg("Search completed successfully") |
| 160 | |
| 161 | // Format the response |
| 162 | result := s.formatSearchResults(&searchResp) |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 163 | |
| 164 | return mcp.NewToolResultText(result), nil |
| 165 | } |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 166 | |
| 167 | // formatSearchResults formats the search response into a readable text format |
| 168 | func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string { |
| 169 | var result strings.Builder |
| 170 | |
| 171 | result.WriteString("KorAP Search Results\n") |
| 172 | result.WriteString("====================\n\n") |
| 173 | |
| 174 | // Query information |
| 175 | result.WriteString(fmt.Sprintf("Query: %s\n", response.Query.Query)) |
| 176 | if response.Query.QueryLang != "" { |
| 177 | result.WriteString(fmt.Sprintf("Query Language: %s\n", response.Query.QueryLang)) |
| 178 | } |
| 179 | if response.Query.Collection != "" { |
| 180 | result.WriteString(fmt.Sprintf("Corpus: %s\n", response.Query.Collection)) |
| 181 | } |
| 182 | result.WriteString("\n") |
| 183 | |
| 184 | // Result statistics |
| 185 | result.WriteString("Results Summary:\n") |
| 186 | result.WriteString(fmt.Sprintf(" Total Results: %d\n", response.Meta.TotalResults)) |
| 187 | result.WriteString(fmt.Sprintf(" Shown: %d-%d\n", |
| 188 | response.Meta.StartIndex+1, |
| 189 | response.Meta.StartIndex+len(response.Matches))) |
| 190 | if response.Meta.SearchTime > 0 { |
| 191 | result.WriteString(fmt.Sprintf(" Search Time: %.3f seconds\n", response.Meta.SearchTime)) |
| 192 | } |
| 193 | result.WriteString("\n") |
| 194 | |
| 195 | // Individual matches |
| 196 | if len(response.Matches) > 0 { |
| 197 | result.WriteString("Matches:\n") |
| 198 | result.WriteString("--------\n") |
| 199 | |
| 200 | for i, match := range response.Matches { |
| 201 | result.WriteString(fmt.Sprintf("\n%d. Text: %s\n", i+1, match.TextSigle)) |
| 202 | if match.Snippet != "" { |
| 203 | result.WriteString(fmt.Sprintf(" Snippet: %s\n", match.Snippet)) |
| 204 | } |
| 205 | if match.PubPlace != "" { |
| 206 | result.WriteString(fmt.Sprintf(" Publication: %s\n", match.PubPlace)) |
| 207 | } |
| 208 | if match.MatchID != "" { |
| 209 | result.WriteString(fmt.Sprintf(" Match ID: %s\n", match.MatchID)) |
| 210 | } |
| 211 | result.WriteString(fmt.Sprintf(" Position: %d\n", match.Position)) |
| 212 | } |
| 213 | } else { |
| 214 | result.WriteString("No matches found.\n") |
| 215 | } |
| 216 | |
| 217 | // Additional information |
| 218 | if response.Query.CutOff { |
| 219 | result.WriteString("\nNote: Results were cut off due to limits.\n") |
| 220 | } |
| 221 | if response.Query.TimeExceeded { |
| 222 | result.WriteString("\nNote: Search time limit was exceeded.\n") |
| 223 | } |
| 224 | |
| 225 | return result.String() |
| 226 | } |