Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 1 | package tools |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| 5 | "fmt" |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 6 | "strings" |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 7 | |
| 8 | "github.com/korap/korap-mcp/service" |
| 9 | "github.com/mark3labs/mcp-go/mcp" |
| 10 | "github.com/rs/zerolog/log" |
| 11 | ) |
| 12 | |
| 13 | // SearchTool implements the Tool interface for KorAP corpus search |
| 14 | type SearchTool struct { |
| 15 | client *service.Client |
| 16 | } |
| 17 | |
| 18 | // NewSearchTool creates a new search tool instance |
| 19 | func NewSearchTool(client *service.Client) *SearchTool { |
| 20 | return &SearchTool{ |
| 21 | client: client, |
| 22 | } |
| 23 | } |
| 24 | |
| 25 | // Name returns the tool name |
| 26 | func (s *SearchTool) Name() string { |
| 27 | return "korap_search" |
| 28 | } |
| 29 | |
| 30 | // Description returns the tool description |
| 31 | func (s *SearchTool) Description() string { |
| 32 | return "Search for words or phrases in KorAP corpora using various query languages" |
| 33 | } |
| 34 | |
| 35 | // InputSchema returns the JSON schema for tool parameters |
| 36 | func (s *SearchTool) InputSchema() map[string]interface{} { |
| 37 | return map[string]interface{}{ |
| 38 | "type": "object", |
| 39 | "properties": map[string]interface{}{ |
| 40 | "query": map[string]interface{}{ |
| 41 | "type": "string", |
| 42 | "description": "The search query (word, phrase, or pattern)", |
| 43 | }, |
| 44 | "query_language": map[string]interface{}{ |
| 45 | "type": "string", |
| 46 | "description": "Query language: 'poliqarp' (default), 'cosmas2', or 'annis'", |
| 47 | "enum": []string{"poliqarp", "cosmas2", "annis"}, |
| 48 | "default": "poliqarp", |
| 49 | }, |
| 50 | "corpus": map[string]interface{}{ |
| 51 | "type": "string", |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame^] | 52 | "description": "Virtual corpus query to filter search results (optional, when not provided searches all available data)", |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 53 | }, |
| 54 | "count": map[string]interface{}{ |
| 55 | "type": "integer", |
| 56 | "description": "Number of results to return (max 100)", |
| 57 | "minimum": 1, |
| 58 | "maximum": 100, |
| 59 | "default": 25, |
| 60 | }, |
| 61 | }, |
| 62 | "required": []string{"query"}, |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | // Execute performs the search operation |
| 67 | func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| 68 | log.Debug(). |
| 69 | Str("tool", s.Name()). |
| 70 | Msg("Executing search tool") |
| 71 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 72 | // Extract required query parameter |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 73 | query, err := request.RequireString("query") |
| 74 | if err != nil { |
| 75 | return nil, fmt.Errorf("query parameter is required: %w", err) |
| 76 | } |
| 77 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 78 | // Extract optional parameters with defaults |
| 79 | queryLang := request.GetString("query_language", "poliqarp") |
| 80 | corpus := request.GetString("corpus", "") |
| 81 | count := request.GetInt("count", 25) |
| 82 | |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 83 | log.Debug(). |
| 84 | Str("query", query). |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 85 | Str("query_language", queryLang). |
| 86 | Str("corpus", corpus). |
| 87 | Int("count", count). |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 88 | Msg("Parsed search parameters") |
| 89 | |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 90 | // Check if client is available and authenticated |
| 91 | if s.client == nil { |
| 92 | return nil, fmt.Errorf("KorAP client not configured") |
| 93 | } |
| 94 | |
| 95 | if !s.client.IsAuthenticated() { |
| 96 | log.Warn().Msg("Client not authenticated, attempting authentication") |
| 97 | if err := s.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| 98 | return nil, fmt.Errorf("authentication failed: %w", err) |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | // Prepare search request |
| 103 | searchReq := service.SearchRequest{ |
| 104 | Query: query, |
| 105 | QueryLang: queryLang, |
| 106 | Collection: corpus, |
| 107 | Count: count, |
| 108 | } |
| 109 | |
| 110 | // Perform the search |
| 111 | var searchResp service.SearchResponse |
| 112 | err = s.client.PostJSON(ctx, "search", searchReq, &searchResp) |
| 113 | if err != nil { |
| 114 | log.Error(). |
| 115 | Err(err). |
| 116 | Str("query", query). |
| 117 | Msg("Search request failed") |
| 118 | return nil, fmt.Errorf("search failed: %w", err) |
| 119 | } |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 120 | |
| 121 | log.Info(). |
| 122 | Str("query", query). |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 123 | Int("total_results", searchResp.Meta.TotalResults). |
| 124 | Int("returned_matches", len(searchResp.Matches)). |
| 125 | Float64("search_time", searchResp.Meta.SearchTime). |
| 126 | Msg("Search completed successfully") |
| 127 | |
| 128 | // Format the response |
| 129 | result := s.formatSearchResults(&searchResp) |
Akron | b1c71e6 | 2025-06-12 16:08:54 +0200 | [diff] [blame] | 130 | |
| 131 | return mcp.NewToolResultText(result), nil |
| 132 | } |
Akron | 8138c35 | 2025-06-12 16:34:42 +0200 | [diff] [blame] | 133 | |
| 134 | // formatSearchResults formats the search response into a readable text format |
| 135 | func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string { |
| 136 | var result strings.Builder |
| 137 | |
| 138 | result.WriteString("KorAP Search Results\n") |
| 139 | result.WriteString("====================\n\n") |
| 140 | |
| 141 | // Query information |
| 142 | result.WriteString(fmt.Sprintf("Query: %s\n", response.Query.Query)) |
| 143 | if response.Query.QueryLang != "" { |
| 144 | result.WriteString(fmt.Sprintf("Query Language: %s\n", response.Query.QueryLang)) |
| 145 | } |
| 146 | if response.Query.Collection != "" { |
| 147 | result.WriteString(fmt.Sprintf("Corpus: %s\n", response.Query.Collection)) |
| 148 | } |
| 149 | result.WriteString("\n") |
| 150 | |
| 151 | // Result statistics |
| 152 | result.WriteString("Results Summary:\n") |
| 153 | result.WriteString(fmt.Sprintf(" Total Results: %d\n", response.Meta.TotalResults)) |
| 154 | result.WriteString(fmt.Sprintf(" Shown: %d-%d\n", |
| 155 | response.Meta.StartIndex+1, |
| 156 | response.Meta.StartIndex+len(response.Matches))) |
| 157 | if response.Meta.SearchTime > 0 { |
| 158 | result.WriteString(fmt.Sprintf(" Search Time: %.3f seconds\n", response.Meta.SearchTime)) |
| 159 | } |
| 160 | result.WriteString("\n") |
| 161 | |
| 162 | // Individual matches |
| 163 | if len(response.Matches) > 0 { |
| 164 | result.WriteString("Matches:\n") |
| 165 | result.WriteString("--------\n") |
| 166 | |
| 167 | for i, match := range response.Matches { |
| 168 | result.WriteString(fmt.Sprintf("\n%d. Text: %s\n", i+1, match.TextSigle)) |
| 169 | if match.Snippet != "" { |
| 170 | result.WriteString(fmt.Sprintf(" Snippet: %s\n", match.Snippet)) |
| 171 | } |
| 172 | if match.PubPlace != "" { |
| 173 | result.WriteString(fmt.Sprintf(" Publication: %s\n", match.PubPlace)) |
| 174 | } |
| 175 | if match.MatchID != "" { |
| 176 | result.WriteString(fmt.Sprintf(" Match ID: %s\n", match.MatchID)) |
| 177 | } |
| 178 | result.WriteString(fmt.Sprintf(" Position: %d\n", match.Position)) |
| 179 | } |
| 180 | } else { |
| 181 | result.WriteString("No matches found.\n") |
| 182 | } |
| 183 | |
| 184 | // Additional information |
| 185 | if response.Query.CutOff { |
| 186 | result.WriteString("\nNote: Results were cut off due to limits.\n") |
| 187 | } |
| 188 | if response.Query.TimeExceeded { |
| 189 | result.WriteString("\nNote: Search time limit was exceeded.\n") |
| 190 | } |
| 191 | |
| 192 | return result.String() |
| 193 | } |