blob: 2a78f8e74f85d50bbf1744fbe8dd453c05eb84cd [file] [log] [blame]
Akronb1c71e62025-06-12 16:08:54 +02001package tools
2
3import (
4 "context"
5 "fmt"
Akron8138c352025-06-12 16:34:42 +02006 "strings"
Akronb1c71e62025-06-12 16:08:54 +02007
8 "github.com/korap/korap-mcp/service"
Akron81f709c2025-06-12 17:30:55 +02009 "github.com/korap/korap-mcp/validation"
Akronb1c71e62025-06-12 16:08:54 +020010 "github.com/mark3labs/mcp-go/mcp"
11 "github.com/rs/zerolog/log"
12)
13
14// SearchTool implements the Tool interface for KorAP corpus search
15type SearchTool struct {
Akron81f709c2025-06-12 17:30:55 +020016 client *service.Client
17 validator *validation.Validator
Akronb1c71e62025-06-12 16:08:54 +020018}
19
20// NewSearchTool creates a new search tool instance
21func NewSearchTool(client *service.Client) *SearchTool {
22 return &SearchTool{
Akron81f709c2025-06-12 17:30:55 +020023 client: client,
24 validator: validation.New(log.Logger),
Akronb1c71e62025-06-12 16:08:54 +020025 }
26}
27
28// Name returns the tool name
29func (s *SearchTool) Name() string {
30 return "korap_search"
31}
32
33// Description returns the tool description
34func (s *SearchTool) Description() string {
35 return "Search for words or phrases in KorAP corpora using various query languages"
36}
37
38// InputSchema returns the JSON schema for tool parameters
39func (s *SearchTool) InputSchema() map[string]interface{} {
40 return map[string]interface{}{
41 "type": "object",
42 "properties": map[string]interface{}{
43 "query": map[string]interface{}{
44 "type": "string",
45 "description": "The search query (word, phrase, or pattern)",
46 },
47 "query_language": map[string]interface{}{
48 "type": "string",
49 "description": "Query language: 'poliqarp' (default), 'cosmas2', or 'annis'",
50 "enum": []string{"poliqarp", "cosmas2", "annis"},
51 "default": "poliqarp",
52 },
53 "corpus": map[string]interface{}{
54 "type": "string",
Akronbd154ea2025-06-12 17:01:58 +020055 "description": "Virtual corpus query to filter search results (optional, when not provided searches all available data)",
Akronb1c71e62025-06-12 16:08:54 +020056 },
57 "count": map[string]interface{}{
58 "type": "integer",
59 "description": "Number of results to return (max 100)",
60 "minimum": 1,
61 "maximum": 100,
62 "default": 25,
63 },
64 },
65 "required": []string{"query"},
66 }
67}
68
69// Execute performs the search operation
70func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
71 log.Debug().
72 Str("tool", s.Name()).
73 Msg("Executing search tool")
74
Akron8138c352025-06-12 16:34:42 +020075 // Extract required query parameter
Akronb1c71e62025-06-12 16:08:54 +020076 query, err := request.RequireString("query")
77 if err != nil {
78 return nil, fmt.Errorf("query parameter is required: %w", err)
79 }
80
Akron8138c352025-06-12 16:34:42 +020081 // Extract optional parameters with defaults
82 queryLang := request.GetString("query_language", "poliqarp")
83 corpus := request.GetString("corpus", "")
84 count := request.GetInt("count", 25)
85
Akron81f709c2025-06-12 17:30:55 +020086 // Validate the search request using the validation package
87 searchReq := validation.SearchRequest{
88 Query: query,
89 QueryLanguage: queryLang,
90 Corpus: corpus,
91 Count: count,
92 }
93
94 if err := s.validator.ValidateSearchRequest(searchReq); err != nil {
95 log.Warn().
96 Err(err).
97 Interface("request", searchReq).
98 Msg("Search request validation failed")
99 return nil, fmt.Errorf("invalid search request: %w", err)
100 }
101
102 // Sanitize inputs
103 query = s.validator.SanitizeQuery(query)
104 if corpus != "" {
105 corpus = s.validator.SanitizeCorpusID(corpus)
106 }
107
Akronb1c71e62025-06-12 16:08:54 +0200108 log.Debug().
109 Str("query", query).
Akron8138c352025-06-12 16:34:42 +0200110 Str("query_language", queryLang).
111 Str("corpus", corpus).
112 Int("count", count).
Akron81f709c2025-06-12 17:30:55 +0200113 Msg("Parsed and validated search parameters")
Akronb1c71e62025-06-12 16:08:54 +0200114
Akron8138c352025-06-12 16:34:42 +0200115 // Check if client is available and authenticated
116 if s.client == nil {
117 return nil, fmt.Errorf("KorAP client not configured")
118 }
119
120 if !s.client.IsAuthenticated() {
121 log.Warn().Msg("Client not authenticated, attempting authentication")
122 if err := s.client.AuthenticateWithClientCredentials(ctx); err != nil {
123 return nil, fmt.Errorf("authentication failed: %w", err)
124 }
125 }
126
127 // Prepare search request
Akron81f709c2025-06-12 17:30:55 +0200128 korapSearchReq := service.SearchRequest{
Akron8138c352025-06-12 16:34:42 +0200129 Query: query,
130 QueryLang: queryLang,
131 Collection: corpus,
132 Count: count,
133 }
134
135 // Perform the search
136 var searchResp service.SearchResponse
Akron81f709c2025-06-12 17:30:55 +0200137 err = s.client.PostJSON(ctx, "search", korapSearchReq, &searchResp)
Akron8138c352025-06-12 16:34:42 +0200138 if err != nil {
139 log.Error().
140 Err(err).
141 Str("query", query).
142 Msg("Search request failed")
143 return nil, fmt.Errorf("search failed: %w", err)
144 }
Akronb1c71e62025-06-12 16:08:54 +0200145
Akron81f709c2025-06-12 17:30:55 +0200146 // Validate the response
147 if err := s.validator.ValidateSearchResponse(&searchResp); err != nil {
148 log.Warn().
149 Err(err).
150 Msg("Search response validation failed, but continuing with potentially invalid data")
151 // Continue processing despite validation errors to be resilient
152 }
153
Akronb1c71e62025-06-12 16:08:54 +0200154 log.Info().
155 Str("query", query).
Akron8138c352025-06-12 16:34:42 +0200156 Int("total_results", searchResp.Meta.TotalResults).
157 Int("returned_matches", len(searchResp.Matches)).
158 Float64("search_time", searchResp.Meta.SearchTime).
159 Msg("Search completed successfully")
160
161 // Format the response
162 result := s.formatSearchResults(&searchResp)
Akronb1c71e62025-06-12 16:08:54 +0200163
164 return mcp.NewToolResultText(result), nil
165}
Akron8138c352025-06-12 16:34:42 +0200166
167// formatSearchResults formats the search response into a readable text format
168func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string {
169 var result strings.Builder
170
171 result.WriteString("KorAP Search Results\n")
172 result.WriteString("====================\n\n")
173
174 // Query information
175 result.WriteString(fmt.Sprintf("Query: %s\n", response.Query.Query))
176 if response.Query.QueryLang != "" {
177 result.WriteString(fmt.Sprintf("Query Language: %s\n", response.Query.QueryLang))
178 }
179 if response.Query.Collection != "" {
180 result.WriteString(fmt.Sprintf("Corpus: %s\n", response.Query.Collection))
181 }
182 result.WriteString("\n")
183
184 // Result statistics
185 result.WriteString("Results Summary:\n")
186 result.WriteString(fmt.Sprintf(" Total Results: %d\n", response.Meta.TotalResults))
187 result.WriteString(fmt.Sprintf(" Shown: %d-%d\n",
188 response.Meta.StartIndex+1,
189 response.Meta.StartIndex+len(response.Matches)))
190 if response.Meta.SearchTime > 0 {
191 result.WriteString(fmt.Sprintf(" Search Time: %.3f seconds\n", response.Meta.SearchTime))
192 }
193 result.WriteString("\n")
194
195 // Individual matches
196 if len(response.Matches) > 0 {
197 result.WriteString("Matches:\n")
198 result.WriteString("--------\n")
199
200 for i, match := range response.Matches {
201 result.WriteString(fmt.Sprintf("\n%d. Text: %s\n", i+1, match.TextSigle))
202 if match.Snippet != "" {
203 result.WriteString(fmt.Sprintf(" Snippet: %s\n", match.Snippet))
204 }
205 if match.PubPlace != "" {
206 result.WriteString(fmt.Sprintf(" Publication: %s\n", match.PubPlace))
207 }
208 if match.MatchID != "" {
209 result.WriteString(fmt.Sprintf(" Match ID: %s\n", match.MatchID))
210 }
211 result.WriteString(fmt.Sprintf(" Position: %d\n", match.Position))
212 }
213 } else {
214 result.WriteString("No matches found.\n")
215 }
216
217 // Additional information
218 if response.Query.CutOff {
219 result.WriteString("\nNote: Results were cut off due to limits.\n")
220 }
221 if response.Query.TimeExceeded {
222 result.WriteString("\nNote: Search time limit was exceeded.\n")
223 }
224
225 return result.String()
226}