blob: d1a15feb95151e712a13d4d68b70d0a1c8e4c4e5 [file] [log] [blame]
Akronb1c71e62025-06-12 16:08:54 +02001package tools
2
3import (
4 "context"
5 "fmt"
Akron8138c352025-06-12 16:34:42 +02006 "strings"
Akronb1c71e62025-06-12 16:08:54 +02007
8 "github.com/korap/korap-mcp/service"
Akron81f709c2025-06-12 17:30:55 +02009 "github.com/korap/korap-mcp/validation"
Akronb1c71e62025-06-12 16:08:54 +020010 "github.com/mark3labs/mcp-go/mcp"
11 "github.com/rs/zerolog/log"
12)
13
14// SearchTool implements the Tool interface for KorAP corpus search
15type SearchTool struct {
Akron81f709c2025-06-12 17:30:55 +020016 client *service.Client
17 validator *validation.Validator
Akronb1c71e62025-06-12 16:08:54 +020018}
19
20// NewSearchTool creates a new search tool instance
21func NewSearchTool(client *service.Client) *SearchTool {
22 return &SearchTool{
Akron81f709c2025-06-12 17:30:55 +020023 client: client,
24 validator: validation.New(log.Logger),
Akronb1c71e62025-06-12 16:08:54 +020025 }
26}
27
28// Name returns the tool name
29func (s *SearchTool) Name() string {
30 return "korap_search"
31}
32
33// Description returns the tool description
34func (s *SearchTool) Description() string {
35 return "Search for words or phrases in KorAP corpora using various query languages"
36}
37
38// InputSchema returns the JSON schema for tool parameters
Akron708f3912025-06-17 12:26:02 +020039func (s *SearchTool) InputSchema() map[string]any {
40 return map[string]any{
Akronb1c71e62025-06-12 16:08:54 +020041 "type": "object",
Akron708f3912025-06-17 12:26:02 +020042 "properties": map[string]any{
43 "query": map[string]any{
Akronb1c71e62025-06-12 16:08:54 +020044 "type": "string",
Akron8db31c32025-06-17 12:22:41 +020045 "description": "The search query. Supports different query languages like Poliqarp, CosmasII, or Annis depending on the selected query_language parameter.",
46 "minLength": 1,
47 "maxLength": 1000,
48 "examples": []string{"Haus", "[pos=NN]", "der /w1:5 Mann"},
Akronb1c71e62025-06-12 16:08:54 +020049 },
Akron708f3912025-06-17 12:26:02 +020050 "query_language": map[string]any{
Akronb1c71e62025-06-12 16:08:54 +020051 "type": "string",
Akron8db31c32025-06-17 12:22:41 +020052 "description": "Query language to use for parsing the search query. Supported languages: 'poliqarp' (default; extended Poliqarp QL), 'cosmas2' (corpus query syntax of COSMAS II), 'annis' (multi-layer annotation queries), 'cql' (corpus query language), 'cqp' (Corpus Query Processor syntax), 'fcsql' (Federated Content Search queries).",
53 "enum": []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"},
Akronb1c71e62025-06-12 16:08:54 +020054 "default": "poliqarp",
Akron8db31c32025-06-17 12:22:41 +020055 "examples": []string{"poliqarp", "cosmas2", "annis", "cql", "cqp", "fcsql"},
Akronb1c71e62025-06-12 16:08:54 +020056 },
Akron708f3912025-06-17 12:26:02 +020057 "corpus": map[string]any{
Akronb1c71e62025-06-12 16:08:54 +020058 "type": "string",
Akron8db31c32025-06-17 12:22:41 +020059 "description": "Virtual corpus query to filter search results based on metadata fields. Supports boolean operations (& | !), comparison operators (= != < > in), and regular expressions (/pattern/). Use metadata fields like corpusSigle, textClass, pubDate, textType, availability, etc. When not provided, searches all available data accessible to the user.",
60 "pattern": "^[a-zA-Z0-9._\\-\\s&|!=<>()/*\"']+$",
61 "examples": []string{"corpusSigle = \"GOE\"", "textClass = \"politics\" & pubDate in 2020", "textType = \"news\" | textType = \"blog\"", "availability = /CC.*/ & textClass != \"fiction\""},
Akronb1c71e62025-06-12 16:08:54 +020062 },
Akron708f3912025-06-17 12:26:02 +020063 "count": map[string]any{
Akronb1c71e62025-06-12 16:08:54 +020064 "type": "integer",
Akron8db31c32025-06-17 12:22:41 +020065 "description": "Maximum number of search results to return. Higher values may increase response time. Use smaller values for faster responses when doing exploratory searches.",
66 "minimum": 0,
67 "maximum": 10000,
Akronb1c71e62025-06-12 16:08:54 +020068 "default": 25,
Akron708f3912025-06-17 12:26:02 +020069 "examples": []any{10, 25, 50, 100},
Akronb1c71e62025-06-12 16:08:54 +020070 },
71 },
Akron8db31c32025-06-17 12:22:41 +020072 "required": []string{"query"},
73 "additionalProperties": false,
74 "title": "KorAP Search Parameters",
75 "description": "Parameters for searching text corpora using KorAP's powerful query languages and filtering capabilities.",
Akronb1c71e62025-06-12 16:08:54 +020076 }
77}
78
79// Execute performs the search operation
80func (s *SearchTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
81 log.Debug().
82 Str("tool", s.Name()).
83 Msg("Executing search tool")
84
Akron8138c352025-06-12 16:34:42 +020085 // Extract required query parameter
Akronb1c71e62025-06-12 16:08:54 +020086 query, err := request.RequireString("query")
87 if err != nil {
88 return nil, fmt.Errorf("query parameter is required: %w", err)
89 }
90
Akron8138c352025-06-12 16:34:42 +020091 // Extract optional parameters with defaults
92 queryLang := request.GetString("query_language", "poliqarp")
93 corpus := request.GetString("corpus", "")
94 count := request.GetInt("count", 25)
95
Akron81f709c2025-06-12 17:30:55 +020096 // Validate the search request using the validation package
97 searchReq := validation.SearchRequest{
98 Query: query,
99 QueryLanguage: queryLang,
100 Corpus: corpus,
101 Count: count,
102 }
103
104 if err := s.validator.ValidateSearchRequest(searchReq); err != nil {
105 log.Warn().
106 Err(err).
107 Interface("request", searchReq).
108 Msg("Search request validation failed")
109 return nil, fmt.Errorf("invalid search request: %w", err)
110 }
111
112 // Sanitize inputs
113 query = s.validator.SanitizeQuery(query)
114 if corpus != "" {
115 corpus = s.validator.SanitizeCorpusID(corpus)
116 }
117
Akronb1c71e62025-06-12 16:08:54 +0200118 log.Debug().
119 Str("query", query).
Akron8138c352025-06-12 16:34:42 +0200120 Str("query_language", queryLang).
121 Str("corpus", corpus).
122 Int("count", count).
Akron81f709c2025-06-12 17:30:55 +0200123 Msg("Parsed and validated search parameters")
Akronb1c71e62025-06-12 16:08:54 +0200124
Akron8138c352025-06-12 16:34:42 +0200125 // Check if client is available and authenticated
126 if s.client == nil {
127 return nil, fmt.Errorf("KorAP client not configured")
128 }
129
130 if !s.client.IsAuthenticated() {
131 log.Warn().Msg("Client not authenticated, attempting authentication")
132 if err := s.client.AuthenticateWithClientCredentials(ctx); err != nil {
133 return nil, fmt.Errorf("authentication failed: %w", err)
134 }
135 }
136
137 // Prepare search request
Akron81f709c2025-06-12 17:30:55 +0200138 korapSearchReq := service.SearchRequest{
Akron8138c352025-06-12 16:34:42 +0200139 Query: query,
140 QueryLang: queryLang,
141 Collection: corpus,
142 Count: count,
143 }
144
145 // Perform the search
146 var searchResp service.SearchResponse
Akron81f709c2025-06-12 17:30:55 +0200147 err = s.client.PostJSON(ctx, "search", korapSearchReq, &searchResp)
Akron8138c352025-06-12 16:34:42 +0200148 if err != nil {
149 log.Error().
150 Err(err).
151 Str("query", query).
152 Msg("Search request failed")
153 return nil, fmt.Errorf("search failed: %w", err)
154 }
Akronb1c71e62025-06-12 16:08:54 +0200155
Akron81f709c2025-06-12 17:30:55 +0200156 // Validate the response
157 if err := s.validator.ValidateSearchResponse(&searchResp); err != nil {
158 log.Warn().
159 Err(err).
160 Msg("Search response validation failed, but continuing with potentially invalid data")
161 // Continue processing despite validation errors to be resilient
162 }
163
Akronb1c71e62025-06-12 16:08:54 +0200164 log.Info().
165 Str("query", query).
Akron8138c352025-06-12 16:34:42 +0200166 Int("total_results", searchResp.Meta.TotalResults).
167 Int("returned_matches", len(searchResp.Matches)).
168 Float64("search_time", searchResp.Meta.SearchTime).
169 Msg("Search completed successfully")
170
171 // Format the response
172 result := s.formatSearchResults(&searchResp)
Akronb1c71e62025-06-12 16:08:54 +0200173
174 return mcp.NewToolResultText(result), nil
175}
Akron8138c352025-06-12 16:34:42 +0200176
177// formatSearchResults formats the search response into a readable text format
178func (s *SearchTool) formatSearchResults(response *service.SearchResponse) string {
179 var result strings.Builder
180
181 result.WriteString("KorAP Search Results\n")
182 result.WriteString("====================\n\n")
183
184 // Query information
185 result.WriteString(fmt.Sprintf("Query: %s\n", response.Query.Query))
186 if response.Query.QueryLang != "" {
187 result.WriteString(fmt.Sprintf("Query Language: %s\n", response.Query.QueryLang))
188 }
189 if response.Query.Collection != "" {
190 result.WriteString(fmt.Sprintf("Corpus: %s\n", response.Query.Collection))
191 }
192 result.WriteString("\n")
193
194 // Result statistics
195 result.WriteString("Results Summary:\n")
196 result.WriteString(fmt.Sprintf(" Total Results: %d\n", response.Meta.TotalResults))
197 result.WriteString(fmt.Sprintf(" Shown: %d-%d\n",
198 response.Meta.StartIndex+1,
199 response.Meta.StartIndex+len(response.Matches)))
200 if response.Meta.SearchTime > 0 {
201 result.WriteString(fmt.Sprintf(" Search Time: %.3f seconds\n", response.Meta.SearchTime))
202 }
203 result.WriteString("\n")
204
205 // Individual matches
206 if len(response.Matches) > 0 {
207 result.WriteString("Matches:\n")
208 result.WriteString("--------\n")
209
210 for i, match := range response.Matches {
211 result.WriteString(fmt.Sprintf("\n%d. Text: %s\n", i+1, match.TextSigle))
212 if match.Snippet != "" {
213 result.WriteString(fmt.Sprintf(" Snippet: %s\n", match.Snippet))
214 }
215 if match.PubPlace != "" {
216 result.WriteString(fmt.Sprintf(" Publication: %s\n", match.PubPlace))
217 }
218 if match.MatchID != "" {
219 result.WriteString(fmt.Sprintf(" Match ID: %s\n", match.MatchID))
220 }
221 result.WriteString(fmt.Sprintf(" Position: %d\n", match.Position))
222 }
223 } else {
224 result.WriteString("No matches found.\n")
225 }
226
227 // Additional information
228 if response.Query.CutOff {
229 result.WriteString("\nNote: Results were cut off due to limits.\n")
230 }
231 if response.Query.TimeExceeded {
232 result.WriteString("\nNote: Search time limit was exceeded.\n")
233 }
234
235 return result.String()
236}