blob: c3da81323f1c0852e87a46011c02cf4f5962347c [file] [log] [blame]
Akronbd154ea2025-06-12 17:01:58 +02001package tools
2
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
13
// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval.
// It supports two actions: listing the available corpora and fetching
// statistics for a virtual corpus query.
type MetadataTool struct {
	// client issues the JSON requests to the KorAP service and handles
	// authentication; Execute rejects calls when it is nil.
	client *service.Client
	// validator checks incoming requests and service responses and
	// sanitizes the corpus parameter.
	validator *validation.Validator
}
19
20// NewMetadataTool creates a new metadata tool instance
21func NewMetadataTool(client *service.Client) *MetadataTool {
22 return &MetadataTool{
Akron81f709c2025-06-12 17:30:55 +020023 client: client,
24 validator: validation.New(log.Logger),
Akronbd154ea2025-06-12 17:01:58 +020025 }
26}
27
28// Name returns the tool name
29func (m *MetadataTool) Name() string {
30 return "korap_metadata"
31}
32
33// Description returns the tool description
34func (m *MetadataTool) Description() string {
35 return "Retrieve metadata and statistics for KorAP corpora"
36}
37
38// InputSchema returns the JSON schema for tool parameters
39func (m *MetadataTool) InputSchema() map[string]interface{} {
40 return map[string]interface{}{
41 "type": "object",
42 "properties": map[string]interface{}{
43 "action": map[string]interface{}{
44 "type": "string",
45 "description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
46 "enum": []string{"list", "statistics"},
47 "default": "list",
48 },
49 "corpus": map[string]interface{}{
50 "type": "string",
51 "description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
52 },
53 },
54 "required": []string{"action"},
55 }
56}
57
58// Execute performs the metadata retrieval operation
59func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
60 log.Debug().
61 Str("tool", m.Name()).
62 Msg("Executing metadata tool")
63
64 // Extract required action parameter
65 action, err := request.RequireString("action")
66 if err != nil {
67 return nil, fmt.Errorf("action parameter is required: %w", err)
68 }
69
70 // Extract optional corpus parameter
71 corpus := request.GetString("corpus", "")
72
Akron81f709c2025-06-12 17:30:55 +020073 // Validate the metadata request using the validation package
74 metadataReq := validation.MetadataRequest{
75 Action: action,
76 Corpus: corpus,
77 }
78
79 if err := m.validator.ValidateMetadataRequest(metadataReq); err != nil {
80 log.Warn().
81 Err(err).
82 Interface("request", metadataReq).
83 Msg("Metadata request validation failed")
84 return nil, fmt.Errorf("invalid metadata request: %w", err)
85 }
86
87 // Sanitize inputs
88 if corpus != "" {
89 corpus = m.validator.SanitizeCorpusID(corpus)
90 }
91
Akronbd154ea2025-06-12 17:01:58 +020092 log.Debug().
93 Str("action", action).
94 Str("corpus", corpus).
Akron81f709c2025-06-12 17:30:55 +020095 Msg("Parsed and validated metadata parameters")
Akronbd154ea2025-06-12 17:01:58 +020096
97 // Check if client is available and authenticated
98 if m.client == nil {
99 return nil, fmt.Errorf("KorAP client not configured")
100 }
101
102 if !m.client.IsAuthenticated() {
103 log.Warn().Msg("Client not authenticated, attempting authentication")
104 if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil {
105 return nil, fmt.Errorf("authentication failed: %w", err)
106 }
107 }
108
109 // Handle different actions
110 switch action {
111 case "list":
112 return m.handleListCorpora(ctx)
113 case "statistics":
114 return m.handleCorpusStatistics(ctx, corpus)
115 default:
116 // This should never be reached due to validation above
117 return nil, fmt.Errorf("unknown action: %s", action)
118 }
119}
120
121// handleListCorpora retrieves and formats the list of available corpora
122func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
123 log.Debug().Msg("Retrieving corpus list")
124
125 var corpusListResp service.CorpusListResponse
126 err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
127 if err != nil {
128 log.Error().
129 Err(err).
130 Msg("Failed to retrieve corpus list")
131 return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
132 }
133
Akron81f709c2025-06-12 17:30:55 +0200134 // Validate the response
135 if err := m.validator.ValidateCorpusListResponse(&corpusListResp); err != nil {
136 log.Warn().
137 Err(err).
138 Msg("Corpus list response validation failed, but continuing with potentially invalid data")
139 // Continue processing despite validation errors to be resilient
140 }
141
Akronbd154ea2025-06-12 17:01:58 +0200142 log.Info().
143 Int("corpus_count", len(corpusListResp.Corpora)).
144 Msg("Corpus list retrieved successfully")
145
146 result := m.formatCorpusList(&corpusListResp)
147 return mcp.NewToolResultText(result), nil
148}
149
150// handleCorpusStatistics retrieves and formats statistics for a corpus query
151func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
152 log.Debug().
153 Str("corpus", corpus).
154 Msg("Retrieving corpus statistics")
155
156 var statsResp service.StatisticsResponse
157 var endpoint string
158 if corpus == "" {
159 endpoint = "statistics"
160 } else {
161 endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus)
162 }
163
164 err := m.client.GetJSON(ctx, endpoint, &statsResp)
165 if err != nil {
166 log.Error().
167 Err(err).
168 Str("corpus", corpus).
169 Msg("Failed to retrieve corpus statistics")
170 return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
171 }
172
Akron81f709c2025-06-12 17:30:55 +0200173 // Validate the response
174 if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil {
175 log.Warn().
176 Err(err).
177 Msg("Statistics response validation failed, but continuing with potentially invalid data")
178 // Continue processing despite validation errors to be resilient
179 }
180
Akronbd154ea2025-06-12 17:01:58 +0200181 log.Info().
182 Str("corpus", corpus).
183 Int("documents", statsResp.Documents).
184 Int("tokens", statsResp.Tokens).
185 Msg("Corpus statistics retrieved successfully")
186
187 result := m.formatCorpusStatistics(corpus, &statsResp)
188 return mcp.NewToolResultText(result), nil
189}
190
191// formatCorpusList formats the corpus list response into a readable text format
192func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
193 var result strings.Builder
194
195 result.WriteString("KorAP Available Corpora\n")
196 result.WriteString("=======================\n\n")
197
198 if len(response.Corpora) == 0 {
199 result.WriteString("No corpora available.\n")
200 return result.String()
201 }
202
203 result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora)))
204
205 for i, corpus := range response.Corpora {
206 result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name))
207 result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID))
208
209 if corpus.Description != "" {
210 result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description))
211 }
212
213 if corpus.Documents > 0 {
214 result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents))
215 }
216
217 if corpus.Tokens > 0 {
218 result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens))
219 }
220
221 if corpus.Sentences > 0 {
222 result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences))
223 }
224
225 if corpus.Paragraphs > 0 {
226 result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs))
227 }
228
229 result.WriteString("\n")
230 }
231
232 return result.String()
233}
234
235// formatCorpusStatistics formats the corpus statistics response into a readable text format
236func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
237 var result strings.Builder
238
239 result.WriteString("KorAP Corpus Statistics\n")
240 result.WriteString("=======================\n\n")
241
242 if corpus == "" {
243 result.WriteString("Corpus Query: (all available data)\n\n")
244 } else {
245 result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus))
246 }
247
248 result.WriteString("Statistics:\n")
249 result.WriteString("-----------\n")
250 result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents))
251 result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens))
252
253 if response.Sentences > 0 {
254 result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences))
255 }
256
257 if response.Paragraphs > 0 {
258 result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs))
259 }
260
261 // Add any additional fields if present
262 if len(response.Fields) > 0 {
263 result.WriteString("\nAdditional Fields:\n")
264 result.WriteString("------------------\n")
265 for key, value := range response.Fields {
266 result.WriteString(fmt.Sprintf("%s: %v\n", key, value))
267 }
268 }
269
270 return result.String()
271}