Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 1 | package tools |
| 2 | |
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
| 13 | |
| 14 | // MetadataTool implements the Tool interface for KorAP corpus metadata retrieval |
| 15 | type MetadataTool struct { |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 16 | client *service.Client |
| 17 | validator *validation.Validator |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 18 | } |
| 19 | |
| 20 | // NewMetadataTool creates a new metadata tool instance |
| 21 | func NewMetadataTool(client *service.Client) *MetadataTool { |
| 22 | return &MetadataTool{ |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 23 | client: client, |
| 24 | validator: validation.New(log.Logger), |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 25 | } |
| 26 | } |
| 27 | |
| 28 | // Name returns the tool name |
| 29 | func (m *MetadataTool) Name() string { |
| 30 | return "korap_metadata" |
| 31 | } |
| 32 | |
| 33 | // Description returns the tool description |
| 34 | func (m *MetadataTool) Description() string { |
| 35 | return "Retrieve metadata and statistics for KorAP corpora" |
| 36 | } |
| 37 | |
| 38 | // InputSchema returns the JSON schema for tool parameters |
| 39 | func (m *MetadataTool) InputSchema() map[string]interface{} { |
| 40 | return map[string]interface{}{ |
| 41 | "type": "object", |
| 42 | "properties": map[string]interface{}{ |
| 43 | "action": map[string]interface{}{ |
| 44 | "type": "string", |
| 45 | "description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics", |
| 46 | "enum": []string{"list", "statistics"}, |
| 47 | "default": "list", |
| 48 | }, |
| 49 | "corpus": map[string]interface{}{ |
| 50 | "type": "string", |
| 51 | "description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)", |
| 52 | }, |
| 53 | }, |
| 54 | "required": []string{"action"}, |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | // Execute performs the metadata retrieval operation |
| 59 | func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| 60 | log.Debug(). |
| 61 | Str("tool", m.Name()). |
| 62 | Msg("Executing metadata tool") |
| 63 | |
| 64 | // Extract required action parameter |
| 65 | action, err := request.RequireString("action") |
| 66 | if err != nil { |
| 67 | return nil, fmt.Errorf("action parameter is required: %w", err) |
| 68 | } |
| 69 | |
| 70 | // Extract optional corpus parameter |
| 71 | corpus := request.GetString("corpus", "") |
| 72 | |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 73 | // Validate the metadata request using the validation package |
| 74 | metadataReq := validation.MetadataRequest{ |
| 75 | Action: action, |
| 76 | Corpus: corpus, |
| 77 | } |
| 78 | |
| 79 | if err := m.validator.ValidateMetadataRequest(metadataReq); err != nil { |
| 80 | log.Warn(). |
| 81 | Err(err). |
| 82 | Interface("request", metadataReq). |
| 83 | Msg("Metadata request validation failed") |
| 84 | return nil, fmt.Errorf("invalid metadata request: %w", err) |
| 85 | } |
| 86 | |
| 87 | // Sanitize inputs |
| 88 | if corpus != "" { |
| 89 | corpus = m.validator.SanitizeCorpusID(corpus) |
| 90 | } |
| 91 | |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 92 | log.Debug(). |
| 93 | Str("action", action). |
| 94 | Str("corpus", corpus). |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 95 | Msg("Parsed and validated metadata parameters") |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 96 | |
| 97 | // Check if client is available and authenticated |
| 98 | if m.client == nil { |
| 99 | return nil, fmt.Errorf("KorAP client not configured") |
| 100 | } |
| 101 | |
| 102 | if !m.client.IsAuthenticated() { |
| 103 | log.Warn().Msg("Client not authenticated, attempting authentication") |
| 104 | if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| 105 | return nil, fmt.Errorf("authentication failed: %w", err) |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | // Handle different actions |
| 110 | switch action { |
| 111 | case "list": |
| 112 | return m.handleListCorpora(ctx) |
| 113 | case "statistics": |
| 114 | return m.handleCorpusStatistics(ctx, corpus) |
| 115 | default: |
| 116 | // This should never be reached due to validation above |
| 117 | return nil, fmt.Errorf("unknown action: %s", action) |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | // handleListCorpora retrieves and formats the list of available corpora |
| 122 | func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) { |
| 123 | log.Debug().Msg("Retrieving corpus list") |
| 124 | |
| 125 | var corpusListResp service.CorpusListResponse |
| 126 | err := m.client.GetJSON(ctx, "corpus", &corpusListResp) |
| 127 | if err != nil { |
| 128 | log.Error(). |
| 129 | Err(err). |
| 130 | Msg("Failed to retrieve corpus list") |
| 131 | return nil, fmt.Errorf("failed to retrieve corpus list: %w", err) |
| 132 | } |
| 133 | |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 134 | // Validate the response |
| 135 | if err := m.validator.ValidateCorpusListResponse(&corpusListResp); err != nil { |
| 136 | log.Warn(). |
| 137 | Err(err). |
| 138 | Msg("Corpus list response validation failed, but continuing with potentially invalid data") |
| 139 | // Continue processing despite validation errors to be resilient |
| 140 | } |
| 141 | |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 142 | log.Info(). |
| 143 | Int("corpus_count", len(corpusListResp.Corpora)). |
| 144 | Msg("Corpus list retrieved successfully") |
| 145 | |
| 146 | result := m.formatCorpusList(&corpusListResp) |
| 147 | return mcp.NewToolResultText(result), nil |
| 148 | } |
| 149 | |
| 150 | // handleCorpusStatistics retrieves and formats statistics for a corpus query |
| 151 | func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) { |
| 152 | log.Debug(). |
| 153 | Str("corpus", corpus). |
| 154 | Msg("Retrieving corpus statistics") |
| 155 | |
| 156 | var statsResp service.StatisticsResponse |
| 157 | var endpoint string |
| 158 | if corpus == "" { |
| 159 | endpoint = "statistics" |
| 160 | } else { |
| 161 | endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus) |
| 162 | } |
| 163 | |
| 164 | err := m.client.GetJSON(ctx, endpoint, &statsResp) |
| 165 | if err != nil { |
| 166 | log.Error(). |
| 167 | Err(err). |
| 168 | Str("corpus", corpus). |
| 169 | Msg("Failed to retrieve corpus statistics") |
| 170 | return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err) |
| 171 | } |
| 172 | |
Akron | 81f709c | 2025-06-12 17:30:55 +0200 | [diff] [blame^] | 173 | // Validate the response |
| 174 | if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil { |
| 175 | log.Warn(). |
| 176 | Err(err). |
| 177 | Msg("Statistics response validation failed, but continuing with potentially invalid data") |
| 178 | // Continue processing despite validation errors to be resilient |
| 179 | } |
| 180 | |
Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame] | 181 | log.Info(). |
| 182 | Str("corpus", corpus). |
| 183 | Int("documents", statsResp.Documents). |
| 184 | Int("tokens", statsResp.Tokens). |
| 185 | Msg("Corpus statistics retrieved successfully") |
| 186 | |
| 187 | result := m.formatCorpusStatistics(corpus, &statsResp) |
| 188 | return mcp.NewToolResultText(result), nil |
| 189 | } |
| 190 | |
| 191 | // formatCorpusList formats the corpus list response into a readable text format |
| 192 | func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string { |
| 193 | var result strings.Builder |
| 194 | |
| 195 | result.WriteString("KorAP Available Corpora\n") |
| 196 | result.WriteString("=======================\n\n") |
| 197 | |
| 198 | if len(response.Corpora) == 0 { |
| 199 | result.WriteString("No corpora available.\n") |
| 200 | return result.String() |
| 201 | } |
| 202 | |
| 203 | result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora))) |
| 204 | |
| 205 | for i, corpus := range response.Corpora { |
| 206 | result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name)) |
| 207 | result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID)) |
| 208 | |
| 209 | if corpus.Description != "" { |
| 210 | result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description)) |
| 211 | } |
| 212 | |
| 213 | if corpus.Documents > 0 { |
| 214 | result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents)) |
| 215 | } |
| 216 | |
| 217 | if corpus.Tokens > 0 { |
| 218 | result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens)) |
| 219 | } |
| 220 | |
| 221 | if corpus.Sentences > 0 { |
| 222 | result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences)) |
| 223 | } |
| 224 | |
| 225 | if corpus.Paragraphs > 0 { |
| 226 | result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs)) |
| 227 | } |
| 228 | |
| 229 | result.WriteString("\n") |
| 230 | } |
| 231 | |
| 232 | return result.String() |
| 233 | } |
| 234 | |
| 235 | // formatCorpusStatistics formats the corpus statistics response into a readable text format |
| 236 | func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string { |
| 237 | var result strings.Builder |
| 238 | |
| 239 | result.WriteString("KorAP Corpus Statistics\n") |
| 240 | result.WriteString("=======================\n\n") |
| 241 | |
| 242 | if corpus == "" { |
| 243 | result.WriteString("Corpus Query: (all available data)\n\n") |
| 244 | } else { |
| 245 | result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus)) |
| 246 | } |
| 247 | |
| 248 | result.WriteString("Statistics:\n") |
| 249 | result.WriteString("-----------\n") |
| 250 | result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents)) |
| 251 | result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens)) |
| 252 | |
| 253 | if response.Sentences > 0 { |
| 254 | result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences)) |
| 255 | } |
| 256 | |
| 257 | if response.Paragraphs > 0 { |
| 258 | result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs)) |
| 259 | } |
| 260 | |
| 261 | // Add any additional fields if present |
| 262 | if len(response.Fields) > 0 { |
| 263 | result.WriteString("\nAdditional Fields:\n") |
| 264 | result.WriteString("------------------\n") |
| 265 | for key, value := range response.Fields { |
| 266 | result.WriteString(fmt.Sprintf("%s: %v\n", key, value)) |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | return result.String() |
| 271 | } |