Akron | bd154ea | 2025-06-12 17:01:58 +0200 | [diff] [blame^] | 1 | package tools |
| 2 | |
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
| 12 | |
| 13 | // MetadataTool implements the Tool interface for KorAP corpus metadata retrieval |
| 14 | type MetadataTool struct { |
| 15 | client *service.Client |
| 16 | } |
| 17 | |
| 18 | // NewMetadataTool creates a new metadata tool instance |
| 19 | func NewMetadataTool(client *service.Client) *MetadataTool { |
| 20 | return &MetadataTool{ |
| 21 | client: client, |
| 22 | } |
| 23 | } |
| 24 | |
| 25 | // Name returns the tool name |
| 26 | func (m *MetadataTool) Name() string { |
| 27 | return "korap_metadata" |
| 28 | } |
| 29 | |
| 30 | // Description returns the tool description |
| 31 | func (m *MetadataTool) Description() string { |
| 32 | return "Retrieve metadata and statistics for KorAP corpora" |
| 33 | } |
| 34 | |
| 35 | // InputSchema returns the JSON schema for tool parameters |
| 36 | func (m *MetadataTool) InputSchema() map[string]interface{} { |
| 37 | return map[string]interface{}{ |
| 38 | "type": "object", |
| 39 | "properties": map[string]interface{}{ |
| 40 | "action": map[string]interface{}{ |
| 41 | "type": "string", |
| 42 | "description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics", |
| 43 | "enum": []string{"list", "statistics"}, |
| 44 | "default": "list", |
| 45 | }, |
| 46 | "corpus": map[string]interface{}{ |
| 47 | "type": "string", |
| 48 | "description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)", |
| 49 | }, |
| 50 | }, |
| 51 | "required": []string{"action"}, |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | // Execute performs the metadata retrieval operation |
| 56 | func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| 57 | log.Debug(). |
| 58 | Str("tool", m.Name()). |
| 59 | Msg("Executing metadata tool") |
| 60 | |
| 61 | // Extract required action parameter |
| 62 | action, err := request.RequireString("action") |
| 63 | if err != nil { |
| 64 | return nil, fmt.Errorf("action parameter is required: %w", err) |
| 65 | } |
| 66 | |
| 67 | // Extract optional corpus parameter |
| 68 | corpus := request.GetString("corpus", "") |
| 69 | |
| 70 | log.Debug(). |
| 71 | Str("action", action). |
| 72 | Str("corpus", corpus). |
| 73 | Msg("Parsed metadata parameters") |
| 74 | |
| 75 | // Validate parameters before authentication |
| 76 | switch action { |
| 77 | case "list": |
| 78 | // No additional validation needed for list |
| 79 | case "statistics": |
| 80 | // No additional validation needed for statistics - corpus is optional |
| 81 | default: |
| 82 | return nil, fmt.Errorf("unknown action: %s", action) |
| 83 | } |
| 84 | |
| 85 | // Check if client is available and authenticated |
| 86 | if m.client == nil { |
| 87 | return nil, fmt.Errorf("KorAP client not configured") |
| 88 | } |
| 89 | |
| 90 | if !m.client.IsAuthenticated() { |
| 91 | log.Warn().Msg("Client not authenticated, attempting authentication") |
| 92 | if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| 93 | return nil, fmt.Errorf("authentication failed: %w", err) |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | // Handle different actions |
| 98 | switch action { |
| 99 | case "list": |
| 100 | return m.handleListCorpora(ctx) |
| 101 | case "statistics": |
| 102 | return m.handleCorpusStatistics(ctx, corpus) |
| 103 | default: |
| 104 | // This should never be reached due to validation above |
| 105 | return nil, fmt.Errorf("unknown action: %s", action) |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | // handleListCorpora retrieves and formats the list of available corpora |
| 110 | func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) { |
| 111 | log.Debug().Msg("Retrieving corpus list") |
| 112 | |
| 113 | var corpusListResp service.CorpusListResponse |
| 114 | err := m.client.GetJSON(ctx, "corpus", &corpusListResp) |
| 115 | if err != nil { |
| 116 | log.Error(). |
| 117 | Err(err). |
| 118 | Msg("Failed to retrieve corpus list") |
| 119 | return nil, fmt.Errorf("failed to retrieve corpus list: %w", err) |
| 120 | } |
| 121 | |
| 122 | log.Info(). |
| 123 | Int("corpus_count", len(corpusListResp.Corpora)). |
| 124 | Msg("Corpus list retrieved successfully") |
| 125 | |
| 126 | result := m.formatCorpusList(&corpusListResp) |
| 127 | return mcp.NewToolResultText(result), nil |
| 128 | } |
| 129 | |
| 130 | // handleCorpusStatistics retrieves and formats statistics for a corpus query |
| 131 | func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) { |
| 132 | log.Debug(). |
| 133 | Str("corpus", corpus). |
| 134 | Msg("Retrieving corpus statistics") |
| 135 | |
| 136 | var statsResp service.StatisticsResponse |
| 137 | var endpoint string |
| 138 | if corpus == "" { |
| 139 | endpoint = "statistics" |
| 140 | } else { |
| 141 | endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus) |
| 142 | } |
| 143 | |
| 144 | err := m.client.GetJSON(ctx, endpoint, &statsResp) |
| 145 | if err != nil { |
| 146 | log.Error(). |
| 147 | Err(err). |
| 148 | Str("corpus", corpus). |
| 149 | Msg("Failed to retrieve corpus statistics") |
| 150 | return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err) |
| 151 | } |
| 152 | |
| 153 | log.Info(). |
| 154 | Str("corpus", corpus). |
| 155 | Int("documents", statsResp.Documents). |
| 156 | Int("tokens", statsResp.Tokens). |
| 157 | Msg("Corpus statistics retrieved successfully") |
| 158 | |
| 159 | result := m.formatCorpusStatistics(corpus, &statsResp) |
| 160 | return mcp.NewToolResultText(result), nil |
| 161 | } |
| 162 | |
| 163 | // formatCorpusList formats the corpus list response into a readable text format |
| 164 | func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string { |
| 165 | var result strings.Builder |
| 166 | |
| 167 | result.WriteString("KorAP Available Corpora\n") |
| 168 | result.WriteString("=======================\n\n") |
| 169 | |
| 170 | if len(response.Corpora) == 0 { |
| 171 | result.WriteString("No corpora available.\n") |
| 172 | return result.String() |
| 173 | } |
| 174 | |
| 175 | result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora))) |
| 176 | |
| 177 | for i, corpus := range response.Corpora { |
| 178 | result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name)) |
| 179 | result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID)) |
| 180 | |
| 181 | if corpus.Description != "" { |
| 182 | result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description)) |
| 183 | } |
| 184 | |
| 185 | if corpus.Documents > 0 { |
| 186 | result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents)) |
| 187 | } |
| 188 | |
| 189 | if corpus.Tokens > 0 { |
| 190 | result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens)) |
| 191 | } |
| 192 | |
| 193 | if corpus.Sentences > 0 { |
| 194 | result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences)) |
| 195 | } |
| 196 | |
| 197 | if corpus.Paragraphs > 0 { |
| 198 | result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs)) |
| 199 | } |
| 200 | |
| 201 | result.WriteString("\n") |
| 202 | } |
| 203 | |
| 204 | return result.String() |
| 205 | } |
| 206 | |
| 207 | // formatCorpusStatistics formats the corpus statistics response into a readable text format |
| 208 | func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string { |
| 209 | var result strings.Builder |
| 210 | |
| 211 | result.WriteString("KorAP Corpus Statistics\n") |
| 212 | result.WriteString("=======================\n\n") |
| 213 | |
| 214 | if corpus == "" { |
| 215 | result.WriteString("Corpus Query: (all available data)\n\n") |
| 216 | } else { |
| 217 | result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus)) |
| 218 | } |
| 219 | |
| 220 | result.WriteString("Statistics:\n") |
| 221 | result.WriteString("-----------\n") |
| 222 | result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents)) |
| 223 | result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens)) |
| 224 | |
| 225 | if response.Sentences > 0 { |
| 226 | result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences)) |
| 227 | } |
| 228 | |
| 229 | if response.Paragraphs > 0 { |
| 230 | result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs)) |
| 231 | } |
| 232 | |
| 233 | // Add any additional fields if present |
| 234 | if len(response.Fields) > 0 { |
| 235 | result.WriteString("\nAdditional Fields:\n") |
| 236 | result.WriteString("------------------\n") |
| 237 | for key, value := range response.Fields { |
| 238 | result.WriteString(fmt.Sprintf("%s: %v\n", key, value)) |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | return result.String() |
| 243 | } |