| package tools |
| |
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
| |
| // MetadataTool implements the Tool interface for KorAP corpus metadata retrieval |
| type MetadataTool struct { |
| client *service.Client |
| validator *validation.Validator |
| } |
| |
| // NewMetadataTool creates a new metadata tool instance |
| func NewMetadataTool(client *service.Client) *MetadataTool { |
| return &MetadataTool{ |
| client: client, |
| validator: validation.New(log.Logger), |
| } |
| } |
| |
| // Name returns the tool name |
| func (m *MetadataTool) Name() string { |
| return "korap_metadata" |
| } |
| |
| // Description returns the tool description |
| func (m *MetadataTool) Description() string { |
| return "Retrieve metadata and statistics for KorAP corpora" |
| } |
| |
| // InputSchema returns the JSON schema for tool parameters |
| func (m *MetadataTool) InputSchema() map[string]interface{} { |
| return map[string]interface{}{ |
| "type": "object", |
| "properties": map[string]interface{}{ |
| "action": map[string]interface{}{ |
| "type": "string", |
| "description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics", |
| "enum": []string{"list", "statistics"}, |
| "default": "list", |
| }, |
| "corpus": map[string]interface{}{ |
| "type": "string", |
| "description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)", |
| }, |
| }, |
| "required": []string{"action"}, |
| } |
| } |
| |
| // Execute performs the metadata retrieval operation |
| func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| log.Debug(). |
| Str("tool", m.Name()). |
| Msg("Executing metadata tool") |
| |
| // Extract required action parameter |
| action, err := request.RequireString("action") |
| if err != nil { |
| return nil, fmt.Errorf("action parameter is required: %w", err) |
| } |
| |
| // Extract optional corpus parameter |
| corpus := request.GetString("corpus", "") |
| |
| // Validate the metadata request using the validation package |
| metadataReq := validation.MetadataRequest{ |
| Action: action, |
| Corpus: corpus, |
| } |
| |
| if err := m.validator.ValidateMetadataRequest(metadataReq); err != nil { |
| log.Warn(). |
| Err(err). |
| Interface("request", metadataReq). |
| Msg("Metadata request validation failed") |
| return nil, fmt.Errorf("invalid metadata request: %w", err) |
| } |
| |
| // Sanitize inputs |
| if corpus != "" { |
| corpus = m.validator.SanitizeCorpusID(corpus) |
| } |
| |
| log.Debug(). |
| Str("action", action). |
| Str("corpus", corpus). |
| Msg("Parsed and validated metadata parameters") |
| |
| // Check if client is available and authenticated |
| if m.client == nil { |
| return nil, fmt.Errorf("KorAP client not configured") |
| } |
| |
| if !m.client.IsAuthenticated() { |
| log.Warn().Msg("Client not authenticated, attempting authentication") |
| if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| return nil, fmt.Errorf("authentication failed: %w", err) |
| } |
| } |
| |
| // Handle different actions |
| switch action { |
| case "list": |
| return m.handleListCorpora(ctx) |
| case "statistics": |
| return m.handleCorpusStatistics(ctx, corpus) |
| default: |
| // This should never be reached due to validation above |
| return nil, fmt.Errorf("unknown action: %s", action) |
| } |
| } |
| |
| // handleListCorpora retrieves and formats the list of available corpora |
| func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) { |
| log.Debug().Msg("Retrieving corpus list") |
| |
| var corpusListResp service.CorpusListResponse |
| err := m.client.GetJSON(ctx, "corpus", &corpusListResp) |
| if err != nil { |
| log.Error(). |
| Err(err). |
| Msg("Failed to retrieve corpus list") |
| return nil, fmt.Errorf("failed to retrieve corpus list: %w", err) |
| } |
| |
| // Validate the response |
| if err := m.validator.ValidateCorpusListResponse(&corpusListResp); err != nil { |
| log.Warn(). |
| Err(err). |
| Msg("Corpus list response validation failed, but continuing with potentially invalid data") |
| // Continue processing despite validation errors to be resilient |
| } |
| |
| log.Info(). |
| Int("corpus_count", len(corpusListResp.Corpora)). |
| Msg("Corpus list retrieved successfully") |
| |
| result := m.formatCorpusList(&corpusListResp) |
| return mcp.NewToolResultText(result), nil |
| } |
| |
| // handleCorpusStatistics retrieves and formats statistics for a corpus query |
| func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) { |
| log.Debug(). |
| Str("corpus", corpus). |
| Msg("Retrieving corpus statistics") |
| |
| var statsResp service.StatisticsResponse |
| var endpoint string |
| if corpus == "" { |
| endpoint = "statistics" |
| } else { |
| endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus) |
| } |
| |
| err := m.client.GetJSON(ctx, endpoint, &statsResp) |
| if err != nil { |
| log.Error(). |
| Err(err). |
| Str("corpus", corpus). |
| Msg("Failed to retrieve corpus statistics") |
| return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err) |
| } |
| |
| // Validate the response |
| if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil { |
| log.Warn(). |
| Err(err). |
| Msg("Statistics response validation failed, but continuing with potentially invalid data") |
| // Continue processing despite validation errors to be resilient |
| } |
| |
| log.Info(). |
| Str("corpus", corpus). |
| Int("documents", statsResp.Documents). |
| Int("tokens", statsResp.Tokens). |
| Msg("Corpus statistics retrieved successfully") |
| |
| result := m.formatCorpusStatistics(corpus, &statsResp) |
| return mcp.NewToolResultText(result), nil |
| } |
| |
| // formatCorpusList formats the corpus list response into a readable text format |
| func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string { |
| var result strings.Builder |
| |
| result.WriteString("KorAP Available Corpora\n") |
| result.WriteString("=======================\n\n") |
| |
| if len(response.Corpora) == 0 { |
| result.WriteString("No corpora available.\n") |
| return result.String() |
| } |
| |
| result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora))) |
| |
| for i, corpus := range response.Corpora { |
| result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name)) |
| result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID)) |
| |
| if corpus.Description != "" { |
| result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description)) |
| } |
| |
| if corpus.Documents > 0 { |
| result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents)) |
| } |
| |
| if corpus.Tokens > 0 { |
| result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens)) |
| } |
| |
| if corpus.Sentences > 0 { |
| result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences)) |
| } |
| |
| if corpus.Paragraphs > 0 { |
| result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs)) |
| } |
| |
| result.WriteString("\n") |
| } |
| |
| return result.String() |
| } |
| |
| // formatCorpusStatistics formats the corpus statistics response into a readable text format |
| func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string { |
| var result strings.Builder |
| |
| result.WriteString("KorAP Corpus Statistics\n") |
| result.WriteString("=======================\n\n") |
| |
| if corpus == "" { |
| result.WriteString("Corpus Query: (all available data)\n\n") |
| } else { |
| result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus)) |
| } |
| |
| result.WriteString("Statistics:\n") |
| result.WriteString("-----------\n") |
| result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents)) |
| result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens)) |
| |
| if response.Sentences > 0 { |
| result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences)) |
| } |
| |
| if response.Paragraphs > 0 { |
| result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs)) |
| } |
| |
| // Add any additional fields if present |
| if len(response.Fields) > 0 { |
| result.WriteString("\nAdditional Fields:\n") |
| result.WriteString("------------------\n") |
| for key, value := range response.Fields { |
| result.WriteString(fmt.Sprintf("%s: %v\n", key, value)) |
| } |
| } |
| |
| return result.String() |
| } |