| package tools |
| |
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
| |
| // MetadataTool implements the Tool interface for KorAP corpus metadata retrieval |
| type MetadataTool struct { |
| client *service.Client |
| validator *validation.Validator |
| } |
| |
| // NewMetadataTool creates a new metadata tool instance |
| func NewMetadataTool(client *service.Client) *MetadataTool { |
| return &MetadataTool{ |
| client: client, |
| validator: validation.New(log.Logger), |
| } |
| } |
| |
| // Name returns the tool name |
| func (m *MetadataTool) Name() string { |
| return "korap_metadata" |
| } |
| |
| // Description returns the tool description |
| func (m *MetadataTool) Description() string { |
| return "Retrieve metadata and statistics for KorAP corpora" |
| } |
| |
| // InputSchema returns the JSON schema for tool parameters |
| func (m *MetadataTool) InputSchema() map[string]interface{} { |
| return map[string]interface{}{ |
| "type": "object", |
| "properties": map[string]interface{}{ |
| "action": map[string]interface{}{ |
| "type": "string", |
| "description": "Type of metadata operation to perform. 'list' retrieves all available corpora with their basic information, 'statistics' provides detailed corpus statistics.", |
| "enum": []string{"list", "statistics"}, |
| "default": "list", |
| "examples": []string{"list", "statistics"}, |
| }, |
| "corpus": map[string]interface{}{ |
| "type": "string", |
| "description": "Virtual corpus query to filter results based on metadata fields. For 'list' action, this parameter is ignored. For 'statistics' action, specifies which subset of data to analyze using metadata queries with boolean operations (& | !), comparison operators (= != < > in), and regular expressions (/pattern/). When not provided with 'statistics', returns statistics for all accessible data.", |
| "pattern": "^[a-zA-Z0-9._\\-\\s&|!=<>()/*\"']+$", |
| "examples": []string{"corpusSigle = \"GOE\"", "textClass = \"politics\" & pubDate in 2020", "textType = \"news\" | textType = \"blog\"", "availability = /CC.*/ & textClass != \"fiction\""}, |
| }, |
| }, |
| "required": []string{"action"}, |
| "additionalProperties": false, |
| "title": "KorAP Metadata Parameters", |
| "description": "Parameters for retrieving corpus metadata and statistics from KorAP, including corpus lists and detailed statistical information.", |
| } |
| } |
| |
| // Execute performs the metadata retrieval operation |
| func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { |
| log.Debug(). |
| Str("tool", m.Name()). |
| Msg("Executing metadata tool") |
| |
| // Extract required action parameter |
| action, err := request.RequireString("action") |
| if err != nil { |
| return nil, fmt.Errorf("action parameter is required: %w", err) |
| } |
| |
| // Extract optional corpus parameter |
| corpus := request.GetString("corpus", "") |
| |
| // Validate the metadata request using the validation package |
| metadataReq := validation.MetadataRequest{ |
| Action: action, |
| Corpus: corpus, |
| } |
| |
| if err := m.validator.ValidateMetadataRequest(metadataReq); err != nil { |
| log.Warn(). |
| Err(err). |
| Interface("request", metadataReq). |
| Msg("Metadata request validation failed") |
| return nil, fmt.Errorf("invalid metadata request: %w", err) |
| } |
| |
| // Sanitize inputs |
| if corpus != "" { |
| corpus = m.validator.SanitizeCorpusID(corpus) |
| } |
| |
| log.Debug(). |
| Str("action", action). |
| Str("corpus", corpus). |
| Msg("Parsed and validated metadata parameters") |
| |
| // Check if client is available and authenticated |
| if m.client == nil { |
| return nil, fmt.Errorf("KorAP client not configured") |
| } |
| |
| if !m.client.IsAuthenticated() { |
| log.Warn().Msg("Client not authenticated, attempting authentication") |
| if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil { |
| return nil, fmt.Errorf("authentication failed: %w", err) |
| } |
| } |
| |
| // Handle different actions |
| switch action { |
| case "list": |
| return m.handleListCorpora(ctx) |
| case "statistics": |
| return m.handleCorpusStatistics(ctx, corpus) |
| default: |
| // This should never be reached due to validation above |
| return nil, fmt.Errorf("unknown action: %s", action) |
| } |
| } |
| |
| // handleListCorpora retrieves and formats the list of available corpora |
| func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) { |
| log.Debug().Msg("Retrieving corpus list") |
| |
| var corpusListResp service.CorpusListResponse |
| err := m.client.GetJSON(ctx, "corpus", &corpusListResp) |
| if err != nil { |
| log.Error(). |
| Err(err). |
| Msg("Failed to retrieve corpus list") |
| return nil, fmt.Errorf("failed to retrieve corpus list: %w", err) |
| } |
| |
| // Validate the response |
| if err := m.validator.ValidateCorpusListResponse(&corpusListResp); err != nil { |
| log.Warn(). |
| Err(err). |
| Msg("Corpus list response validation failed, but continuing with potentially invalid data") |
| // Continue processing despite validation errors to be resilient |
| } |
| |
| log.Info(). |
| Int("corpus_count", len(corpusListResp.Corpora)). |
| Msg("Corpus list retrieved successfully") |
| |
| result := m.formatCorpusList(&corpusListResp) |
| return mcp.NewToolResultText(result), nil |
| } |
| |
| // handleCorpusStatistics retrieves and formats statistics for a corpus query |
| func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) { |
| log.Debug(). |
| Str("corpus", corpus). |
| Msg("Retrieving corpus statistics") |
| |
| var statsResp service.StatisticsResponse |
| var endpoint string |
| if corpus == "" { |
| endpoint = "statistics" |
| } else { |
| endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus) |
| } |
| |
| err := m.client.GetJSON(ctx, endpoint, &statsResp) |
| if err != nil { |
| log.Error(). |
| Err(err). |
| Str("corpus", corpus). |
| Msg("Failed to retrieve corpus statistics") |
| return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err) |
| } |
| |
| // Validate the response |
| if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil { |
| log.Warn(). |
| Err(err). |
| Msg("Statistics response validation failed, but continuing with potentially invalid data") |
| // Continue processing despite validation errors to be resilient |
| } |
| |
| log.Info(). |
| Str("corpus", corpus). |
| Int("documents", statsResp.Documents). |
| Int("tokens", statsResp.Tokens). |
| Msg("Corpus statistics retrieved successfully") |
| |
| result := m.formatCorpusStatistics(corpus, &statsResp) |
| return mcp.NewToolResultText(result), nil |
| } |
| |
| // formatCorpusList formats the corpus list response into a readable text format |
| func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string { |
| var result strings.Builder |
| |
| result.WriteString("KorAP Available Corpora\n") |
| result.WriteString("=======================\n\n") |
| |
| if len(response.Corpora) == 0 { |
| result.WriteString("No corpora available.\n") |
| return result.String() |
| } |
| |
| result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora))) |
| |
| for i, corpus := range response.Corpora { |
| result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name)) |
| result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID)) |
| |
| if corpus.Description != "" { |
| result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description)) |
| } |
| |
| if corpus.Documents > 0 { |
| result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents)) |
| } |
| |
| if corpus.Tokens > 0 { |
| result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens)) |
| } |
| |
| if corpus.Sentences > 0 { |
| result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences)) |
| } |
| |
| if corpus.Paragraphs > 0 { |
| result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs)) |
| } |
| |
| result.WriteString("\n") |
| } |
| |
| return result.String() |
| } |
| |
| // formatCorpusStatistics formats the corpus statistics response into a readable text format |
| func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string { |
| var result strings.Builder |
| |
| result.WriteString("KorAP Corpus Statistics\n") |
| result.WriteString("=======================\n\n") |
| |
| if corpus == "" { |
| result.WriteString("Corpus Query: (all available data)\n\n") |
| } else { |
| result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus)) |
| } |
| |
| result.WriteString("Statistics:\n") |
| result.WriteString("-----------\n") |
| result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents)) |
| result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens)) |
| |
| if response.Sentences > 0 { |
| result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences)) |
| } |
| |
| if response.Paragraphs > 0 { |
| result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs)) |
| } |
| |
| // Add any additional fields if present |
| if len(response.Fields) > 0 { |
| result.WriteString("\nAdditional Fields:\n") |
| result.WriteString("------------------\n") |
| for key, value := range response.Fields { |
| result.WriteString(fmt.Sprintf("%s: %v\n", key, value)) |
| } |
| } |
| |
| return result.String() |
| } |