blob: c3da81323f1c0852e87a46011c02cf4f5962347c [file] [log] [blame]
package tools
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval.
// It serves two actions: listing corpora and reporting corpus statistics.
type MetadataTool struct {
// client is the KorAP service client used for authentication and JSON requests.
client *service.Client
// validator checks requests and service responses and sanitizes the corpus parameter.
validator *validation.Validator
}
// NewMetadataTool builds a MetadataTool that issues its requests through client.
// The tool receives its own validator, constructed with the global zerolog logger.
func NewMetadataTool(client *service.Client) *MetadataTool {
	tool := new(MetadataTool)
	tool.client = client
	tool.validator = validation.New(log.Logger)
	return tool
}
// Name returns the identifier of the metadata tool.
func (m *MetadataTool) Name() string {
	const toolName = "korap_metadata"
	return toolName
}
// Description returns the one-line, human-readable summary of the tool.
func (m *MetadataTool) Description() string {
	const summary = "Retrieve metadata and statistics for KorAP corpora"
	return summary
}
// InputSchema returns the JSON schema describing the tool's arguments.
//
// The schema is an object with two string properties: "action" (required,
// one of "list" or "statistics", default "list") and "corpus" (optional
// virtual corpus query).
func (m *MetadataTool) InputSchema() map[string]interface{} {
	actionProperty := map[string]interface{}{
		"type":        "string",
		"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
		"enum":        []string{"list", "statistics"},
		"default":     "list",
	}
	corpusProperty := map[string]interface{}{
		"type":        "string",
		"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
	}

	schema := make(map[string]interface{}, 3)
	schema["type"] = "object"
	schema["properties"] = map[string]interface{}{
		"action": actionProperty,
		"corpus": corpusProperty,
	}
	schema["required"] = []string{"action"}
	return schema
}
// Execute runs the metadata tool for a single MCP call.
//
// It reads the required "action" argument and the optional "corpus" argument,
// validates them, sanitizes a non-empty corpus value, makes sure a client is
// configured and authenticated (authenticating with client credentials when it
// is not), and then dispatches to the list or statistics handler. Every
// failure is returned as an error together with a nil result.
func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	log.Debug().Str("tool", m.Name()).Msg("Executing metadata tool")

	// "action" is mandatory; "corpus" falls back to the empty string.
	action, actionErr := request.RequireString("action")
	if actionErr != nil {
		return nil, fmt.Errorf("action parameter is required: %w", actionErr)
	}
	corpus := request.GetString("corpus", "")

	// Validate the raw arguments before they are used any further.
	req := validation.MetadataRequest{Action: action, Corpus: corpus}
	if invalid := m.validator.ValidateMetadataRequest(req); invalid != nil {
		log.Warn().Err(invalid).Interface("request", req).Msg("Metadata request validation failed")
		return nil, fmt.Errorf("invalid metadata request: %w", invalid)
	}

	// Only a non-empty corpus value is passed through the sanitizer.
	if len(corpus) > 0 {
		corpus = m.validator.SanitizeCorpusID(corpus)
	}

	log.Debug().Str("action", action).Str("corpus", corpus).Msg("Parsed and validated metadata parameters")

	// Both handlers need a configured, authenticated client.
	if m.client == nil {
		return nil, fmt.Errorf("KorAP client not configured")
	}
	if authenticated := m.client.IsAuthenticated(); !authenticated {
		log.Warn().Msg("Client not authenticated, attempting authentication")
		if authErr := m.client.AuthenticateWithClientCredentials(ctx); authErr != nil {
			return nil, fmt.Errorf("authentication failed: %w", authErr)
		}
	}

	// Dispatch on the requested action.
	if action == "list" {
		return m.handleListCorpora(ctx)
	}
	if action == "statistics" {
		return m.handleCorpusStatistics(ctx, corpus)
	}
	// Validation above is expected to reject every other action.
	return nil, fmt.Errorf("unknown action: %s", action)
}
// handleListCorpora fetches the corpus list from the "corpus" endpoint and
// returns it as a text tool result.
//
// A request failure is logged and returned as a wrapped error. A response
// that fails validation is only logged as a warning; formatting proceeds
// with whatever data was received.
func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
	log.Debug().Msg("Retrieving corpus list")

	corpora := new(service.CorpusListResponse)
	if fetchErr := m.client.GetJSON(ctx, "corpus", corpora); fetchErr != nil {
		log.Error().Err(fetchErr).Msg("Failed to retrieve corpus list")
		return nil, fmt.Errorf("failed to retrieve corpus list: %w", fetchErr)
	}

	// Validation problems are tolerated on purpose so that partial data is still shown.
	if invalid := m.validator.ValidateCorpusListResponse(corpora); invalid != nil {
		log.Warn().Err(invalid).Msg("Corpus list response validation failed, but continuing with potentially invalid data")
	}

	log.Info().Int("corpus_count", len(corpora.Corpora)).Msg("Corpus list retrieved successfully")

	return mcp.NewToolResultText(m.formatCorpusList(corpora)), nil
}
// handleCorpusStatistics fetches statistics for a corpus query and returns
// them as a text tool result.
//
// An empty corpus requests the plain "statistics" endpoint. Otherwise the
// query is sent as the corpusQuery URL parameter. A request failure is logged
// and returned as a wrapped error. A response that fails validation is only
// logged as a warning; formatting proceeds with whatever data was received.
func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
	log.Debug().
		Str("corpus", corpus).
		Msg("Retrieving corpus statistics")

	endpoint := "statistics"
	if corpus != "" {
		// Virtual corpus queries routinely contain spaces, '&', '=' and other
		// characters that are significant inside a URL query string, so the
		// value must be percent-encoded before it is appended.
		endpoint += "?corpusQuery=" + url.QueryEscape(corpus)
	}

	var statsResp service.StatisticsResponse
	if err := m.client.GetJSON(ctx, endpoint, &statsResp); err != nil {
		log.Error().
			Err(err).
			Str("corpus", corpus).
			Msg("Failed to retrieve corpus statistics")
		return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
	}

	// Validation problems are tolerated on purpose so that partial data is still shown.
	if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil {
		log.Warn().
			Err(err).
			Msg("Statistics response validation failed, but continuing with potentially invalid data")
	}

	log.Info().
		Str("corpus", corpus).
		Int("documents", statsResp.Documents).
		Int("tokens", statsResp.Tokens).
		Msg("Corpus statistics retrieved successfully")

	result := m.formatCorpusStatistics(corpus, &statsResp)
	return mcp.NewToolResultText(result), nil
}
// formatCorpusList renders a corpus list response as plain text.
//
// The output starts with a title banner. An empty list yields a single
// "No corpora available." line; otherwise the total is printed followed by
// one numbered entry per corpus. Description and the count fields are only
// printed when they are non-empty / greater than zero.
func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
	var out strings.Builder

	out.WriteString("KorAP Available Corpora\n")
	out.WriteString("=======================\n\n")

	total := len(response.Corpora)
	if total == 0 {
		out.WriteString("No corpora available.\n")
		return out.String()
	}

	fmt.Fprintf(&out, "Total Corpora: %d\n\n", total)

	for idx, entry := range response.Corpora {
		fmt.Fprintf(&out, "%d. %s\n", idx+1, entry.Name)
		fmt.Fprintf(&out, " ID: %s\n", entry.ID)
		if entry.Description != "" {
			fmt.Fprintf(&out, " Description: %s\n", entry.Description)
		}
		if entry.Documents > 0 {
			fmt.Fprintf(&out, " Documents: %d\n", entry.Documents)
		}
		if entry.Tokens > 0 {
			fmt.Fprintf(&out, " Tokens: %d\n", entry.Tokens)
		}
		if entry.Sentences > 0 {
			fmt.Fprintf(&out, " Sentences: %d\n", entry.Sentences)
		}
		if entry.Paragraphs > 0 {
			fmt.Fprintf(&out, " Paragraphs: %d\n", entry.Paragraphs)
		}
		// Blank line separates consecutive entries.
		out.WriteString("\n")
	}

	return out.String()
}
// formatCorpusStatistics renders a statistics response as plain text.
//
// The output contains a title banner, the corpus query (or a placeholder when
// corpus is empty), the document and token counts, and the sentence and
// paragraph counts when they are greater than zero. Entries of
// response.Fields, if any, are listed under "Additional Fields" sorted by key
// so that the same response always produces the same text.
func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
	var result strings.Builder

	result.WriteString("KorAP Corpus Statistics\n")
	result.WriteString("=======================\n\n")

	if corpus == "" {
		result.WriteString("Corpus Query: (all available data)\n\n")
	} else {
		fmt.Fprintf(&result, "Corpus Query: %s\n\n", corpus)
	}

	result.WriteString("Statistics:\n")
	result.WriteString("-----------\n")
	fmt.Fprintf(&result, "Documents: %d\n", response.Documents)
	fmt.Fprintf(&result, "Tokens: %d\n", response.Tokens)
	if response.Sentences > 0 {
		fmt.Fprintf(&result, "Sentences: %d\n", response.Sentences)
	}
	if response.Paragraphs > 0 {
		fmt.Fprintf(&result, "Paragraphs: %d\n", response.Paragraphs)
	}

	if len(response.Fields) > 0 {
		// Map iteration order is randomized in Go; sort the keys so the
		// rendered text is stable across calls.
		keys := make([]string, 0, len(response.Fields))
		for key := range response.Fields {
			keys = append(keys, key)
		}
		sort.Strings(keys)

		result.WriteString("\nAdditional Fields:\n")
		result.WriteString("------------------\n")
		for _, key := range keys {
			fmt.Fprintf(&result, "%s: %v\n", key, response.Fields[key])
		}
	}

	return result.String()
}