blob: 6fd84b1833690f3e00ffa05535e54584b40b695e [file] [log] [blame]
package tools
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval.
// It supports two actions: "list", which returns the available corpora, and
// "statistics", which returns statistics for an optional virtual corpus query.
type MetadataTool struct {
	// client is the KorAP service client used to authenticate and to issue
	// the JSON requests; Execute returns an error when it is nil.
	client *service.Client
}
// NewMetadataTool returns a MetadataTool that sends its requests through the
// given KorAP service client. The client is stored as-is and may be nil, in
// which case Execute reports that no client is configured.
func NewMetadataTool(client *service.Client) *MetadataTool {
	tool := new(MetadataTool)
	tool.client = client
	return tool
}
// Name returns the identifier under which this tool is exposed.
func (m *MetadataTool) Name() string {
	const toolName = "korap_metadata"
	return toolName
}
// Description returns a one-line, human-readable summary of what the tool does.
func (m *MetadataTool) Description() string {
	const summary = "Retrieve metadata and statistics for KorAP corpora"
	return summary
}
// InputSchema returns the JSON schema describing the tool's arguments:
// a required string "action" (one of "list" or "statistics") and an optional
// string "corpus" holding a virtual corpus query.
func (m *MetadataTool) InputSchema() map[string]interface{} {
	actionProperty := map[string]interface{}{
		"type":        "string",
		"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
		"enum":        []string{"list", "statistics"},
		"default":     "list",
	}

	corpusProperty := map[string]interface{}{
		"type":        "string",
		"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
	}

	schema := make(map[string]interface{}, 3)
	schema["type"] = "object"
	schema["properties"] = map[string]interface{}{
		"action": actionProperty,
		"corpus": corpusProperty,
	}
	schema["required"] = []string{"action"}
	return schema
}
// Execute runs the metadata tool for a single call.
//
// It reads the required "action" argument and the optional "corpus" argument
// from request, rejects actions other than "list" and "statistics", verifies
// that a client is configured and authenticated (attempting client-credentials
// authentication when it is not), and then dispatches to the handler for the
// requested action. On any failure the result is nil and the error describes
// the failing step.
func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	log.Debug().Str("tool", m.Name()).Msg("Executing metadata tool")

	action, err := request.RequireString("action")
	if err != nil {
		return nil, fmt.Errorf("action parameter is required: %w", err)
	}
	// The corpus query is optional; an empty string means "no filter".
	corpus := request.GetString("corpus", "")

	log.Debug().Str("action", action).Str("corpus", corpus).Msg("Parsed metadata parameters")

	// Reject unsupported actions before touching the client so that a bad
	// request never triggers an authentication attempt.
	if action != "list" && action != "statistics" {
		return nil, fmt.Errorf("unknown action: %s", action)
	}

	if m.client == nil {
		return nil, fmt.Errorf("KorAP client not configured")
	}
	if !m.client.IsAuthenticated() {
		log.Warn().Msg("Client not authenticated, attempting authentication")
		if authErr := m.client.AuthenticateWithClientCredentials(ctx); authErr != nil {
			return nil, fmt.Errorf("authentication failed: %w", authErr)
		}
	}

	// Only the two validated actions can reach this point.
	if action == "statistics" {
		return m.handleCorpusStatistics(ctx, corpus)
	}
	return m.handleListCorpora(ctx)
}
// handleListCorpora requests the "corpus" endpoint through the client, decodes
// the response into a service.CorpusListResponse and returns it rendered as a
// text tool result. A request failure is logged and returned as a wrapped error.
func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
	log.Debug().Msg("Retrieving corpus list")

	listing := new(service.CorpusListResponse)
	if err := m.client.GetJSON(ctx, "corpus", listing); err != nil {
		log.Error().Err(err).Msg("Failed to retrieve corpus list")
		return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
	}

	log.Info().
		Int("corpus_count", len(listing.Corpora)).
		Msg("Corpus list retrieved successfully")

	return mcp.NewToolResultText(m.formatCorpusList(listing)), nil
}
// handleCorpusStatistics requests the "statistics" endpoint through the client
// and returns the decoded response rendered as a text tool result.
//
// When corpus is non-empty it is sent as the corpusQuery query parameter;
// an empty corpus requests statistics without a filter. A request failure is
// logged and returned as a wrapped error.
func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
	log.Debug().
		Str("corpus", corpus).
		Msg("Retrieving corpus statistics")

	endpoint := "statistics"
	if corpus != "" {
		// Corpus queries routinely contain spaces, quotes, '&', '=' and '#'.
		// Left unescaped they would truncate or corrupt the query string,
		// so the value is percent-encoded before being appended.
		endpoint += "?corpusQuery=" + url.QueryEscape(corpus)
	}

	var statsResp service.StatisticsResponse
	if err := m.client.GetJSON(ctx, endpoint, &statsResp); err != nil {
		log.Error().
			Err(err).
			Str("corpus", corpus).
			Msg("Failed to retrieve corpus statistics")
		return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
	}

	log.Info().
		Str("corpus", corpus).
		Int("documents", statsResp.Documents).
		Int("tokens", statsResp.Tokens).
		Msg("Corpus statistics retrieved successfully")

	result := m.formatCorpusStatistics(corpus, &statsResp)
	return mcp.NewToolResultText(result), nil
}
// formatCorpusList renders a corpus list response as plain text: a header,
// then either a "no corpora" notice or the total count followed by one
// numbered entry per corpus. Each entry always shows the name and ID; the
// description and the document, token, sentence and paragraph counts are
// printed only when they are non-empty / greater than zero.
func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
	var out strings.Builder
	out.WriteString("KorAP Available Corpora\n")
	out.WriteString("=======================\n\n")

	total := len(response.Corpora)
	if total == 0 {
		out.WriteString("No corpora available.\n")
		return out.String()
	}

	fmt.Fprintf(&out, "Total Corpora: %d\n\n", total)

	for idx := 0; idx < total; idx++ {
		entry := response.Corpora[idx]

		// Entries are numbered starting at 1.
		fmt.Fprintf(&out, "%d. %s\n", idx+1, entry.Name)
		fmt.Fprintf(&out, " ID: %s\n", entry.ID)

		if entry.Description != "" {
			fmt.Fprintf(&out, " Description: %s\n", entry.Description)
		}
		if entry.Documents > 0 {
			fmt.Fprintf(&out, " Documents: %d\n", entry.Documents)
		}
		if entry.Tokens > 0 {
			fmt.Fprintf(&out, " Tokens: %d\n", entry.Tokens)
		}
		if entry.Sentences > 0 {
			fmt.Fprintf(&out, " Sentences: %d\n", entry.Sentences)
		}
		if entry.Paragraphs > 0 {
			fmt.Fprintf(&out, " Paragraphs: %d\n", entry.Paragraphs)
		}

		// Blank line separates consecutive entries.
		out.WriteString("\n")
	}

	return out.String()
}
// formatCorpusStatistics renders a statistics response as plain text.
//
// The output starts with a header and the corpus query (or a note that all
// available data is covered when corpus is empty), followed by the document
// and token counts. Sentence and paragraph counts are printed only when they
// are greater than zero. Any entries in response.Fields are appended under
// "Additional Fields", sorted by key so that the output is deterministic.
func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
	var out strings.Builder
	out.WriteString("KorAP Corpus Statistics\n")
	out.WriteString("=======================\n\n")

	if corpus != "" {
		fmt.Fprintf(&out, "Corpus Query: %s\n\n", corpus)
	} else {
		out.WriteString("Corpus Query: (all available data)\n\n")
	}

	out.WriteString("Statistics:\n")
	out.WriteString("-----------\n")
	fmt.Fprintf(&out, "Documents: %d\n", response.Documents)
	fmt.Fprintf(&out, "Tokens: %d\n", response.Tokens)
	if response.Sentences > 0 {
		fmt.Fprintf(&out, "Sentences: %d\n", response.Sentences)
	}
	if response.Paragraphs > 0 {
		fmt.Fprintf(&out, "Paragraphs: %d\n", response.Paragraphs)
	}

	if len(response.Fields) > 0 {
		out.WriteString("\nAdditional Fields:\n")
		out.WriteString("------------------\n")

		// Map iteration order is randomized in Go; collect and sort the keys
		// so repeated calls produce the same text for the same response.
		keys := make([]string, 0, len(response.Fields))
		for key := range response.Fields {
			keys = append(keys, key)
		}
		sort.Strings(keys)

		for _, key := range keys {
			fmt.Fprintf(&out, "%s: %v\n", key, response.Fields[key])
		}
	}

	return out.String()
}