Support metadata retrieval
diff --git a/tools/metadata.go b/tools/metadata.go
new file mode 100644
index 0000000..6fd84b1
--- /dev/null
+++ b/tools/metadata.go
@@ -0,0 +1,243 @@
+package tools
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"sort"
+	"strings"
+
+	"github.com/korap/korap-mcp/service"
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/rs/zerolog/log"
+)
+
+// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval.
+// It offers two actions: listing the available corpora and fetching statistics
+// for an (optionally filtered) corpus.
+type MetadataTool struct {
+ // client is the KorAP service client used for authentication and JSON requests.
+ client *service.Client
+}
+
+// NewMetadataTool builds a MetadataTool bound to the given KorAP client.
+//
+// The client is stored as-is. A nil client is accepted here; Execute reports
+// it as an error when the tool is actually invoked.
+func NewMetadataTool(client *service.Client) *MetadataTool {
+	tool := new(MetadataTool)
+	tool.client = client
+	return tool
+}
+
+// Name returns the tool name
+func (m *MetadataTool) Name() string {
+ return "korap_metadata"
+}
+
+// Description returns the tool description
+func (m *MetadataTool) Description() string {
+ return "Retrieve metadata and statistics for KorAP corpora"
+}
+
+// InputSchema describes the accepted tool arguments as a JSON-schema-shaped map.
+//
+// Two string properties are declared: "action" (required; one of "list" or
+// "statistics", with "list" given as the schema default) and "corpus" (an
+// optional virtual corpus query).
+func (m *MetadataTool) InputSchema() map[string]interface{} {
+	actionProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
+		"enum":        []string{"list", "statistics"},
+		"default":     "list",
+	}
+
+	corpusProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
+	}
+
+	schema := map[string]interface{}{"type": "object"}
+	schema["properties"] = map[string]interface{}{
+		"action": actionProp,
+		"corpus": corpusProp,
+	}
+	schema["required"] = []string{"action"}
+
+	return schema
+}
+
+// Execute performs the metadata retrieval operation
+func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+ log.Debug().
+ Str("tool", m.Name()).
+ Msg("Executing metadata tool")
+
+ // Extract required action parameter
+ action, err := request.RequireString("action")
+ if err != nil {
+ return nil, fmt.Errorf("action parameter is required: %w", err)
+ }
+
+ // Extract optional corpus parameter
+ corpus := request.GetString("corpus", "")
+
+ log.Debug().
+ Str("action", action).
+ Str("corpus", corpus).
+ Msg("Parsed metadata parameters")
+
+ // Validate parameters before authentication
+ switch action {
+ case "list":
+ // No additional validation needed for list
+ case "statistics":
+ // No additional validation needed for statistics - corpus is optional
+ default:
+ return nil, fmt.Errorf("unknown action: %s", action)
+ }
+
+ // Check if client is available and authenticated
+ if m.client == nil {
+ return nil, fmt.Errorf("KorAP client not configured")
+ }
+
+ if !m.client.IsAuthenticated() {
+ log.Warn().Msg("Client not authenticated, attempting authentication")
+ if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil {
+ return nil, fmt.Errorf("authentication failed: %w", err)
+ }
+ }
+
+ // Handle different actions
+ switch action {
+ case "list":
+ return m.handleListCorpora(ctx)
+ case "statistics":
+ return m.handleCorpusStatistics(ctx, corpus)
+ default:
+ // This should never be reached due to validation above
+ return nil, fmt.Errorf("unknown action: %s", action)
+ }
+}
+
+// handleListCorpora retrieves and formats the list of available corpora
+func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
+ log.Debug().Msg("Retrieving corpus list")
+
+ var corpusListResp service.CorpusListResponse
+ err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
+ if err != nil {
+ log.Error().
+ Err(err).
+ Msg("Failed to retrieve corpus list")
+ return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
+ }
+
+ log.Info().
+ Int("corpus_count", len(corpusListResp.Corpora)).
+ Msg("Corpus list retrieved successfully")
+
+ result := m.formatCorpusList(&corpusListResp)
+ return mcp.NewToolResultText(result), nil
+}
+
+// handleCorpusStatistics fetches statistics from the "statistics" endpoint and
+// returns them as a formatted text result.
+//
+// When corpus is empty the endpoint is queried without a filter; otherwise the
+// corpus query is sent as the "corpusQuery" URL parameter. A failed request is
+// logged and returned as a wrapped error. The caller is expected to have
+// verified that m.client is non-nil.
+func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
+	log.Debug().
+		Str("corpus", corpus).
+		Msg("Retrieving corpus statistics")
+
+	endpoint := "statistics"
+	if corpus != "" {
+		// Corpus queries are free text and routinely contain spaces, '&', '='
+		// or '#'. Left unescaped, those characters would truncate the value or
+		// inject extra query parameters, so the value is query-escaped here.
+		endpoint = "statistics?corpusQuery=" + url.QueryEscape(corpus)
+	}
+
+	var statsResp service.StatisticsResponse
+	if err := m.client.GetJSON(ctx, endpoint, &statsResp); err != nil {
+		log.Error().
+			Err(err).
+			Str("corpus", corpus).
+			Msg("Failed to retrieve corpus statistics")
+		return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
+	}
+
+	log.Info().
+		Str("corpus", corpus).
+		Int("documents", statsResp.Documents).
+		Int("tokens", statsResp.Tokens).
+		Msg("Corpus statistics retrieved successfully")
+
+	result := m.formatCorpusStatistics(corpus, &statsResp)
+	return mcp.NewToolResultText(result), nil
+}
+
+// formatCorpusList renders a corpus list response as plain text.
+//
+// The output starts with a title banner. An empty list yields a single
+// "No corpora available." line. Otherwise the total is printed, followed by
+// one numbered entry per corpus with its name and ID. The description is
+// included only when non-empty, and each of the document, token, sentence and
+// paragraph counts only when greater than zero.
+func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
+	var out strings.Builder
+
+	out.WriteString("KorAP Available Corpora\n" + "=======================\n\n")
+
+	total := len(response.Corpora)
+	if total == 0 {
+		out.WriteString("No corpora available.\n")
+		return out.String()
+	}
+
+	fmt.Fprintf(&out, "Total Corpora: %d\n\n", total)
+
+	for pos, entry := range response.Corpora {
+		// Entries are numbered from 1 for display.
+		fmt.Fprintf(&out, "%d. %s\n", pos+1, entry.Name)
+		fmt.Fprintf(&out, " ID: %s\n", entry.ID)
+
+		if entry.Description != "" {
+			fmt.Fprintf(&out, " Description: %s\n", entry.Description)
+		}
+		if entry.Documents > 0 {
+			fmt.Fprintf(&out, " Documents: %d\n", entry.Documents)
+		}
+		if entry.Tokens > 0 {
+			fmt.Fprintf(&out, " Tokens: %d\n", entry.Tokens)
+		}
+		if entry.Sentences > 0 {
+			fmt.Fprintf(&out, " Sentences: %d\n", entry.Sentences)
+		}
+		if entry.Paragraphs > 0 {
+			fmt.Fprintf(&out, " Paragraphs: %d\n", entry.Paragraphs)
+		}
+
+		// Blank line separates consecutive entries.
+		out.WriteString("\n")
+	}
+
+	return out.String()
+}
+
+// formatCorpusStatistics renders a statistics response as plain text.
+//
+// corpus is the query the statistics were requested for; an empty string is
+// shown as "(all available data)". Document and token counts are always
+// printed, sentence and paragraph counts only when greater than zero. Any
+// entries in response.Fields are listed under "Additional Fields" in sorted
+// order, so the same response always produces the same text.
+func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
+	var result strings.Builder
+
+	result.WriteString("KorAP Corpus Statistics\n")
+	result.WriteString("=======================\n\n")
+
+	if corpus == "" {
+		result.WriteString("Corpus Query: (all available data)\n\n")
+	} else {
+		fmt.Fprintf(&result, "Corpus Query: %s\n\n", corpus)
+	}
+
+	result.WriteString("Statistics:\n")
+	result.WriteString("-----------\n")
+	fmt.Fprintf(&result, "Documents: %d\n", response.Documents)
+	fmt.Fprintf(&result, "Tokens: %d\n", response.Tokens)
+
+	if response.Sentences > 0 {
+		fmt.Fprintf(&result, "Sentences: %d\n", response.Sentences)
+	}
+
+	if response.Paragraphs > 0 {
+		fmt.Fprintf(&result, "Paragraphs: %d\n", response.Paragraphs)
+	}
+
+	// Add any additional fields if present.
+	if len(response.Fields) > 0 {
+		result.WriteString("\nAdditional Fields:\n")
+		result.WriteString("------------------\n")
+
+		// Go randomizes map iteration order, so writing entries directly would
+		// make the output differ between calls. Render each line first, then
+		// sort the rendered lines to obtain a stable order.
+		lines := make([]string, 0, len(response.Fields))
+		for key, value := range response.Fields {
+			lines = append(lines, fmt.Sprintf("%s: %v\n", key, value))
+		}
+		sort.Strings(lines)
+
+		for _, line := range lines {
+			result.WriteString(line)
+		}
+	}
+
+	return result.String()
+}