Support metadata retrieval

commit: bd154ea40354e09aed17447c33398ed8a5eafedd [log] [tgz]
author: Akron <nils@diewald-online.de> Thu Jun 12 17:01:58 2025 +0200
committer: Akron <nils@diewald-online.de> Thu Jun 12 17:01:58 2025 +0200
tree: cf45b4e3428a7ec62dad4ff1b64f234b16c02e8f
parent: 8138c3538c91266aefddd5e64174b7e6cb61b38b [diff] [blame]
diff --git a/tools/metadata.go b/tools/metadata.go
new file mode 100644
index 0000000..6fd84b1
--- /dev/null
+++ b/tools/metadata.go

@@ -0,0 +1,243 @@
+package tools
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"sort"
+	"strings"
+
+	"github.com/korap/korap-mcp/service"
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/rs/zerolog/log"
+)
+
+// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval.
+// It serves two actions, "list" and "statistics", both of which are answered
+// through the wrapped service client.
+type MetadataTool struct {
+	// client is the KorAP service client used for the authentication check and
+	// for the JSON requests issued by the handlers. Execute refuses to run
+	// when it is nil.
+	client *service.Client
+}
+
+// NewMetadataTool wraps the given KorAP service client in a MetadataTool.
+// The client is stored as-is; a nil client is accepted here and is only
+// reported as an error once Execute is called.
+func NewMetadataTool(client *service.Client) *MetadataTool {
+	tool := new(MetadataTool)
+	tool.client = client
+	return tool
+}
+
+// Name returns the tool name
+func (m *MetadataTool) Name() string {
+	return "korap_metadata"
+}
+
+// Description returns the tool description
+func (m *MetadataTool) Description() string {
+	return "Retrieve metadata and statistics for KorAP corpora"
+}
+
+// InputSchema returns the JSON schema describing the tool arguments: a
+// required string "action" (either "list" or "statistics", default "list")
+// and an optional string "corpus" holding a virtual corpus query.
+func (m *MetadataTool) InputSchema() map[string]interface{} {
+	actionProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
+		"enum":        []string{"list", "statistics"},
+		"default":     "list",
+	}
+
+	corpusProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
+	}
+
+	schema := map[string]interface{}{
+		"type": "object",
+		"properties": map[string]interface{}{
+			"action": actionProp,
+			"corpus": corpusProp,
+		},
+		"required": []string{"action"},
+	}
+	return schema
+}
+
+// Execute runs the metadata tool for a single call.
+//
+// It reads the required "action" argument and the optional "corpus" argument,
+// rejects unknown actions before touching the client, verifies that a client
+// is configured and authenticated (authenticating with client credentials
+// when it is not), and finally dispatches to the handler for the action.
+// Every failure is reported as a non-nil error together with a nil result.
+func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	log.Debug().Str("tool", m.Name()).Msg("Executing metadata tool")
+
+	action, err := request.RequireString("action")
+	if err != nil {
+		return nil, fmt.Errorf("action parameter is required: %w", err)
+	}
+	corpus := request.GetString("corpus", "")
+
+	log.Debug().Str("action", action).Str("corpus", corpus).Msg("Parsed metadata parameters")
+
+	// Reject unsupported actions up front so an invalid request never
+	// triggers an authentication attempt. Neither supported action needs
+	// further argument validation; "corpus" is optional for both.
+	if action != "list" && action != "statistics" {
+		return nil, fmt.Errorf("unknown action: %s", action)
+	}
+
+	if m.client == nil {
+		return nil, fmt.Errorf("KorAP client not configured")
+	}
+
+	if authenticated := m.client.IsAuthenticated(); !authenticated {
+		log.Warn().Msg("Client not authenticated, attempting authentication")
+		authErr := m.client.AuthenticateWithClientCredentials(ctx)
+		if authErr != nil {
+			return nil, fmt.Errorf("authentication failed: %w", authErr)
+		}
+	}
+
+	if action == "list" {
+		return m.handleListCorpora(ctx)
+	}
+	// Only "statistics" can remain after the validation above.
+	return m.handleCorpusStatistics(ctx, corpus)
+}
+
+// handleListCorpora retrieves and formats the list of available corpora
+func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
+	log.Debug().Msg("Retrieving corpus list")
+
+	var corpusListResp service.CorpusListResponse
+	err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
+	if err != nil {
+		log.Error().
+			Err(err).
+			Msg("Failed to retrieve corpus list")
+		return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
+	}
+
+	log.Info().
+		Int("corpus_count", len(corpusListResp.Corpora)).
+		Msg("Corpus list retrieved successfully")
+
+	result := m.formatCorpusList(&corpusListResp)
+	return mcp.NewToolResultText(result), nil
+}
+
+// handleCorpusStatistics retrieves statistics for a virtual corpus query and
+// returns them rendered as a text tool result.
+//
+// An empty corpus requests the plain "statistics" endpoint. Otherwise the
+// query is sent as the "corpusQuery" URL parameter. A request failure is
+// logged and returned wrapped, with a nil result.
+func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
+	log.Debug().
+		Str("corpus", corpus).
+		Msg("Retrieving corpus statistics")
+
+	endpoint := "statistics"
+	if corpus != "" {
+		// The endpoint string is handed to the client as-is, so the query
+		// value has to be percent-encoded here. Corpus queries may contain
+		// spaces, quotes, '&', '=' or '#', any of which would otherwise
+		// truncate or corrupt the parameter.
+		endpoint += "?corpusQuery=" + url.QueryEscape(corpus)
+	}
+
+	var statsResp service.StatisticsResponse
+	if err := m.client.GetJSON(ctx, endpoint, &statsResp); err != nil {
+		log.Error().
+			Err(err).
+			Str("corpus", corpus).
+			Msg("Failed to retrieve corpus statistics")
+		return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
+	}
+
+	log.Info().
+		Str("corpus", corpus).
+		Int("documents", statsResp.Documents).
+		Int("tokens", statsResp.Tokens).
+		Msg("Corpus statistics retrieved successfully")
+
+	result := m.formatCorpusStatistics(corpus, &statsResp)
+	return mcp.NewToolResultText(result), nil
+}
+
+// formatCorpusList renders a corpus list response as plain text: a header,
+// then either a "no corpora" note or the total followed by one numbered entry
+// per corpus. Name and ID are always printed; the description and the
+// document, token, sentence and paragraph counts appear only when they are
+// non-empty or greater than zero.
+func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
+	var out strings.Builder
+
+	out.WriteString("KorAP Available Corpora\n")
+	out.WriteString("=======================\n\n")
+
+	total := len(response.Corpora)
+	if total == 0 {
+		out.WriteString("No corpora available.\n")
+		return out.String()
+	}
+
+	fmt.Fprintf(&out, "Total Corpora: %d\n\n", total)
+
+	for idx, entry := range response.Corpora {
+		// Entries are numbered starting at 1 for readability.
+		fmt.Fprintf(&out, "%d. %s\n", idx+1, entry.Name)
+		fmt.Fprintf(&out, "   ID: %s\n", entry.ID)
+
+		if entry.Description != "" {
+			fmt.Fprintf(&out, "   Description: %s\n", entry.Description)
+		}
+		if entry.Documents > 0 {
+			fmt.Fprintf(&out, "   Documents: %d\n", entry.Documents)
+		}
+		if entry.Tokens > 0 {
+			fmt.Fprintf(&out, "   Tokens: %d\n", entry.Tokens)
+		}
+		if entry.Sentences > 0 {
+			fmt.Fprintf(&out, "   Sentences: %d\n", entry.Sentences)
+		}
+		if entry.Paragraphs > 0 {
+			fmt.Fprintf(&out, "   Paragraphs: %d\n", entry.Paragraphs)
+		}
+
+		// A blank line separates consecutive entries.
+		out.WriteString("\n")
+	}
+
+	return out.String()
+}
+
+// formatCorpusStatistics renders a statistics response as plain text.
+//
+// The output names the corpus query (or notes that all available data was
+// used when corpus is empty), always prints the document and token counts,
+// prints sentence and paragraph counts only when they are greater than zero,
+// and finally lists any additional fields sorted by key so that the same
+// response always yields the same text.
+func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
+	var result strings.Builder
+
+	result.WriteString("KorAP Corpus Statistics\n")
+	result.WriteString("=======================\n\n")
+
+	if corpus == "" {
+		result.WriteString("Corpus Query: (all available data)\n\n")
+	} else {
+		fmt.Fprintf(&result, "Corpus Query: %s\n\n", corpus)
+	}
+
+	result.WriteString("Statistics:\n")
+	result.WriteString("-----------\n")
+	fmt.Fprintf(&result, "Documents: %d\n", response.Documents)
+	fmt.Fprintf(&result, "Tokens: %d\n", response.Tokens)
+
+	if response.Sentences > 0 {
+		fmt.Fprintf(&result, "Sentences: %d\n", response.Sentences)
+	}
+
+	if response.Paragraphs > 0 {
+		fmt.Fprintf(&result, "Paragraphs: %d\n", response.Paragraphs)
+	}
+
+	// Add any additional fields if present.
+	if len(response.Fields) > 0 {
+		result.WriteString("\nAdditional Fields:\n")
+		result.WriteString("------------------\n")
+
+		// Map iteration order is randomized in Go; collect and sort the keys
+		// so the listing is stable between calls.
+		keys := make([]string, 0, len(response.Fields))
+		for key := range response.Fields {
+			keys = append(keys, key)
+		}
+		sort.Strings(keys)
+
+		for _, key := range keys {
+			fmt.Fprintf(&result, "%s: %v\n", key, response.Fields[key])
+		}
+	}
+
+	return result.String()
+}
commit	bd154ea40354e09aed17447c33398ed8a5eafedd	[log] [tgz]
author	Akron <nils@diewald-online.de>	Thu Jun 12 17:01:58 2025 +0200
committer	Akron <nils@diewald-online.de>	Thu Jun 12 17:01:58 2025 +0200
tree	cf45b4e3428a7ec62dad4ff1b64f234b16c02e8f
parent	8138c3538c91266aefddd5e64174b7e6cb61b38b [diff] [blame]