Support metadata retrieval
diff --git a/tools/metadata.go b/tools/metadata.go
new file mode 100644
index 0000000..6fd84b1
--- /dev/null
+++ b/tools/metadata.go
@@ -0,0 +1,245 @@
+package tools
+
+import (
+ "context"
+ "fmt"
+ "net/url"
+ "sort"
+ "strings"
+
+ "github.com/korap/korap-mcp/service"
+ "github.com/mark3labs/mcp-go/mcp"
+ "github.com/rs/zerolog/log"
+)
+
+// MetadataTool is the KorAP tool that retrieves corpus metadata: the corpus list and corpus statistics.
+type MetadataTool struct {
+ client *service.Client // KorAP client used for every request; Execute rejects a nil client
+}
+
+// NewMetadataTool returns a MetadataTool that sends its requests through client.
+func NewMetadataTool(client *service.Client) *MetadataTool {
+	tool := new(MetadataTool)
+	tool.client = client
+	return tool
+}
+
+// Name returns the tool's fixed identifier.
+//
+// Execute attaches the same value to its debug log entry.
+func (m *MetadataTool) Name() string { return "korap_metadata" }
+
+// Description returns the one-line, human-readable summary of the tool.
+//
+// The text is a constant and does not depend on the configured client.
+func (m *MetadataTool) Description() string { return "Retrieve metadata and statistics for KorAP corpora" }
+
+// InputSchema returns the JSON schema describing the tool's arguments: a
+// required "action" ("list" or "statistics") and an optional "corpus" query.
+func (m *MetadataTool) InputSchema() map[string]interface{} {
+	actionProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
+		"enum":        []string{"list", "statistics"},
+		"default":     "list",
+	}
+	corpusProp := map[string]interface{}{
+		"type":        "string",
+		"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
+	}
+	return map[string]interface{}{
+		"type":       "object",
+		"properties": map[string]interface{}{"action": actionProp, "corpus": corpusProp},
+		"required":   []string{"action"},
+	}
+}
+
+// Execute runs the metadata tool for a single MCP tool call.
+//
+// The request must carry a string "action" argument, either "list" or
+// "statistics"; the optional "corpus" argument defaults to "" and is passed on
+// only to the statistics handler. Processing order: parse and validate the
+// arguments, check that a client is configured, authenticate with client
+// credentials if the client is not authenticated yet, then call the handler.
+//
+// Every failure is reported through the returned error together with a nil
+// result: a missing or unknown action, a nil client, a failed authentication,
+// or an error from the handler.
+func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	log.Debug().Str("tool", m.Name()).Msg("Executing metadata tool")
+
+	// "action" is mandatory; a RequireString error is wrapped and returned.
+	action, err := request.RequireString("action")
+	if err != nil {
+		return nil, fmt.Errorf("action parameter is required: %w", err)
+	}
+
+	// "corpus" is optional and falls back to the empty string.
+	corpus := request.GetString("corpus", "")
+
+	log.Debug().
+		Str("action", action).
+		Str("corpus", corpus).
+		Msg("Parsed metadata parameters")
+
+	// Reject unsupported actions before looking at the client, so that bad
+	// input is reported even when no client is configured.
+	wantsList := action == "list"
+	if !wantsList && action != "statistics" {
+		return nil, fmt.Errorf("unknown action: %s", action)
+	}
+
+	if m.client == nil {
+		return nil, fmt.Errorf("KorAP client not configured")
+	}
+
+	// Authenticate on demand when the client reports it has not done so yet.
+	if !m.client.IsAuthenticated() {
+		log.Warn().Msg("Client not authenticated, attempting authentication")
+		if authErr := m.client.AuthenticateWithClientCredentials(ctx); authErr != nil {
+			return nil, fmt.Errorf("authentication failed: %w", authErr)
+		}
+	}
+
+	// Dispatch: after validation the action is either "list" or "statistics".
+	if wantsList {
+		return m.handleListCorpora(ctx)
+	}
+	return m.handleCorpusStatistics(ctx, corpus)
+}
+
+// handleListCorpora fetches the corpus list from the "corpus" endpoint and
+// renders it as text with formatCorpusList.
+//
+// A failed request is logged and returned as a wrapped error with a nil
+// result. Execute checks that m.client is non-nil before calling this method.
+func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
+	log.Debug().Msg("Retrieving corpus list")
+
+	listing := new(service.CorpusListResponse)
+	if err := m.client.GetJSON(ctx, "corpus", listing); err != nil {
+		log.Error().Err(err).Msg("Failed to retrieve corpus list")
+		return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
+	}
+
+	log.Info().
+		Int("corpus_count", len(listing.Corpora)).
+		Msg("Corpus list retrieved successfully")
+
+	return mcp.NewToolResultText(m.formatCorpusList(listing)), nil
+}
+
+// handleCorpusStatistics requests statistics for a virtual corpus query and
+// renders them as text with formatCorpusStatistics.
+//
+// An empty corpus queries the bare "statistics" endpoint. Otherwise the query
+// is sent as the corpusQuery URL parameter, percent-encoded. A failed request
+// is logged and returned as a wrapped error with a nil result. Log entries
+// and the rendered text always show the query exactly as the caller supplied
+// it, never the encoded form.
+func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
+	log.Debug().Str("corpus", corpus).Msg("Retrieving corpus statistics")
+
+	// The query is free-form input: without escaping, characters such as
+	// spaces, '&', '=' or '#' would corrupt or truncate the parameter value.
+	endpoint := "statistics"
+	if corpus != "" {
+		endpoint += "?corpusQuery=" + url.QueryEscape(corpus)
+	}
+
+	var statsResp service.StatisticsResponse
+	if err := m.client.GetJSON(ctx, endpoint, &statsResp); err != nil {
+		log.Error().Err(err).Str("corpus", corpus).Msg("Failed to retrieve corpus statistics")
+		return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
+	}
+
+	log.Info().
+		Str("corpus", corpus).
+		Int("documents", statsResp.Documents).
+		Int("tokens", statsResp.Tokens).
+		Msg("Corpus statistics retrieved successfully")
+
+	return mcp.NewToolResultText(m.formatCorpusStatistics(corpus, &statsResp)), nil
+}
+
+// formatCorpusList renders a corpus list as plain text: a title, the total
+// number of corpora, and one numbered entry per corpus with its name and ID.
+// The description is printed only when non-empty, and the document, token,
+// sentence and paragraph counts only when greater than zero. An empty list
+// produces the title followed by "No corpora available.".
+func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
+	var out strings.Builder
+	out.WriteString("KorAP Available Corpora\n=======================\n\n")
+
+	total := len(response.Corpora)
+	if total == 0 {
+		out.WriteString("No corpora available.\n")
+		return out.String()
+	}
+	fmt.Fprintf(&out, "Total Corpora: %d\n\n", total)
+
+	for idx := range response.Corpora {
+		entry := &response.Corpora[idx]
+		fmt.Fprintf(&out, "%d. %s\n", idx+1, entry.Name)
+		fmt.Fprintf(&out, " ID: %s\n", entry.ID)
+
+		// Optional details are left out when empty or not positive.
+		if entry.Description != "" {
+			fmt.Fprintf(&out, " Description: %s\n", entry.Description)
+		}
+		if entry.Documents > 0 {
+			fmt.Fprintf(&out, " Documents: %d\n", entry.Documents)
+		}
+		if entry.Tokens > 0 {
+			fmt.Fprintf(&out, " Tokens: %d\n", entry.Tokens)
+		}
+		if entry.Sentences > 0 {
+			fmt.Fprintf(&out, " Sentences: %d\n", entry.Sentences)
+		}
+		if entry.Paragraphs > 0 {
+			fmt.Fprintf(&out, " Paragraphs: %d\n", entry.Paragraphs)
+		}
+
+		out.WriteByte('\n')
+	}
+
+	return out.String()
+}
+
+// formatCorpusStatistics renders corpus statistics as plain text: the corpus
+// query ("(all available data)" when empty), the document and token counts,
+// and sentence and paragraph counts when positive. Entries of response.Fields
+// follow sorted by key, because Go randomizes map iteration order and the
+// output should be identical from call to call.
+func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
+	var out strings.Builder
+	out.WriteString("KorAP Corpus Statistics\n=======================\n\n")
+
+	query := corpus
+	if query == "" {
+		query = "(all available data)"
+	}
+	fmt.Fprintf(&out, "Corpus Query: %s\n\n", query)
+
+	fmt.Fprintf(&out, "Statistics:\n-----------\nDocuments: %d\nTokens: %d\n", response.Documents, response.Tokens)
+	if response.Sentences > 0 {
+		fmt.Fprintf(&out, "Sentences: %d\n", response.Sentences)
+	}
+	if response.Paragraphs > 0 {
+		fmt.Fprintf(&out, "Paragraphs: %d\n", response.Paragraphs)
+	}
+
+	if len(response.Fields) > 0 {
+		keys := make([]string, 0, len(response.Fields))
+		for key := range response.Fields {
+			keys = append(keys, key)
+		}
+		sort.Strings(keys)
+		out.WriteString("\nAdditional Fields:\n------------------\n")
+		for _, key := range keys {
+			fmt.Fprintf(&out, "%s: %v\n", key, response.Fields[key])
+		}
+	}
+
+	return out.String()
+}
diff --git a/tools/metadata_test.go b/tools/metadata_test.go
new file mode 100644
index 0000000..88c7e6d
--- /dev/null
+++ b/tools/metadata_test.go
@@ -0,0 +1,285 @@
+package tools
+
+import (
+ "context"
+ "testing"
+
+ "github.com/korap/korap-mcp/service"
+ "github.com/mark3labs/mcp-go/mcp"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestMetadataTool_Name checks the fixed tool name.
+func TestMetadataTool_Name(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	assert.Equal(t, "korap_metadata", tool.Name())
+}
+
+// TestMetadataTool_Description checks the fixed human-readable description.
+func TestMetadataTool_Description(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	const want = "Retrieve metadata and statistics for KorAP corpora"
+	assert.Equal(t, want, tool.Description())
+}
+
+// TestMetadataTool_InputSchema verifies the advertised JSON schema: object
+// type, both properties, the action enum and default, and the required list.
+func TestMetadataTool_InputSchema(t *testing.T) {
+	schema := NewMetadataTool(&service.Client{}).InputSchema()
+
+	assert.Equal(t, "object", schema["type"])
+
+	// Both parameters must be declared.
+	props, isMap := schema["properties"].(map[string]interface{})
+	assert.True(t, isMap)
+	for _, name := range []string{"action", "corpus"} {
+		assert.Contains(t, props, name)
+	}
+
+	// "action" is a string enum that defaults to "list".
+	actionSpec, isMap := props["action"].(map[string]interface{})
+	assert.True(t, isMap)
+	assert.Equal(t, "string", actionSpec["type"])
+
+	allowed, isSlice := actionSpec["enum"].([]string)
+	assert.True(t, isSlice)
+	for _, value := range []string{"list", "statistics"} {
+		assert.Contains(t, allowed, value)
+	}
+	assert.Equal(t, "list", actionSpec["default"])
+
+	// "action" must be listed as required.
+	required, isSlice := schema["required"].([]string)
+	assert.True(t, isSlice)
+	assert.Contains(t, required, "action")
+}
+
+// TestNewMetadataTool checks that the constructor keeps the client it is given.
+func TestNewMetadataTool(t *testing.T) {
+	client := new(service.Client)
+	tool := NewMetadataTool(client)
+	assert.NotNil(t, tool)
+	assert.Equal(t, client, tool.client)
+}
+
+// TestMetadataTool_Execute_MissingAction expects Execute to fail when the
+// request has no "action" argument.
+func TestMetadataTool_Execute_MissingAction(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	// Empty argument map: "action" is absent.
+	var req mcp.CallToolRequest
+	req.Params = mcp.CallToolParams{
+		Arguments: map[string]interface{}{},
+	}
+
+	_, err := tool.Execute(context.Background(), req)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "action parameter is required")
+}
+
+// TestMetadataTool_Execute_NilClient expects a valid action to fail with
+// "KorAP client not configured" when the tool has no client.
+func TestMetadataTool_Execute_NilClient(t *testing.T) {
+	tool := NewMetadataTool(nil)
+
+	var req mcp.CallToolRequest
+	req.Params = mcp.CallToolParams{
+		Arguments: map[string]interface{}{"action": "list"},
+	}
+
+	_, err := tool.Execute(context.Background(), req)
+
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "KorAP client not configured")
+}
+
+// TestMetadataTool_Execute_UnknownAction expects an unsupported action to be
+// rejected with an "unknown action" error.
+func TestMetadataTool_Execute_UnknownAction(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	var req mcp.CallToolRequest
+	req.Params = mcp.CallToolParams{
+		Arguments: map[string]interface{}{"action": "unknown"},
+	}
+
+	_, err := tool.Execute(context.Background(), req)
+
+	// Action validation runs before authentication, so this is the error
+	// that must surface.
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "unknown action: unknown")
+}
+
+// TestMetadataTool_Execute_StatisticsWithoutCorpus checks that "statistics"
+// without a corpus passes validation and only fails at the nil-client check.
+func TestMetadataTool_Execute_StatisticsWithoutCorpus(t *testing.T) {
+	tool := NewMetadataTool(nil)
+
+	var req mcp.CallToolRequest
+	req.Params = mcp.CallToolParams{
+		Arguments: map[string]interface{}{"action": "statistics"},
+	}
+
+	_, err := tool.Execute(context.Background(), req)
+
+	// corpus is optional, so the first failure is the missing client.
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "KorAP client not configured")
+}
+
+// TestMetadataTool_Execute_ParameterExtraction feeds Execute well-formed
+// argument sets for both actions.
+//
+// There is no mock server behind the client, so every case is expected to
+// end in an error (presumably from authentication); the test asserts only
+// that an error is returned, not which one.
+func TestMetadataTool_Execute_ParameterExtraction(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	// args abbreviates the argument maps below.
+	type args = map[string]interface{}
+	cases := []struct {
+		name      string
+		arguments args
+		expectErr bool
+	}{
+		{
+			name:      "list_action",
+			arguments: args{"action": "list"},
+			expectErr: true, // Will fail at authentication
+		},
+		{
+			name: "statistics_action",
+			arguments: args{
+				"action": "statistics",
+				"corpus": "test-corpus",
+			},
+			expectErr: true, // Will fail at authentication
+		},
+		{
+			name: "statistics_with_empty_corpus",
+			arguments: args{
+				"action": "statistics",
+				"corpus": "",
+			},
+			expectErr: true, // Will fail at authentication (corpus is optional)
+		},
+	}
+
+	// Run each case as a subtest against the same tool instance.
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var req mcp.CallToolRequest
+			req.Params = mcp.CallToolParams{Arguments: tc.arguments}
+
+			_, err := tool.Execute(context.Background(), req)
+			if !tc.expectErr {
+				return
+			}
+			assert.Error(t, err)
+		})
+	}
+}
+
+// TestMetadataTool_formatCorpusList checks the text rendering of an empty
+// corpus list and of a list with a fully and a partially populated corpus.
+func TestMetadataTool_formatCorpusList(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+
+	// Empty list: title plus the "no corpora" notice.
+	empty := &service.CorpusListResponse{Corpora: []service.CorpusInfo{}}
+	rendered := tool.formatCorpusList(empty)
+	for _, want := range []string{"KorAP Available Corpora", "No corpora available"} {
+		assert.Contains(t, rendered, want)
+	}
+
+	// Two corpora: one with every field set, one without the optional details.
+	full := service.CorpusInfo{
+		ID:          "corpus1",
+		Name:        "Test Corpus 1",
+		Description: "A test corpus",
+		Documents:   100,
+		Tokens:      50000,
+		Sentences:   2500,
+		Paragraphs:  500,
+	}
+	partial := service.CorpusInfo{
+		ID:        "corpus2",
+		Name:      "Test Corpus 2",
+		Documents: 200,
+		Tokens:    75000,
+	}
+	populated := &service.CorpusListResponse{Corpora: []service.CorpusInfo{full, partial}}
+
+	rendered = tool.formatCorpusList(populated)
+	for _, want := range []string{
+		"KorAP Available Corpora",
+		"Total Corpora: 2",
+		"1. Test Corpus 1",
+		"ID: corpus1",
+		"Description: A test corpus",
+		"Documents: 100",
+		"Tokens: 50000",
+		"Sentences: 2500",
+		"Paragraphs: 500",
+		"2. Test Corpus 2",
+		"ID: corpus2",
+		"Documents: 200",
+		"Tokens: 75000",
+	} {
+		assert.Contains(t, rendered, want)
+	}
+}
+
+// TestMetadataTool_formatCorpusStatistics checks the text rendering of minimal
+// statistics, complete statistics with extra fields, and an empty corpus query.
+func TestMetadataTool_formatCorpusStatistics(t *testing.T) {
+	tool := NewMetadataTool(&service.Client{})
+	expectAll := func(text string, wants ...string) {
+		for _, want := range wants {
+			assert.Contains(t, text, want)
+		}
+	}
+
+	// Minimal statistics: documents and tokens only.
+	minimal := &service.StatisticsResponse{Documents: 100, Tokens: 50000}
+	expectAll(tool.formatCorpusStatistics("test-corpus", minimal),
+		"KorAP Corpus Statistics", "Corpus Query: test-corpus",
+		"Documents: 100", "Tokens: 50000",
+	)
+
+	// Complete statistics, including additional fields.
+	complete := &service.StatisticsResponse{
+		Documents:  200,
+		Tokens:     100000,
+		Sentences:  5000,
+		Paragraphs: 1000,
+		Fields: map[string]interface{}{
+			"genre":    "literature",
+			"language": "German",
+			"year":     2023,
+		},
+	}
+	expectAll(tool.formatCorpusStatistics("complete-corpus", complete),
+		"KorAP Corpus Statistics",
+		"Corpus Query: complete-corpus",
+		"Documents: 200",
+		"Tokens: 100000",
+		"Sentences: 5000",
+		"Paragraphs: 1000",
+		"Additional Fields:",
+		"genre: literature",
+		"language: German",
+		"year: 2023",
+	)
+
+	// Empty corpus query: reported as "(all available data)".
+	expectAll(tool.formatCorpusStatistics("", minimal),
+		"KorAP Corpus Statistics", "Corpus Query: (all available data)",
+		"Documents: 100", "Tokens: 50000",
+	)
+}
diff --git a/tools/search.go b/tools/search.go
index 7a0422d..c96f407 100644
--- a/tools/search.go
+++ b/tools/search.go
@@ -49,7 +49,7 @@
},
"corpus": map[string]interface{}{
"type": "string",
- "description": "Virtual corpus to search in",
+ "description": "Virtual corpus query to filter search results (optional, when not provided searches all available data)",
},
"count": map[string]interface{}{
"type": "integer",