Blame - tools/metadata.go - KorAP/KorAP-MCP

blob: 6fd84b1833690f3e00ffa05535e54584b40b695e [file] [log] [blame]

Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame^]	1	package tools
				2
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
				12
				13	// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval
				14	type MetadataTool struct {
				15	client *service.Client
				16	}
				17
				18	// NewMetadataTool creates a new metadata tool instance
				19	func NewMetadataTool(client service.Client) MetadataTool {
				20	return &MetadataTool{
				21	client: client,
				22	}
				23	}
				24
				25	// Name returns the tool name
				26	func (m *MetadataTool) Name() string {
				27	return "korap_metadata"
				28	}
				29
				30	// Description returns the tool description
				31	func (m *MetadataTool) Description() string {
				32	return "Retrieve metadata and statistics for KorAP corpora"
				33	}
				34
				35	// InputSchema returns the JSON schema for tool parameters
				36	func (m *MetadataTool) InputSchema() map[string]interface{} {
				37	return map[string]interface{}{
				38	"type": "object",
				39	"properties": map[string]interface{}{
				40	"action": map[string]interface{}{
				41	"type": "string",
				42	"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
				43	"enum": []string{"list", "statistics"},
				44	"default": "list",
				45	},
				46	"corpus": map[string]interface{}{
				47	"type": "string",
				48	"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
				49	},
				50	},
				51	"required": []string{"action"},
				52	}
				53	}
				54
				55	// Execute performs the metadata retrieval operation
				56	func (m MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (mcp.CallToolResult, error) {
				57	log.Debug().
				58	Str("tool", m.Name()).
				59	Msg("Executing metadata tool")
				60
				61	// Extract required action parameter
				62	action, err := request.RequireString("action")
				63	if err != nil {
				64	return nil, fmt.Errorf("action parameter is required: %w", err)
				65	}
				66
				67	// Extract optional corpus parameter
				68	corpus := request.GetString("corpus", "")
				69
				70	log.Debug().
				71	Str("action", action).
				72	Str("corpus", corpus).
				73	Msg("Parsed metadata parameters")
				74
				75	// Validate parameters before authentication
				76	switch action {
				77	case "list":
				78	// No additional validation needed for list
				79	case "statistics":
				80	// No additional validation needed for statistics - corpus is optional
				81	default:
				82	return nil, fmt.Errorf("unknown action: %s", action)
				83	}
				84
				85	// Check if client is available and authenticated
				86	if m.client == nil {
				87	return nil, fmt.Errorf("KorAP client not configured")
				88	}
				89
				90	if !m.client.IsAuthenticated() {
				91	log.Warn().Msg("Client not authenticated, attempting authentication")
				92	if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil {
				93	return nil, fmt.Errorf("authentication failed: %w", err)
				94	}
				95	}
				96
				97	// Handle different actions
				98	switch action {
				99	case "list":
				100	return m.handleListCorpora(ctx)
				101	case "statistics":
				102	return m.handleCorpusStatistics(ctx, corpus)
				103	default:
				104	// This should never be reached due to validation above
				105	return nil, fmt.Errorf("unknown action: %s", action)
				106	}
				107	}
				108
				109	// handleListCorpora retrieves and formats the list of available corpora
				110	func (m MetadataTool) handleListCorpora(ctx context.Context) (mcp.CallToolResult, error) {
				111	log.Debug().Msg("Retrieving corpus list")
				112
				113	var corpusListResp service.CorpusListResponse
				114	err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
				115	if err != nil {
				116	log.Error().
				117	Err(err).
				118	Msg("Failed to retrieve corpus list")
				119	return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
				120	}
				121
				122	log.Info().
				123	Int("corpus_count", len(corpusListResp.Corpora)).
				124	Msg("Corpus list retrieved successfully")
				125
				126	result := m.formatCorpusList(&corpusListResp)
				127	return mcp.NewToolResultText(result), nil
				128	}
				129
				130	// handleCorpusStatistics retrieves and formats statistics for a corpus query
				131	func (m MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (mcp.CallToolResult, error) {
				132	log.Debug().
				133	Str("corpus", corpus).
				134	Msg("Retrieving corpus statistics")
				135
				136	var statsResp service.StatisticsResponse
				137	var endpoint string
				138	if corpus == "" {
				139	endpoint = "statistics"
				140	} else {
				141	endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus)
				142	}
				143
				144	err := m.client.GetJSON(ctx, endpoint, &statsResp)
				145	if err != nil {
				146	log.Error().
				147	Err(err).
				148	Str("corpus", corpus).
				149	Msg("Failed to retrieve corpus statistics")
				150	return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
				151	}
				152
				153	log.Info().
				154	Str("corpus", corpus).
				155	Int("documents", statsResp.Documents).
				156	Int("tokens", statsResp.Tokens).
				157	Msg("Corpus statistics retrieved successfully")
				158
				159	result := m.formatCorpusStatistics(corpus, &statsResp)
				160	return mcp.NewToolResultText(result), nil
				161	}
				162
				163	// formatCorpusList formats the corpus list response into a readable text format
				164	func (m MetadataTool) formatCorpusList(response service.CorpusListResponse) string {
				165	var result strings.Builder
				166
				167	result.WriteString("KorAP Available Corpora\n")
				168	result.WriteString("=======================\n\n")
				169
				170	if len(response.Corpora) == 0 {
				171	result.WriteString("No corpora available.\n")
				172	return result.String()
				173	}
				174
				175	result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora)))
				176
				177	for i, corpus := range response.Corpora {
				178	result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name))
				179	result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID))
				180
				181	if corpus.Description != "" {
				182	result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description))
				183	}
				184
				185	if corpus.Documents > 0 {
				186	result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents))
				187	}
				188
				189	if corpus.Tokens > 0 {
				190	result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens))
				191	}
				192
				193	if corpus.Sentences > 0 {
				194	result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences))
				195	}
				196
				197	if corpus.Paragraphs > 0 {
				198	result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs))
				199	}
				200
				201	result.WriteString("\n")
				202	}
				203
				204	return result.String()
				205	}
				206
				207	// formatCorpusStatistics formats the corpus statistics response into a readable text format
				208	func (m MetadataTool) formatCorpusStatistics(corpus string, response service.StatisticsResponse) string {
				209	var result strings.Builder
				210
				211	result.WriteString("KorAP Corpus Statistics\n")
				212	result.WriteString("=======================\n\n")
				213
				214	if corpus == "" {
				215	result.WriteString("Corpus Query: (all available data)\n\n")
				216	} else {
				217	result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus))
				218	}
				219
				220	result.WriteString("Statistics:\n")
				221	result.WriteString("-----------\n")
				222	result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents))
				223	result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens))
				224
				225	if response.Sentences > 0 {
				226	result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences))
				227	}
				228
				229	if response.Paragraphs > 0 {
				230	result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs))
				231	}
				232
				233	// Add any additional fields if present
				234	if len(response.Fields) > 0 {
				235	result.WriteString("\nAdditional Fields:\n")
				236	result.WriteString("------------------\n")
				237	for key, value := range response.Fields {
				238	result.WriteString(fmt.Sprintf("%s: %v\n", key, value))
				239	}
				240	}
				241
				242	return result.String()
				243	}