Blame - tools/metadata.go - KorAP/KorAP-MCP

blob: c3da81323f1c0852e87a46011c02cf4f5962347c [file] [log] [blame]

Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	1	package tools
				2
import (
	"context"
	"fmt"
	"net/url"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/korap/korap-mcp/validation"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
				13
				14	// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval
				15	type MetadataTool struct {
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	16	client *service.Client
				17	validator *validation.Validator
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	18	}
				19
				20	// NewMetadataTool creates a new metadata tool instance
				21	func NewMetadataTool(client service.Client) MetadataTool {
				22	return &MetadataTool{
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	23	client: client,
				24	validator: validation.New(log.Logger),
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	25	}
				26	}
				27
				28	// Name returns the tool name
				29	func (m *MetadataTool) Name() string {
				30	return "korap_metadata"
				31	}
				32
				33	// Description returns the tool description
				34	func (m *MetadataTool) Description() string {
				35	return "Retrieve metadata and statistics for KorAP corpora"
				36	}
				37
				38	// InputSchema returns the JSON schema for tool parameters
				39	func (m *MetadataTool) InputSchema() map[string]interface{} {
				40	return map[string]interface{}{
				41	"type": "object",
				42	"properties": map[string]interface{}{
				43	"action": map[string]interface{}{
				44	"type": "string",
				45	"description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
				46	"enum": []string{"list", "statistics"},
				47	"default": "list",
				48	},
				49	"corpus": map[string]interface{}{
				50	"type": "string",
				51	"description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
				52	},
				53	},
				54	"required": []string{"action"},
				55	}
				56	}
				57
				58	// Execute performs the metadata retrieval operation
				59	func (m MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (mcp.CallToolResult, error) {
				60	log.Debug().
				61	Str("tool", m.Name()).
				62	Msg("Executing metadata tool")
				63
				64	// Extract required action parameter
				65	action, err := request.RequireString("action")
				66	if err != nil {
				67	return nil, fmt.Errorf("action parameter is required: %w", err)
				68	}
				69
				70	// Extract optional corpus parameter
				71	corpus := request.GetString("corpus", "")
				72
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	73	// Validate the metadata request using the validation package
				74	metadataReq := validation.MetadataRequest{
				75	Action: action,
				76	Corpus: corpus,
				77	}
				78
				79	if err := m.validator.ValidateMetadataRequest(metadataReq); err != nil {
				80	log.Warn().
				81	Err(err).
				82	Interface("request", metadataReq).
				83	Msg("Metadata request validation failed")
				84	return nil, fmt.Errorf("invalid metadata request: %w", err)
				85	}
				86
				87	// Sanitize inputs
				88	if corpus != "" {
				89	corpus = m.validator.SanitizeCorpusID(corpus)
				90	}
				91
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	92	log.Debug().
				93	Str("action", action).
				94	Str("corpus", corpus).
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	95	Msg("Parsed and validated metadata parameters")
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	96
				97	// Check if client is available and authenticated
				98	if m.client == nil {
				99	return nil, fmt.Errorf("KorAP client not configured")
				100	}
				101
				102	if !m.client.IsAuthenticated() {
				103	log.Warn().Msg("Client not authenticated, attempting authentication")
				104	if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil {
				105	return nil, fmt.Errorf("authentication failed: %w", err)
				106	}
				107	}
				108
				109	// Handle different actions
				110	switch action {
				111	case "list":
				112	return m.handleListCorpora(ctx)
				113	case "statistics":
				114	return m.handleCorpusStatistics(ctx, corpus)
				115	default:
				116	// This should never be reached due to validation above
				117	return nil, fmt.Errorf("unknown action: %s", action)
				118	}
				119	}
				120
				121	// handleListCorpora retrieves and formats the list of available corpora
				122	func (m MetadataTool) handleListCorpora(ctx context.Context) (mcp.CallToolResult, error) {
				123	log.Debug().Msg("Retrieving corpus list")
				124
				125	var corpusListResp service.CorpusListResponse
				126	err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
				127	if err != nil {
				128	log.Error().
				129	Err(err).
				130	Msg("Failed to retrieve corpus list")
				131	return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
				132	}
				133
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	134	// Validate the response
				135	if err := m.validator.ValidateCorpusListResponse(&corpusListResp); err != nil {
				136	log.Warn().
				137	Err(err).
				138	Msg("Corpus list response validation failed, but continuing with potentially invalid data")
				139	// Continue processing despite validation errors to be resilient
				140	}
				141
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	142	log.Info().
				143	Int("corpus_count", len(corpusListResp.Corpora)).
				144	Msg("Corpus list retrieved successfully")
				145
				146	result := m.formatCorpusList(&corpusListResp)
				147	return mcp.NewToolResultText(result), nil
				148	}
				149
				150	// handleCorpusStatistics retrieves and formats statistics for a corpus query
				151	func (m MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (mcp.CallToolResult, error) {
				152	log.Debug().
				153	Str("corpus", corpus).
				154	Msg("Retrieving corpus statistics")
				155
				156	var statsResp service.StatisticsResponse
				157	var endpoint string
				158	if corpus == "" {
				159	endpoint = "statistics"
				160	} else {
				161	endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus)
				162	}
				163
				164	err := m.client.GetJSON(ctx, endpoint, &statsResp)
				165	if err != nil {
				166	log.Error().
				167	Err(err).
				168	Str("corpus", corpus).
				169	Msg("Failed to retrieve corpus statistics")
				170	return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
				171	}
				172
Akron	81f709c	2025-06-12 17:30:55 +0200	[diff] [blame^]	173	// Validate the response
				174	if err := m.validator.ValidateStatisticsResponse(&statsResp); err != nil {
				175	log.Warn().
				176	Err(err).
				177	Msg("Statistics response validation failed, but continuing with potentially invalid data")
				178	// Continue processing despite validation errors to be resilient
				179	}
				180
Akron	bd154ea	2025-06-12 17:01:58 +0200	[diff] [blame]	181	log.Info().
				182	Str("corpus", corpus).
				183	Int("documents", statsResp.Documents).
				184	Int("tokens", statsResp.Tokens).
				185	Msg("Corpus statistics retrieved successfully")
				186
				187	result := m.formatCorpusStatistics(corpus, &statsResp)
				188	return mcp.NewToolResultText(result), nil
				189	}
				190
				191	// formatCorpusList formats the corpus list response into a readable text format
				192	func (m MetadataTool) formatCorpusList(response service.CorpusListResponse) string {
				193	var result strings.Builder
				194
				195	result.WriteString("KorAP Available Corpora\n")
				196	result.WriteString("=======================\n\n")
				197
				198	if len(response.Corpora) == 0 {
				199	result.WriteString("No corpora available.\n")
				200	return result.String()
				201	}
				202
				203	result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora)))
				204
				205	for i, corpus := range response.Corpora {
				206	result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name))
				207	result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID))
				208
				209	if corpus.Description != "" {
				210	result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description))
				211	}
				212
				213	if corpus.Documents > 0 {
				214	result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents))
				215	}
				216
				217	if corpus.Tokens > 0 {
				218	result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens))
				219	}
				220
				221	if corpus.Sentences > 0 {
				222	result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences))
				223	}
				224
				225	if corpus.Paragraphs > 0 {
				226	result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs))
				227	}
				228
				229	result.WriteString("\n")
				230	}
				231
				232	return result.String()
				233	}
				234
				235	// formatCorpusStatistics formats the corpus statistics response into a readable text format
				236	func (m MetadataTool) formatCorpusStatistics(corpus string, response service.StatisticsResponse) string {
				237	var result strings.Builder
				238
				239	result.WriteString("KorAP Corpus Statistics\n")
				240	result.WriteString("=======================\n\n")
				241
				242	if corpus == "" {
				243	result.WriteString("Corpus Query: (all available data)\n\n")
				244	} else {
				245	result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus))
				246	}
				247
				248	result.WriteString("Statistics:\n")
				249	result.WriteString("-----------\n")
				250	result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents))
				251	result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens))
				252
				253	if response.Sentences > 0 {
				254	result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences))
				255	}
				256
				257	if response.Paragraphs > 0 {
				258	result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs))
				259	}
				260
				261	// Add any additional fields if present
				262	if len(response.Fields) > 0 {
				263	result.WriteString("\nAdditional Fields:\n")
				264	result.WriteString("------------------\n")
				265	for key, value := range response.Fields {
				266	result.WriteString(fmt.Sprintf("%s: %v\n", key, value))
				267	}
				268	}
				269
				270	return result.String()
				271	}