blob: 6fd84b1833690f3e00ffa05535e54584b40b695e [file] [log] [blame]
Akronbd154ea2025-06-12 17:01:58 +02001package tools
2
import (
	"context"
	"fmt"
	"net/url"
	"sort"
	"strings"

	"github.com/korap/korap-mcp/service"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/rs/zerolog/log"
)
12
13// MetadataTool implements the Tool interface for KorAP corpus metadata retrieval
14type MetadataTool struct {
15 client *service.Client
16}
17
18// NewMetadataTool creates a new metadata tool instance
19func NewMetadataTool(client *service.Client) *MetadataTool {
20 return &MetadataTool{
21 client: client,
22 }
23}
24
25// Name returns the tool name
26func (m *MetadataTool) Name() string {
27 return "korap_metadata"
28}
29
30// Description returns the tool description
31func (m *MetadataTool) Description() string {
32 return "Retrieve metadata and statistics for KorAP corpora"
33}
34
35// InputSchema returns the JSON schema for tool parameters
36func (m *MetadataTool) InputSchema() map[string]interface{} {
37 return map[string]interface{}{
38 "type": "object",
39 "properties": map[string]interface{}{
40 "action": map[string]interface{}{
41 "type": "string",
42 "description": "Type of metadata to retrieve: 'list' for corpus list, 'statistics' for corpus statistics",
43 "enum": []string{"list", "statistics"},
44 "default": "list",
45 },
46 "corpus": map[string]interface{}{
47 "type": "string",
48 "description": "Virtual corpus query to filter results (optional, when not provided refers to all data available to the user)",
49 },
50 },
51 "required": []string{"action"},
52 }
53}
54
55// Execute performs the metadata retrieval operation
56func (m *MetadataTool) Execute(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
57 log.Debug().
58 Str("tool", m.Name()).
59 Msg("Executing metadata tool")
60
61 // Extract required action parameter
62 action, err := request.RequireString("action")
63 if err != nil {
64 return nil, fmt.Errorf("action parameter is required: %w", err)
65 }
66
67 // Extract optional corpus parameter
68 corpus := request.GetString("corpus", "")
69
70 log.Debug().
71 Str("action", action).
72 Str("corpus", corpus).
73 Msg("Parsed metadata parameters")
74
75 // Validate parameters before authentication
76 switch action {
77 case "list":
78 // No additional validation needed for list
79 case "statistics":
80 // No additional validation needed for statistics - corpus is optional
81 default:
82 return nil, fmt.Errorf("unknown action: %s", action)
83 }
84
85 // Check if client is available and authenticated
86 if m.client == nil {
87 return nil, fmt.Errorf("KorAP client not configured")
88 }
89
90 if !m.client.IsAuthenticated() {
91 log.Warn().Msg("Client not authenticated, attempting authentication")
92 if err := m.client.AuthenticateWithClientCredentials(ctx); err != nil {
93 return nil, fmt.Errorf("authentication failed: %w", err)
94 }
95 }
96
97 // Handle different actions
98 switch action {
99 case "list":
100 return m.handleListCorpora(ctx)
101 case "statistics":
102 return m.handleCorpusStatistics(ctx, corpus)
103 default:
104 // This should never be reached due to validation above
105 return nil, fmt.Errorf("unknown action: %s", action)
106 }
107}
108
109// handleListCorpora retrieves and formats the list of available corpora
110func (m *MetadataTool) handleListCorpora(ctx context.Context) (*mcp.CallToolResult, error) {
111 log.Debug().Msg("Retrieving corpus list")
112
113 var corpusListResp service.CorpusListResponse
114 err := m.client.GetJSON(ctx, "corpus", &corpusListResp)
115 if err != nil {
116 log.Error().
117 Err(err).
118 Msg("Failed to retrieve corpus list")
119 return nil, fmt.Errorf("failed to retrieve corpus list: %w", err)
120 }
121
122 log.Info().
123 Int("corpus_count", len(corpusListResp.Corpora)).
124 Msg("Corpus list retrieved successfully")
125
126 result := m.formatCorpusList(&corpusListResp)
127 return mcp.NewToolResultText(result), nil
128}
129
130// handleCorpusStatistics retrieves and formats statistics for a corpus query
131func (m *MetadataTool) handleCorpusStatistics(ctx context.Context, corpus string) (*mcp.CallToolResult, error) {
132 log.Debug().
133 Str("corpus", corpus).
134 Msg("Retrieving corpus statistics")
135
136 var statsResp service.StatisticsResponse
137 var endpoint string
138 if corpus == "" {
139 endpoint = "statistics"
140 } else {
141 endpoint = fmt.Sprintf("statistics?corpusQuery=%s", corpus)
142 }
143
144 err := m.client.GetJSON(ctx, endpoint, &statsResp)
145 if err != nil {
146 log.Error().
147 Err(err).
148 Str("corpus", corpus).
149 Msg("Failed to retrieve corpus statistics")
150 return nil, fmt.Errorf("failed to retrieve corpus statistics: %w", err)
151 }
152
153 log.Info().
154 Str("corpus", corpus).
155 Int("documents", statsResp.Documents).
156 Int("tokens", statsResp.Tokens).
157 Msg("Corpus statistics retrieved successfully")
158
159 result := m.formatCorpusStatistics(corpus, &statsResp)
160 return mcp.NewToolResultText(result), nil
161}
162
163// formatCorpusList formats the corpus list response into a readable text format
164func (m *MetadataTool) formatCorpusList(response *service.CorpusListResponse) string {
165 var result strings.Builder
166
167 result.WriteString("KorAP Available Corpora\n")
168 result.WriteString("=======================\n\n")
169
170 if len(response.Corpora) == 0 {
171 result.WriteString("No corpora available.\n")
172 return result.String()
173 }
174
175 result.WriteString(fmt.Sprintf("Total Corpora: %d\n\n", len(response.Corpora)))
176
177 for i, corpus := range response.Corpora {
178 result.WriteString(fmt.Sprintf("%d. %s\n", i+1, corpus.Name))
179 result.WriteString(fmt.Sprintf(" ID: %s\n", corpus.ID))
180
181 if corpus.Description != "" {
182 result.WriteString(fmt.Sprintf(" Description: %s\n", corpus.Description))
183 }
184
185 if corpus.Documents > 0 {
186 result.WriteString(fmt.Sprintf(" Documents: %d\n", corpus.Documents))
187 }
188
189 if corpus.Tokens > 0 {
190 result.WriteString(fmt.Sprintf(" Tokens: %d\n", corpus.Tokens))
191 }
192
193 if corpus.Sentences > 0 {
194 result.WriteString(fmt.Sprintf(" Sentences: %d\n", corpus.Sentences))
195 }
196
197 if corpus.Paragraphs > 0 {
198 result.WriteString(fmt.Sprintf(" Paragraphs: %d\n", corpus.Paragraphs))
199 }
200
201 result.WriteString("\n")
202 }
203
204 return result.String()
205}
206
207// formatCorpusStatistics formats the corpus statistics response into a readable text format
208func (m *MetadataTool) formatCorpusStatistics(corpus string, response *service.StatisticsResponse) string {
209 var result strings.Builder
210
211 result.WriteString("KorAP Corpus Statistics\n")
212 result.WriteString("=======================\n\n")
213
214 if corpus == "" {
215 result.WriteString("Corpus Query: (all available data)\n\n")
216 } else {
217 result.WriteString(fmt.Sprintf("Corpus Query: %s\n\n", corpus))
218 }
219
220 result.WriteString("Statistics:\n")
221 result.WriteString("-----------\n")
222 result.WriteString(fmt.Sprintf("Documents: %d\n", response.Documents))
223 result.WriteString(fmt.Sprintf("Tokens: %d\n", response.Tokens))
224
225 if response.Sentences > 0 {
226 result.WriteString(fmt.Sprintf("Sentences: %d\n", response.Sentences))
227 }
228
229 if response.Paragraphs > 0 {
230 result.WriteString(fmt.Sprintf("Paragraphs: %d\n", response.Paragraphs))
231 }
232
233 // Add any additional fields if present
234 if len(response.Fields) > 0 {
235 result.WriteString("\nAdditional Fields:\n")
236 result.WriteString("------------------\n")
237 for key, value := range response.Fields {
238 result.WriteString(fmt.Sprintf("%s: %v\n", key, value))
239 }
240 }
241
242 return result.String()
243}