blob: e2ac02d0bca9c7e948e1d0f9390f0da494658d0f [file] [log] [blame]
Akron2cbdab52025-05-23 17:57:10 +02001package main
2
3import (
4 "bytes"
5 "encoding/json"
Akron74e1c072025-05-26 14:38:25 +02006 "errors"
Akron2cbdab52025-05-23 17:57:10 +02007 "fmt"
8 "net/http"
9 "net/http/httptest"
10 "net/url"
Akron74e1c072025-05-26 14:38:25 +020011 "strings"
Akron2cbdab52025-05-23 17:57:10 +020012 "testing"
13
Akrona00d4752025-05-26 17:34:36 +020014 tmconfig "github.com/KorAP/KoralPipe-TermMapper/config"
Akronfa55bb22025-05-26 15:10:42 +020015 "github.com/KorAP/KoralPipe-TermMapper/mapper"
Akron2cbdab52025-05-23 17:57:10 +020016 "github.com/gofiber/fiber/v2"
Akron74e1c072025-05-26 14:38:25 +020017 "github.com/stretchr/testify/assert"
18 "github.com/stretchr/testify/require"
Akron2cbdab52025-05-23 17:57:10 +020019)
20
// FuzzInput represents the input data for the fuzzer. Its fields mirror, in
// order, the arguments handed to the fuzz targets in this file: the mapping
// ID, the direction, the foundry and layer values for both sides, and the raw
// request body.
type FuzzInput struct {
	MapID, Direction   string
	FoundryA, FoundryB string
	LayerA, LayerB     string
	Body               []byte
}
31
32func FuzzTransformEndpoint(f *testing.F) {
Akrona00d4752025-05-26 17:34:36 +020033 // Create test mapping list
34 mappingList := tmconfig.MappingList{
35 ID: "test-mapper",
36 FoundryA: "opennlp",
37 LayerA: "p",
38 FoundryB: "upos",
39 LayerB: "p",
40 Mappings: []tmconfig.MappingRule{
41 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
42 "[DET] <> [opennlp/p=DET]",
43 },
Akron2cbdab52025-05-23 17:57:10 +020044 }
45
46 // Create mapper
Akrona00d4752025-05-26 17:34:36 +020047 m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList})
Akron2cbdab52025-05-23 17:57:10 +020048 if err != nil {
49 f.Fatal(err)
50 }
51
Akron40aaa632025-06-03 17:57:52 +020052 // Create mock config for testing
Akron06d21f02025-06-04 14:36:07 +020053 mockConfig := &tmconfig.MappingConfig{
Akron40aaa632025-06-03 17:57:52 +020054 Lists: []tmconfig.MappingList{mappingList},
55 }
56
Akron2cbdab52025-05-23 17:57:10 +020057 // Create fiber app
58 app := fiber.New(fiber.Config{
59 DisableStartupMessage: true,
60 ErrorHandler: func(c *fiber.Ctx, err error) error {
Akron40aaa632025-06-03 17:57:52 +020061 // For body limit errors, return 413 status code
62 if err.Error() == "body size exceeds the given limit" || errors.Is(err, fiber.ErrRequestEntityTooLarge) {
63 return c.Status(fiber.StatusRequestEntityTooLarge).JSON(fiber.Map{
64 "error": fmt.Sprintf("request body too large (max %d bytes)", maxInputLength),
65 })
66 }
67 // For other errors, return 500 status code
Akron2cbdab52025-05-23 17:57:10 +020068 return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{
Akron40aaa632025-06-03 17:57:52 +020069 "error": err.Error(),
Akron2cbdab52025-05-23 17:57:10 +020070 })
71 },
Akron74e1c072025-05-26 14:38:25 +020072 BodyLimit: maxInputLength,
Akron2cbdab52025-05-23 17:57:10 +020073 })
Akron40aaa632025-06-03 17:57:52 +020074 setupRoutes(app, m, mockConfig)
Akron2cbdab52025-05-23 17:57:10 +020075
76 // Add seed corpus
77 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token"}`)) // Valid minimal input
78 f.Add("test-mapper", "btoa", "custom", "", "", "", []byte(`{"@type": "koral:token"}`)) // Valid with foundry override
79 f.Add("", "", "", "", "", "", []byte(`{}`)) // Empty parameters
80 f.Add("nonexistent", "invalid", "!@#$", "%^&*", "()", "[]", []byte(`invalid json`)) // Invalid everything
81 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": null}`)) // Valid JSON, invalid structure
82 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": {"@type": "unknown"}}`)) // Unknown type
83 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": {"@type": "koral:term"}}`)) // Missing required fields
Akron74e1c072025-05-26 14:38:25 +020084 f.Add("0", "0", strings.Repeat("\x83", 1000), "0", "Q", "", []byte("0")) // Failing fuzz test case
Akron2cbdab52025-05-23 17:57:10 +020085
86 f.Fuzz(func(t *testing.T, mapID, dir, foundryA, foundryB, layerA, layerB string, body []byte) {
Akron74e1c072025-05-26 14:38:25 +020087
88 // Validate input first
89 if err := validateInput(mapID, dir, foundryA, foundryB, layerA, layerB, body); err != nil {
90 // Skip this test case as it's invalid
91 t.Skip(err)
92 }
93
Akron2cbdab52025-05-23 17:57:10 +020094 // Build URL with query parameters
95 params := url.Values{}
96 if dir != "" {
97 params.Set("dir", dir)
98 }
99 if foundryA != "" {
100 params.Set("foundryA", foundryA)
101 }
102 if foundryB != "" {
103 params.Set("foundryB", foundryB)
104 }
105 if layerA != "" {
106 params.Set("layerA", layerA)
107 }
108 if layerB != "" {
109 params.Set("layerB", layerB)
110 }
111
112 url := fmt.Sprintf("/%s/query", url.PathEscape(mapID))
113 if len(params) > 0 {
114 url += "?" + params.Encode()
115 }
116
117 // Make request
118 req := httptest.NewRequest(http.MethodPost, url, bytes.NewReader(body))
119 req.Header.Set("Content-Type", "application/json")
120 resp, err := app.Test(req)
121 if err != nil {
122 t.Fatal(err)
123 }
124 defer resp.Body.Close()
125
126 // Verify that we always get a valid response
127 if resp.StatusCode != http.StatusOK &&
128 resp.StatusCode != http.StatusBadRequest &&
129 resp.StatusCode != http.StatusInternalServerError {
130 t.Errorf("unexpected status code: %d", resp.StatusCode)
131 }
132
133 // Verify that the response is valid JSON
Akrone5aaf0a2025-06-02 16:43:54 +0200134 var result any
Akron2cbdab52025-05-23 17:57:10 +0200135 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
136 t.Errorf("invalid JSON response: %v", err)
137 }
138
139 // For error responses, verify that we have an error message
140 if resp.StatusCode != http.StatusOK {
Akrone5aaf0a2025-06-02 16:43:54 +0200141 // For error responses, we expect a JSON object with an error field
142 if resultMap, ok := result.(map[string]any); ok {
143 if errMsg, ok := resultMap["error"].(string); !ok || errMsg == "" {
144 t.Error("error response missing error message")
145 }
146 } else {
147 t.Error("error response should be a JSON object")
Akron2cbdab52025-05-23 17:57:10 +0200148 }
149 }
150 })
151}
Akron74e1c072025-05-26 14:38:25 +0200152
Akron4de47a92025-06-27 11:58:11 +0200153func FuzzResponseTransformEndpoint(f *testing.F) {
154 // Create test mapping list
155 mappingList := tmconfig.MappingList{
156 ID: "test-mapper",
157 FoundryA: "marmot",
158 LayerA: "m",
159 FoundryB: "opennlp",
160 LayerB: "p",
161 Mappings: []tmconfig.MappingRule{
162 "[gender=masc] <> [p=M & m=M]",
163 },
164 }
165
166 // Create mapper
167 m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList})
168 if err != nil {
169 f.Fatal(err)
170 }
171
172 // Create mock config for testing
173 mockConfig := &tmconfig.MappingConfig{
174 Lists: []tmconfig.MappingList{mappingList},
175 }
176
177 // Create fiber app
178 app := fiber.New(fiber.Config{
179 DisableStartupMessage: true,
180 ErrorHandler: func(c *fiber.Ctx, err error) error {
181 // For body limit errors, return 413 status code
182 if err.Error() == "body size exceeds the given limit" || errors.Is(err, fiber.ErrRequestEntityTooLarge) {
183 return c.Status(fiber.StatusRequestEntityTooLarge).JSON(fiber.Map{
184 "error": fmt.Sprintf("request body too large (max %d bytes)", maxInputLength),
185 })
186 }
187 // For other errors, return 500 status code
188 return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{
189 "error": err.Error(),
190 })
191 },
192 BodyLimit: maxInputLength,
193 })
194 setupRoutes(app, m, mockConfig)
195
196 // Add seed corpus
197 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": "<span>test</span>"}`)) // Valid minimal input
198 f.Add("test-mapper", "btoa", "custom", "", "", "", []byte(`{"snippet": "<span title=\"test\">word</span>"}`)) // Valid with foundry override
199 f.Add("", "", "", "", "", "", []byte(`{}`)) // Empty parameters
200 f.Add("nonexistent", "invalid", "!@#$", "%^&*", "()", "[]", []byte(`invalid json`)) // Invalid everything
201 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": null}`)) // Valid JSON with null snippet
202 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": 123}`)) // Valid JSON with non-string snippet
203 f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"}`)) // Valid response snippet
204
205 f.Fuzz(func(t *testing.T, mapID, dir, foundryA, foundryB, layerA, layerB string, body []byte) {
206
207 // Validate input first
208 if err := validateInput(mapID, dir, foundryA, foundryB, layerA, layerB, body); err != nil {
209 // Skip this test case as it's invalid
210 t.Skip(err)
211 }
212
213 // Build URL with query parameters
214 params := url.Values{}
215 if dir != "" {
216 params.Set("dir", dir)
217 }
218 if foundryA != "" {
219 params.Set("foundryA", foundryA)
220 }
221 if foundryB != "" {
222 params.Set("foundryB", foundryB)
223 }
224 if layerA != "" {
225 params.Set("layerA", layerA)
226 }
227 if layerB != "" {
228 params.Set("layerB", layerB)
229 }
230
231 url := fmt.Sprintf("/%s/response", url.PathEscape(mapID))
232 if len(params) > 0 {
233 url += "?" + params.Encode()
234 }
235
236 // Make request
237 req := httptest.NewRequest(http.MethodPost, url, bytes.NewReader(body))
238 req.Header.Set("Content-Type", "application/json")
239 resp, err := app.Test(req)
240 if err != nil {
241 t.Fatal(err)
242 }
243 defer resp.Body.Close()
244
245 // Verify that we always get a valid response
246 if resp.StatusCode != http.StatusOK &&
247 resp.StatusCode != http.StatusBadRequest &&
248 resp.StatusCode != http.StatusInternalServerError {
249 t.Errorf("unexpected status code: %d", resp.StatusCode)
250 }
251
252 // Verify that the response is valid JSON
253 var result any
254 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
255 t.Errorf("invalid JSON response: %v", err)
256 }
257
258 // For error responses, verify that we have an error message
259 if resp.StatusCode != http.StatusOK {
260 // For error responses, we expect a JSON object with an error field
261 if resultMap, ok := result.(map[string]any); ok {
262 if errMsg, ok := resultMap["error"].(string); !ok || errMsg == "" {
263 t.Error("error response missing error message")
264 }
265 } else {
266 t.Error("error response should be a JSON object")
267 }
268 }
269 })
270}
271
Akron74e1c072025-05-26 14:38:25 +0200272func TestLargeInput(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200273 // Create test mapping list
274 mappingList := tmconfig.MappingList{
275 ID: "test-mapper",
276 Mappings: []tmconfig.MappingRule{
277 "[A] <> [B]",
278 },
279 }
Akron74e1c072025-05-26 14:38:25 +0200280
281 // Create mapper
Akrona00d4752025-05-26 17:34:36 +0200282 m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList})
Akron74e1c072025-05-26 14:38:25 +0200283 require.NoError(t, err)
284
Akron40aaa632025-06-03 17:57:52 +0200285 // Create mock config for testing
Akron06d21f02025-06-04 14:36:07 +0200286 mockConfig := &tmconfig.MappingConfig{
Akron40aaa632025-06-03 17:57:52 +0200287 Lists: []tmconfig.MappingList{mappingList},
288 }
289
Akron74e1c072025-05-26 14:38:25 +0200290 // Create fiber app
291 app := fiber.New(fiber.Config{
292 DisableStartupMessage: true,
293 ErrorHandler: func(c *fiber.Ctx, err error) error {
294 // For body limit errors, return 413 status code
295 if err.Error() == "body size exceeds the given limit" || errors.Is(err, fiber.ErrRequestEntityTooLarge) {
296 return c.Status(fiber.StatusRequestEntityTooLarge).JSON(fiber.Map{
297 "error": fmt.Sprintf("request body too large (max %d bytes)", maxInputLength),
298 })
299 }
300 // For other errors, return 500 status code
301 return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{
302 "error": err.Error(),
303 })
304 },
305 BodyLimit: maxInputLength,
306 })
Akron40aaa632025-06-03 17:57:52 +0200307 setupRoutes(app, m, mockConfig)
Akron74e1c072025-05-26 14:38:25 +0200308
309 tests := []struct {
310 name string
311 mapID string
312 direction string
313 foundryA string
314 foundryB string
315 layerA string
316 layerB string
317 input string
318 expectedCode int
319 expectedError string
320 }{
321 {
322 name: "Large map ID",
323 mapID: strings.Repeat("a", maxParamLength+1),
324 direction: "atob",
325 input: "{}",
326 expectedCode: http.StatusBadRequest,
Akron69d43bf2025-05-26 17:09:00 +0200327 expectedError: "mapID too long (max 1024 bytes)",
Akron74e1c072025-05-26 14:38:25 +0200328 },
329 {
330 name: "Large direction",
331 mapID: "test-mapper",
332 direction: strings.Repeat("a", maxParamLength+1),
333 input: "{}",
334 expectedCode: http.StatusBadRequest,
Akron69d43bf2025-05-26 17:09:00 +0200335 expectedError: "dir too long (max 1024 bytes)",
Akron74e1c072025-05-26 14:38:25 +0200336 },
337 {
338 name: "Large foundryA",
339 mapID: "test-mapper",
340 direction: "atob",
341 foundryA: strings.Repeat("a", maxParamLength+1),
342 input: "{}",
343 expectedCode: http.StatusBadRequest,
344 expectedError: "foundryA too long (max 1024 bytes)",
345 },
346 {
347 name: "Invalid characters in mapID",
348 mapID: "test<>mapper",
349 direction: "atob",
350 input: "{}",
351 expectedCode: http.StatusBadRequest,
352 expectedError: "mapID contains invalid characters",
353 },
354 {
355 name: "Large request body",
356 mapID: "test-mapper",
357 direction: "atob",
358 input: strings.Repeat("a", maxInputLength+1),
359 expectedCode: http.StatusRequestEntityTooLarge,
360 expectedError: "body size exceeds the given limit",
361 },
362 }
363
364 for _, tt := range tests {
365 t.Run(tt.name, func(t *testing.T) {
366 // Build URL with query parameters
367 url := "/" + tt.mapID + "/query"
368 if tt.direction != "" {
369 url += "?dir=" + tt.direction
370 }
371 if tt.foundryA != "" {
372 url += "&foundryA=" + tt.foundryA
373 }
374 if tt.foundryB != "" {
375 url += "&foundryB=" + tt.foundryB
376 }
377 if tt.layerA != "" {
378 url += "&layerA=" + tt.layerA
379 }
380 if tt.layerB != "" {
381 url += "&layerB=" + tt.layerB
382 }
383
384 // Make request
385 req := httptest.NewRequest(http.MethodPost, url, strings.NewReader(tt.input))
386 req.Header.Set("Content-Type", "application/json")
387 resp, err := app.Test(req)
388
389 if resp == nil {
390 assert.Equal(t, tt.expectedError, err.Error())
391 return
392 }
393
394 require.NoError(t, err)
395 defer resp.Body.Close()
396
397 // Check status code
398 assert.Equal(t, tt.expectedCode, resp.StatusCode)
399
400 // Check error message
Akron121c66e2025-06-02 16:34:05 +0200401 var result map[string]any
Akron74e1c072025-05-26 14:38:25 +0200402 err = json.NewDecoder(resp.Body).Decode(&result)
403 require.NoError(t, err)
404 errMsg, ok := result["error"].(string)
405 require.True(t, ok)
406 assert.Equal(t, tt.expectedError, errMsg)
407 })
408 }
409}
Akron121c66e2025-06-02 16:34:05 +0200410
// Usage examples (substitute FuzzResponseTransformEndpoint to fuzz the
// response endpoint instead):
//
// # Run fuzzing for 1 minute
// go test -fuzz=FuzzTransformEndpoint -fuzztime=1m ./cmd/termmapper
//
// # Run fuzzing until a crash is found or Ctrl+C is pressed
// go test -fuzz=FuzzTransformEndpoint ./cmd/termmapper
//
// # Run fuzzing with verbose output
// go test -fuzz=FuzzTransformEndpoint -v ./cmd/termmapper
//
// # Re-run a single failing input stored as
// # testdata/fuzz/FuzzTransformEndpoint/$SEED
// go test -run=FuzzTransformEndpoint/$SEED ./cmd/termmapper