Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "encoding/json" |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 6 | "errors" |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 7 | "fmt" |
| 8 | "net/http" |
| 9 | "net/http/httptest" |
| 10 | "net/url" |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 11 | "strings" |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 12 | "testing" |
| 13 | |
Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 14 | tmconfig "github.com/KorAP/KoralPipe-TermMapper/config" |
Akron | fa55bb2 | 2025-05-26 15:10:42 +0200 | [diff] [blame] | 15 | "github.com/KorAP/KoralPipe-TermMapper/mapper" |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 16 | "github.com/gofiber/fiber/v2" |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 17 | "github.com/stretchr/testify/assert" |
| 18 | "github.com/stretchr/testify/require" |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 19 | ) |
| 20 | |
// FuzzInput groups the values a single fuzz iteration works with. Its fields
// correspond one-to-one, in order, to the arguments of the fuzz target in
// FuzzTransformEndpoint: the mapping ID, the direction, the optional
// foundry/layer overrides and the raw request body.
type FuzzInput struct {
	MapID, Direction   string
	FoundryA, FoundryB string
	LayerA, LayerB     string
	Body               []byte
}
| 31 | |
| 32 | func FuzzTransformEndpoint(f *testing.F) { |
Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 33 | // Create test mapping list |
| 34 | mappingList := tmconfig.MappingList{ |
| 35 | ID: "test-mapper", |
| 36 | FoundryA: "opennlp", |
| 37 | LayerA: "p", |
| 38 | FoundryB: "upos", |
| 39 | LayerB: "p", |
| 40 | Mappings: []tmconfig.MappingRule{ |
| 41 | "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]", |
| 42 | "[DET] <> [opennlp/p=DET]", |
| 43 | }, |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 44 | } |
| 45 | |
| 46 | // Create mapper |
Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 47 | m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList}) |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 48 | if err != nil { |
| 49 | f.Fatal(err) |
| 50 | } |
| 51 | |
| 52 | // Create fiber app |
| 53 | app := fiber.New(fiber.Config{ |
| 54 | DisableStartupMessage: true, |
| 55 | ErrorHandler: func(c *fiber.Ctx, err error) error { |
| 56 | // Ensure we always return a valid JSON response even for panic cases |
| 57 | return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{ |
| 58 | "error": "internal server error", |
| 59 | }) |
| 60 | }, |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 61 | BodyLimit: maxInputLength, |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 62 | }) |
| 63 | setupRoutes(app, m) |
| 64 | |
| 65 | // Add seed corpus |
| 66 | f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token"}`)) // Valid minimal input |
| 67 | f.Add("test-mapper", "btoa", "custom", "", "", "", []byte(`{"@type": "koral:token"}`)) // Valid with foundry override |
| 68 | f.Add("", "", "", "", "", "", []byte(`{}`)) // Empty parameters |
| 69 | f.Add("nonexistent", "invalid", "!@#$", "%^&*", "()", "[]", []byte(`invalid json`)) // Invalid everything |
| 70 | f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": null}`)) // Valid JSON, invalid structure |
| 71 | f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": {"@type": "unknown"}}`)) // Unknown type |
| 72 | f.Add("test-mapper", "atob", "", "", "", "", []byte(`{"@type": "koral:token", "wrap": {"@type": "koral:term"}}`)) // Missing required fields |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 73 | f.Add("0", "0", strings.Repeat("\x83", 1000), "0", "Q", "", []byte("0")) // Failing fuzz test case |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 74 | |
| 75 | f.Fuzz(func(t *testing.T, mapID, dir, foundryA, foundryB, layerA, layerB string, body []byte) { |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 76 | |
| 77 | // Validate input first |
| 78 | if err := validateInput(mapID, dir, foundryA, foundryB, layerA, layerB, body); err != nil { |
| 79 | // Skip this test case as it's invalid |
| 80 | t.Skip(err) |
| 81 | } |
| 82 | |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 83 | // Build URL with query parameters |
| 84 | params := url.Values{} |
| 85 | if dir != "" { |
| 86 | params.Set("dir", dir) |
| 87 | } |
| 88 | if foundryA != "" { |
| 89 | params.Set("foundryA", foundryA) |
| 90 | } |
| 91 | if foundryB != "" { |
| 92 | params.Set("foundryB", foundryB) |
| 93 | } |
| 94 | if layerA != "" { |
| 95 | params.Set("layerA", layerA) |
| 96 | } |
| 97 | if layerB != "" { |
| 98 | params.Set("layerB", layerB) |
| 99 | } |
| 100 | |
| 101 | url := fmt.Sprintf("/%s/query", url.PathEscape(mapID)) |
| 102 | if len(params) > 0 { |
| 103 | url += "?" + params.Encode() |
| 104 | } |
| 105 | |
| 106 | // Make request |
| 107 | req := httptest.NewRequest(http.MethodPost, url, bytes.NewReader(body)) |
| 108 | req.Header.Set("Content-Type", "application/json") |
| 109 | resp, err := app.Test(req) |
| 110 | if err != nil { |
| 111 | t.Fatal(err) |
| 112 | } |
| 113 | defer resp.Body.Close() |
| 114 | |
| 115 | // Verify that we always get a valid response |
| 116 | if resp.StatusCode != http.StatusOK && |
| 117 | resp.StatusCode != http.StatusBadRequest && |
| 118 | resp.StatusCode != http.StatusInternalServerError { |
| 119 | t.Errorf("unexpected status code: %d", resp.StatusCode) |
| 120 | } |
| 121 | |
| 122 | // Verify that the response is valid JSON |
Akron | 121c66e | 2025-06-02 16:34:05 +0200 | [diff] [blame^] | 123 | var result map[string]any |
Akron | 2cbdab5 | 2025-05-23 17:57:10 +0200 | [diff] [blame] | 124 | if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { |
| 125 | t.Errorf("invalid JSON response: %v", err) |
| 126 | } |
| 127 | |
| 128 | // For error responses, verify that we have an error message |
| 129 | if resp.StatusCode != http.StatusOK { |
| 130 | if errMsg, ok := result["error"].(string); !ok || errMsg == "" { |
| 131 | t.Error("error response missing error message") |
| 132 | } |
| 133 | } |
| 134 | }) |
| 135 | } |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 136 | |
| 137 | func TestLargeInput(t *testing.T) { |
Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 138 | // Create test mapping list |
| 139 | mappingList := tmconfig.MappingList{ |
| 140 | ID: "test-mapper", |
| 141 | Mappings: []tmconfig.MappingRule{ |
| 142 | "[A] <> [B]", |
| 143 | }, |
| 144 | } |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 145 | |
| 146 | // Create mapper |
Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 147 | m, err := mapper.NewMapper([]tmconfig.MappingList{mappingList}) |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 148 | require.NoError(t, err) |
| 149 | |
| 150 | // Create fiber app |
| 151 | app := fiber.New(fiber.Config{ |
| 152 | DisableStartupMessage: true, |
| 153 | ErrorHandler: func(c *fiber.Ctx, err error) error { |
| 154 | // For body limit errors, return 413 status code |
| 155 | if err.Error() == "body size exceeds the given limit" || errors.Is(err, fiber.ErrRequestEntityTooLarge) { |
| 156 | return c.Status(fiber.StatusRequestEntityTooLarge).JSON(fiber.Map{ |
| 157 | "error": fmt.Sprintf("request body too large (max %d bytes)", maxInputLength), |
| 158 | }) |
| 159 | } |
| 160 | // For other errors, return 500 status code |
| 161 | return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{ |
| 162 | "error": err.Error(), |
| 163 | }) |
| 164 | }, |
| 165 | BodyLimit: maxInputLength, |
| 166 | }) |
| 167 | setupRoutes(app, m) |
| 168 | |
| 169 | tests := []struct { |
| 170 | name string |
| 171 | mapID string |
| 172 | direction string |
| 173 | foundryA string |
| 174 | foundryB string |
| 175 | layerA string |
| 176 | layerB string |
| 177 | input string |
| 178 | expectedCode int |
| 179 | expectedError string |
| 180 | }{ |
| 181 | { |
| 182 | name: "Large map ID", |
| 183 | mapID: strings.Repeat("a", maxParamLength+1), |
| 184 | direction: "atob", |
| 185 | input: "{}", |
| 186 | expectedCode: http.StatusBadRequest, |
Akron | 69d43bf | 2025-05-26 17:09:00 +0200 | [diff] [blame] | 187 | expectedError: "mapID too long (max 1024 bytes)", |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 188 | }, |
| 189 | { |
| 190 | name: "Large direction", |
| 191 | mapID: "test-mapper", |
| 192 | direction: strings.Repeat("a", maxParamLength+1), |
| 193 | input: "{}", |
| 194 | expectedCode: http.StatusBadRequest, |
Akron | 69d43bf | 2025-05-26 17:09:00 +0200 | [diff] [blame] | 195 | expectedError: "dir too long (max 1024 bytes)", |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 196 | }, |
| 197 | { |
| 198 | name: "Large foundryA", |
| 199 | mapID: "test-mapper", |
| 200 | direction: "atob", |
| 201 | foundryA: strings.Repeat("a", maxParamLength+1), |
| 202 | input: "{}", |
| 203 | expectedCode: http.StatusBadRequest, |
| 204 | expectedError: "foundryA too long (max 1024 bytes)", |
| 205 | }, |
| 206 | { |
| 207 | name: "Invalid characters in mapID", |
| 208 | mapID: "test<>mapper", |
| 209 | direction: "atob", |
| 210 | input: "{}", |
| 211 | expectedCode: http.StatusBadRequest, |
| 212 | expectedError: "mapID contains invalid characters", |
| 213 | }, |
| 214 | { |
| 215 | name: "Large request body", |
| 216 | mapID: "test-mapper", |
| 217 | direction: "atob", |
| 218 | input: strings.Repeat("a", maxInputLength+1), |
| 219 | expectedCode: http.StatusRequestEntityTooLarge, |
| 220 | expectedError: "body size exceeds the given limit", |
| 221 | }, |
| 222 | } |
| 223 | |
| 224 | for _, tt := range tests { |
| 225 | t.Run(tt.name, func(t *testing.T) { |
| 226 | // Build URL with query parameters |
| 227 | url := "/" + tt.mapID + "/query" |
| 228 | if tt.direction != "" { |
| 229 | url += "?dir=" + tt.direction |
| 230 | } |
| 231 | if tt.foundryA != "" { |
| 232 | url += "&foundryA=" + tt.foundryA |
| 233 | } |
| 234 | if tt.foundryB != "" { |
| 235 | url += "&foundryB=" + tt.foundryB |
| 236 | } |
| 237 | if tt.layerA != "" { |
| 238 | url += "&layerA=" + tt.layerA |
| 239 | } |
| 240 | if tt.layerB != "" { |
| 241 | url += "&layerB=" + tt.layerB |
| 242 | } |
| 243 | |
| 244 | // Make request |
| 245 | req := httptest.NewRequest(http.MethodPost, url, strings.NewReader(tt.input)) |
| 246 | req.Header.Set("Content-Type", "application/json") |
| 247 | resp, err := app.Test(req) |
| 248 | |
| 249 | if resp == nil { |
| 250 | assert.Equal(t, tt.expectedError, err.Error()) |
| 251 | return |
| 252 | } |
| 253 | |
| 254 | require.NoError(t, err) |
| 255 | defer resp.Body.Close() |
| 256 | |
| 257 | // Check status code |
| 258 | assert.Equal(t, tt.expectedCode, resp.StatusCode) |
| 259 | |
| 260 | // Check error message |
Akron | 121c66e | 2025-06-02 16:34:05 +0200 | [diff] [blame^] | 261 | var result map[string]any |
Akron | 74e1c07 | 2025-05-26 14:38:25 +0200 | [diff] [blame] | 262 | err = json.NewDecoder(resp.Body).Decode(&result) |
| 263 | require.NoError(t, err) |
| 264 | errMsg, ok := result["error"].(string) |
| 265 | require.True(t, ok) |
| 266 | assert.Equal(t, tt.expectedError, errMsg) |
| 267 | }) |
| 268 | } |
| 269 | } |
Akron | 121c66e | 2025-06-02 16:34:05 +0200 | [diff] [blame^] | 270 | |
| 271 | // # Run fuzzing for 1 minute |
| 272 | // go test -fuzz=FuzzTransformEndpoint -fuzztime=1m ./cmd/termmapper |
| 273 | // |
| 274 | // # Run fuzzing until a crash is found or Ctrl+C is pressed |
| 275 | // go test -fuzz=FuzzTransformEndpoint ./cmd/termmapper |
| 276 | // |
| 277 | // # Run fuzzing with verbose output |
| 278 | // go test -fuzz=FuzzTransformEndpoint -v ./cmd/termmapper |
| 279 | // |
| 280 | // # Re-run a single saved corpus entry ($SEED is its file name under testdata/fuzz/FuzzTransformEndpoint) |
| 281 | // go test -run=FuzzTransformEndpoint/$SEED ./cmd/termmapper |