blob: 43254a787cccf0ef837676e90d8d6d45711e8e80 [file] [log] [blame]
Akron81f709c2025-06-12 17:30:55 +02001package validation
2
3import (
4 "testing"
5
6 "github.com/korap/korap-mcp/service"
7 "github.com/rs/zerolog"
8 "github.com/stretchr/testify/assert"
9)
10
11func TestNew(t *testing.T) {
12 logger := zerolog.Nop()
13 validator := New(logger)
14
15 assert.NotNil(t, validator)
16 assert.Equal(t, logger.With().Str("component", "validator").Logger(), validator.logger)
17}
18
19func TestValidationError_Error(t *testing.T) {
20 err := ValidationError{
21 Field: "test_field",
22 Value: "test_value",
23 Message: "test message",
24 }
25
26 expected := "validation error for field 'test_field' (value: 'test_value'): test message"
27 assert.Equal(t, expected, err.Error())
28}
29
30func TestValidationErrors_Error(t *testing.T) {
31 // Test empty errors
32 emptyErrors := ValidationErrors{}
33 assert.Equal(t, "validation errors occurred", emptyErrors.Error())
34
35 // Test single error
36 singleError := ValidationErrors{
37 Errors: []ValidationError{
38 {Field: "field1", Value: "value1", Message: "message1"},
39 },
40 }
41 expected := "validation error for field 'field1' (value: 'value1'): message1"
42 assert.Equal(t, expected, singleError.Error())
43
44 // Test multiple errors
45 multipleErrors := ValidationErrors{
46 Errors: []ValidationError{
47 {Field: "field1", Value: "value1", Message: "message1"},
48 {Field: "field2", Value: "value2", Message: "message2"},
49 },
50 }
51 expected = "validation error for field 'field1' (value: 'value1'): message1; validation error for field 'field2' (value: 'value2'): message2"
52 assert.Equal(t, expected, multipleErrors.Error())
53}
54
55func TestValidateSearchRequest(t *testing.T) {
56 logger := zerolog.Nop()
57 validator := New(logger)
58
59 tests := []struct {
60 name string
61 request SearchRequest
62 expectErr bool
63 errorMsg string
64 }{
65 {
66 name: "valid_request_minimal",
67 request: SearchRequest{
68 Query: "test query",
69 },
70 expectErr: false,
71 },
72 {
73 name: "valid_request_complete",
74 request: SearchRequest{
75 Query: "test query",
76 QueryLanguage: "poliqarp",
77 Corpus: "test-corpus",
78 Count: 100,
79 },
80 expectErr: false,
81 },
82 {
83 name: "empty_query",
84 request: SearchRequest{
85 Query: "",
86 },
87 expectErr: true,
88 errorMsg: "query is required and cannot be empty",
89 },
90 {
91 name: "whitespace_only_query",
92 request: SearchRequest{
93 Query: " ",
94 },
95 expectErr: true,
96 errorMsg: "query is required and cannot be empty",
97 },
98 {
99 name: "invalid_query_language",
100 request: SearchRequest{
101 Query: "test query",
102 QueryLanguage: "invalid",
103 },
104 expectErr: true,
105 errorMsg: "invalid query language",
106 },
107 {
108 name: "invalid_corpus_id",
109 request: SearchRequest{
110 Query: "test query",
111 Corpus: "invalid corpus!",
112 },
113 expectErr: true,
114 errorMsg: "corpus ID contains invalid characters",
115 },
116 {
117 name: "count_negative",
118 request: SearchRequest{
119 Query: "test query",
120 Count: -1,
121 },
122 expectErr: true,
123 errorMsg: "count must be between 0 and 10000",
124 },
125 {
126 name: "count_zero_valid",
127 request: SearchRequest{
128 Query: "test query",
129 Count: 0,
130 },
131 expectErr: false,
132 },
133 {
134 name: "count_too_high",
135 request: SearchRequest{
136 Query: "test query",
137 Count: 10001,
138 },
139 expectErr: true,
140 errorMsg: "count must be between 0 and 10000",
141 },
142 {
143 name: "unsafe_query_too_long",
144 request: SearchRequest{
145 Query: string(make([]byte, 10001)),
146 },
147 expectErr: true,
148 errorMsg: "query is too long",
149 },
150 {
151 name: "unsafe_query_url",
152 request: SearchRequest{
153 Query: "http://example.com",
154 },
155 expectErr: true,
156 errorMsg: "query appears to contain a URL",
157 },
158 {
159 name: "unsafe_query_unmatched_parens",
160 request: SearchRequest{
161 Query: "test (query",
162 },
163 expectErr: true,
164 errorMsg: "unmatched parentheses",
165 },
166 }
167
168 for _, tt := range tests {
169 t.Run(tt.name, func(t *testing.T) {
170 err := validator.ValidateSearchRequest(tt.request)
171 if tt.expectErr {
172 assert.Error(t, err)
173 assert.Contains(t, err.Error(), tt.errorMsg)
174 } else {
175 assert.NoError(t, err)
176 }
177 })
178 }
179}
180
181func TestValidateMetadataRequest(t *testing.T) {
182 logger := zerolog.Nop()
183 validator := New(logger)
184
185 tests := []struct {
186 name string
187 request MetadataRequest
188 expectErr bool
189 errorMsg string
190 }{
191 {
192 name: "valid_list_action",
193 request: MetadataRequest{
194 Action: "list",
195 },
196 expectErr: false,
197 },
198 {
199 name: "valid_statistics_action",
200 request: MetadataRequest{
201 Action: "statistics",
202 Corpus: "test-corpus",
203 },
204 expectErr: false,
205 },
206 {
207 name: "empty_action",
208 request: MetadataRequest{
209 Action: "",
210 },
211 expectErr: true,
212 errorMsg: "action is required and cannot be empty",
213 },
214 {
215 name: "whitespace_only_action",
216 request: MetadataRequest{
217 Action: " ",
218 },
219 expectErr: true,
220 errorMsg: "action is required and cannot be empty",
221 },
222 {
223 name: "invalid_action",
224 request: MetadataRequest{
225 Action: "invalid",
226 },
227 expectErr: true,
228 errorMsg: "invalid action",
229 },
230 {
231 name: "invalid_corpus_id",
232 request: MetadataRequest{
233 Action: "statistics",
234 Corpus: "invalid corpus!",
235 },
236 expectErr: true,
237 errorMsg: "corpus ID contains invalid characters",
238 },
239 }
240
241 for _, tt := range tests {
242 t.Run(tt.name, func(t *testing.T) {
243 err := validator.ValidateMetadataRequest(tt.request)
244 if tt.expectErr {
245 assert.Error(t, err)
246 assert.Contains(t, err.Error(), tt.errorMsg)
247 } else {
248 assert.NoError(t, err)
249 }
250 })
251 }
252}
253
254func TestValidateSearchResponse(t *testing.T) {
255 logger := zerolog.Nop()
256 validator := New(logger)
257
258 tests := []struct {
259 name string
260 response *service.SearchResponse
261 expectErr bool
262 errorMsg string
263 }{
264 {
265 name: "nil_response",
266 response: nil,
267 expectErr: true,
268 errorMsg: "search response is nil",
269 },
270 {
271 name: "valid_response",
272 response: &service.SearchResponse{
273 Meta: service.SearchMeta{
274 TotalResults: 100,
275 Count: 10,
276 StartIndex: 0,
277 ItemsPerPage: 10,
278 },
279 Query: service.SearchQuery{
280 Query: "test",
281 QueryLang: "poliqarp",
282 },
283 Matches: []service.SearchMatch{
284 {MatchID: "match1", TextSigle: "text1", Position: 0},
285 {MatchID: "match2", TextSigle: "text2", Position: 1},
286 },
287 },
288 expectErr: false,
289 },
290 {
291 name: "negative_total_results",
292 response: &service.SearchResponse{
293 Meta: service.SearchMeta{
294 TotalResults: -1,
295 Count: 10,
296 StartIndex: 0,
297 ItemsPerPage: 10,
298 },
299 },
300 expectErr: true,
301 errorMsg: "totalResults cannot be negative",
302 },
303 {
304 name: "negative_count",
305 response: &service.SearchResponse{
306 Meta: service.SearchMeta{
307 TotalResults: 100,
308 Count: -1,
309 StartIndex: 0,
310 ItemsPerPage: 10,
311 },
312 },
313 expectErr: true,
314 errorMsg: "count cannot be negative",
315 },
316 {
317 name: "negative_start_index",
318 response: &service.SearchResponse{
319 Meta: service.SearchMeta{
320 TotalResults: 100,
321 Count: 10,
322 StartIndex: -1,
323 ItemsPerPage: 10,
324 },
325 },
326 expectErr: true,
327 errorMsg: "startIndex cannot be negative",
328 },
329 {
330 name: "negative_items_per_page",
331 response: &service.SearchResponse{
332 Meta: service.SearchMeta{
333 TotalResults: 100,
334 Count: 10,
335 StartIndex: 0,
336 ItemsPerPage: -1,
337 },
338 },
339 expectErr: true,
340 errorMsg: "itemsPerPage cannot be negative",
341 },
342 {
343 name: "match_missing_id",
344 response: &service.SearchResponse{
345 Meta: service.SearchMeta{
346 TotalResults: 100,
347 Count: 10,
348 StartIndex: 0,
349 ItemsPerPage: 10,
350 },
351 Matches: []service.SearchMatch{
352 {MatchID: "", TextSigle: "text1", Position: 0},
353 },
354 },
355 expectErr: true,
356 errorMsg: "match ID is required",
357 },
358 {
359 name: "match_missing_text_sigle",
360 response: &service.SearchResponse{
361 Meta: service.SearchMeta{
362 TotalResults: 100,
363 Count: 10,
364 StartIndex: 0,
365 ItemsPerPage: 10,
366 },
367 Matches: []service.SearchMatch{
368 {MatchID: "match1", TextSigle: "", Position: 0},
369 },
370 },
371 expectErr: true,
372 errorMsg: "textSigle is required",
373 },
374 {
375 name: "match_negative_position",
376 response: &service.SearchResponse{
377 Meta: service.SearchMeta{
378 TotalResults: 100,
379 Count: 10,
380 StartIndex: 0,
381 ItemsPerPage: 10,
382 },
383 Matches: []service.SearchMatch{
384 {MatchID: "match1", TextSigle: "text1", Position: -1},
385 },
386 },
387 expectErr: true,
388 errorMsg: "position cannot be negative",
389 },
390 }
391
392 for _, tt := range tests {
393 t.Run(tt.name, func(t *testing.T) {
394 err := validator.ValidateSearchResponse(tt.response)
395 if tt.expectErr {
396 assert.Error(t, err)
397 assert.Contains(t, err.Error(), tt.errorMsg)
398 } else {
399 assert.NoError(t, err)
400 }
401 })
402 }
403}
404
405func TestValidateCorpusListResponse(t *testing.T) {
406 logger := zerolog.Nop()
407 validator := New(logger)
408
409 tests := []struct {
410 name string
411 response *service.CorpusListResponse
412 expectErr bool
413 errorMsg string
414 }{
415 {
416 name: "nil_response",
417 response: nil,
418 expectErr: true,
419 errorMsg: "corpus list response is nil",
420 },
421 {
422 name: "valid_response",
423 response: &service.CorpusListResponse{
424 Corpora: []service.CorpusInfo{
425 {
426 ID: "corpus1",
427 Name: "Test Corpus 1",
428 Documents: 100,
429 Tokens: 50000,
430 },
431 {
432 ID: "corpus2",
433 Name: "Test Corpus 2",
434 Documents: 200,
435 Tokens: 75000,
436 },
437 },
438 },
439 expectErr: false,
440 },
441 {
442 name: "empty_corpus_list",
443 response: &service.CorpusListResponse{
444 Corpora: []service.CorpusInfo{},
445 },
446 expectErr: false,
447 },
448 {
449 name: "corpus_missing_id",
450 response: &service.CorpusListResponse{
451 Corpora: []service.CorpusInfo{
452 {
453 ID: "",
454 Name: "Test Corpus",
455 Documents: 100,
456 Tokens: 50000,
457 },
458 },
459 },
460 expectErr: true,
461 errorMsg: "corpus ID is required",
462 },
463 {
464 name: "corpus_invalid_id",
465 response: &service.CorpusListResponse{
466 Corpora: []service.CorpusInfo{
467 {
468 ID: "invalid id!",
469 Name: "Test Corpus",
470 Documents: 100,
471 Tokens: 50000,
472 },
473 },
474 },
475 expectErr: true,
476 errorMsg: "corpus ID contains invalid characters",
477 },
478 {
479 name: "corpus_missing_name",
480 response: &service.CorpusListResponse{
481 Corpora: []service.CorpusInfo{
482 {
483 ID: "corpus1",
484 Name: "",
485 Documents: 100,
486 Tokens: 50000,
487 },
488 },
489 },
490 expectErr: true,
491 errorMsg: "corpus name is required",
492 },
493 {
494 name: "corpus_negative_documents",
495 response: &service.CorpusListResponse{
496 Corpora: []service.CorpusInfo{
497 {
498 ID: "corpus1",
499 Name: "Test Corpus",
500 Documents: -1,
501 Tokens: 50000,
502 },
503 },
504 },
505 expectErr: true,
506 errorMsg: "document count cannot be negative",
507 },
508 {
509 name: "corpus_negative_tokens",
510 response: &service.CorpusListResponse{
511 Corpora: []service.CorpusInfo{
512 {
513 ID: "corpus1",
514 Name: "Test Corpus",
515 Documents: 100,
516 Tokens: -1,
517 },
518 },
519 },
520 expectErr: true,
521 errorMsg: "token count cannot be negative",
522 },
523 }
524
525 for _, tt := range tests {
526 t.Run(tt.name, func(t *testing.T) {
527 err := validator.ValidateCorpusListResponse(tt.response)
528 if tt.expectErr {
529 assert.Error(t, err)
530 assert.Contains(t, err.Error(), tt.errorMsg)
531 } else {
532 assert.NoError(t, err)
533 }
534 })
535 }
536}
537
538func TestValidateStatisticsResponse(t *testing.T) {
539 logger := zerolog.Nop()
540 validator := New(logger)
541
542 tests := []struct {
543 name string
544 response *service.StatisticsResponse
545 expectErr bool
546 errorMsg string
547 }{
548 {
549 name: "nil_response",
550 response: nil,
551 expectErr: true,
552 errorMsg: "statistics response is nil",
553 },
554 {
555 name: "valid_response",
556 response: &service.StatisticsResponse{
557 Documents: 100,
558 Tokens: 50000,
559 Sentences: 2500,
560 Paragraphs: 500,
561 },
562 expectErr: false,
563 },
564 {
565 name: "negative_documents",
566 response: &service.StatisticsResponse{
567 Documents: -1,
568 Tokens: 50000,
569 },
570 expectErr: true,
571 errorMsg: "document count cannot be negative",
572 },
573 {
574 name: "negative_tokens",
575 response: &service.StatisticsResponse{
576 Documents: 100,
577 Tokens: -1,
578 },
579 expectErr: true,
580 errorMsg: "token count cannot be negative",
581 },
582 {
583 name: "negative_sentences",
584 response: &service.StatisticsResponse{
585 Documents: 100,
586 Tokens: 50000,
587 Sentences: -1,
588 },
589 expectErr: true,
590 errorMsg: "sentence count cannot be negative",
591 },
592 {
593 name: "negative_paragraphs",
594 response: &service.StatisticsResponse{
595 Documents: 100,
596 Tokens: 50000,
597 Paragraphs: -1,
598 },
599 expectErr: true,
600 errorMsg: "paragraph count cannot be negative",
601 },
602 }
603
604 for _, tt := range tests {
605 t.Run(tt.name, func(t *testing.T) {
606 err := validator.ValidateStatisticsResponse(tt.response)
607 if tt.expectErr {
608 assert.Error(t, err)
609 assert.Contains(t, err.Error(), tt.errorMsg)
610 } else {
611 assert.NoError(t, err)
612 }
613 })
614 }
615}
616
617func TestValidateQuerySafety(t *testing.T) {
618 logger := zerolog.Nop()
619 validator := New(logger)
620
621 tests := []struct {
622 name string
623 query string
624 expectErr bool
625 errorMsg string
626 }{
627 {
628 name: "valid_query",
629 query: "test query",
630 expectErr: false,
631 },
632 {
633 name: "query_too_long",
634 query: string(make([]byte, 10001)),
635 expectErr: true,
636 errorMsg: "query is too long",
637 },
638 {
639 name: "query_with_url",
640 query: "http://example.com",
641 expectErr: true,
642 errorMsg: "query appears to contain a URL",
643 },
644 {
645 name: "query_with_https_url",
646 query: "https://example.com",
647 expectErr: true,
648 errorMsg: "query appears to contain a URL",
649 },
650 {
651 name: "query_unmatched_open_paren",
652 query: "test (query",
653 expectErr: true,
654 errorMsg: "unmatched parentheses",
655 },
656 {
657 name: "query_unmatched_close_paren",
658 query: "test query)",
659 expectErr: true,
660 errorMsg: "unmatched parentheses",
661 },
662 {
663 name: "query_too_many_nested_parens",
664 query: "(" + string(make([]byte, 100)) + ")" + "(" + string(make([]byte, 100)) + ")",
665 expectErr: false, // This should be under the limit
666 },
667 }
668
669 for _, tt := range tests {
670 t.Run(tt.name, func(t *testing.T) {
671 err := validator.validateQuerySafety(tt.query)
672 if tt.expectErr {
673 assert.Error(t, err)
674 assert.Contains(t, err.Error(), tt.errorMsg)
675 } else {
676 assert.NoError(t, err)
677 }
678 })
679 }
680}
681
682func TestValidateCorpusID(t *testing.T) {
683 logger := zerolog.Nop()
684 validator := New(logger)
685
686 tests := []struct {
687 name string
688 corpusID string
689 expectErr bool
690 errorMsg string
691 }{
692 {
693 name: "valid_corpus_id",
694 corpusID: "test-corpus_1.0",
695 expectErr: false,
696 },
697 {
698 name: "empty_corpus_id",
699 corpusID: "",
700 expectErr: true,
701 errorMsg: "corpus ID cannot be empty",
702 },
703 {
704 name: "corpus_id_too_long",
705 corpusID: string(make([]byte, 101)),
706 expectErr: true,
707 errorMsg: "corpus ID is too long",
708 },
709 {
710 name: "corpus_id_invalid_chars",
711 corpusID: "invalid corpus!",
712 expectErr: true,
713 errorMsg: "corpus ID contains invalid characters",
714 },
715 {
716 name: "corpus_id_with_space",
717 corpusID: "corpus with space",
718 expectErr: true,
719 errorMsg: "corpus ID contains invalid characters",
720 },
721 }
722
723 for _, tt := range tests {
724 t.Run(tt.name, func(t *testing.T) {
725 err := validator.validateCorpusID(tt.corpusID)
726 if tt.expectErr {
727 assert.Error(t, err)
728 assert.Contains(t, err.Error(), tt.errorMsg)
729 } else {
730 assert.NoError(t, err)
731 }
732 })
733 }
734}
735
736func TestSanitizeQuery(t *testing.T) {
737 logger := zerolog.Nop()
738 validator := New(logger)
739
740 tests := []struct {
741 name string
742 input string
743 expected string
744 }{
745 {
746 name: "trim_whitespace",
747 input: " test query ",
748 expected: "test query",
749 },
750 {
751 name: "remove_null_bytes",
752 input: "test\x00query",
753 expected: "testquery",
754 },
755 {
756 name: "normalize_whitespace",
757 input: "test query\t\nwith spaces",
758 expected: "test query with spaces",
759 },
760 {
761 name: "empty_string",
762 input: "",
763 expected: "",
764 },
765 {
766 name: "already_clean",
767 input: "test query",
768 expected: "test query",
769 },
770 }
771
772 for _, tt := range tests {
773 t.Run(tt.name, func(t *testing.T) {
774 result := validator.SanitizeQuery(tt.input)
775 assert.Equal(t, tt.expected, result)
776 })
777 }
778}
779
780func TestSanitizeCorpusID(t *testing.T) {
781 logger := zerolog.Nop()
782 validator := New(logger)
783
784 tests := []struct {
785 name string
786 input string
787 expected string
788 }{
789 {
790 name: "trim_whitespace",
791 input: " Test-Corpus ",
792 expected: "test-corpus",
793 },
794 {
795 name: "remove_null_bytes",
796 input: "test\x00corpus",
797 expected: "testcorpus",
798 },
799 {
800 name: "lowercase",
801 input: "Test-Corpus_1.0",
802 expected: "test-corpus_1.0",
803 },
804 {
805 name: "empty_string",
806 input: "",
807 expected: "",
808 },
809 {
810 name: "already_clean",
811 input: "test-corpus",
812 expected: "test-corpus",
813 },
814 }
815
816 for _, tt := range tests {
817 t.Run(tt.name, func(t *testing.T) {
818 result := validator.SanitizeCorpusID(tt.input)
819 assert.Equal(t, tt.expected, result)
820 })
821 }
822}
823
824// Helper function to create bool pointers
825func boolPtr(b bool) *bool {
826 return &b
827}