blob: 147865cc1b09e4d9d608d7193bd42610f5eae2c8 [file] [log] [blame]
Akron81f709c2025-06-12 17:30:55 +02001package validation
2
3import (
4 "testing"
5
6 "github.com/korap/korap-mcp/service"
7 "github.com/rs/zerolog"
8 "github.com/stretchr/testify/assert"
9)
10
11func TestNew(t *testing.T) {
12 logger := zerolog.Nop()
13 validator := New(logger)
14
15 assert.NotNil(t, validator)
16 assert.Equal(t, logger.With().Str("component", "validator").Logger(), validator.logger)
17}
18
19func TestValidationError_Error(t *testing.T) {
20 err := ValidationError{
21 Field: "test_field",
22 Value: "test_value",
23 Message: "test message",
24 }
25
26 expected := "validation error for field 'test_field' (value: 'test_value'): test message"
27 assert.Equal(t, expected, err.Error())
28}
29
30func TestValidationErrors_Error(t *testing.T) {
31 // Test empty errors
32 emptyErrors := ValidationErrors{}
33 assert.Equal(t, "validation errors occurred", emptyErrors.Error())
34
35 // Test single error
36 singleError := ValidationErrors{
37 Errors: []ValidationError{
38 {Field: "field1", Value: "value1", Message: "message1"},
39 },
40 }
41 expected := "validation error for field 'field1' (value: 'value1'): message1"
42 assert.Equal(t, expected, singleError.Error())
43
44 // Test multiple errors
45 multipleErrors := ValidationErrors{
46 Errors: []ValidationError{
47 {Field: "field1", Value: "value1", Message: "message1"},
48 {Field: "field2", Value: "value2", Message: "message2"},
49 },
50 }
51 expected = "validation error for field 'field1' (value: 'value1'): message1; validation error for field 'field2' (value: 'value2'): message2"
52 assert.Equal(t, expected, multipleErrors.Error())
53}
54
55func TestValidateSearchRequest(t *testing.T) {
56 logger := zerolog.Nop()
57 validator := New(logger)
58
59 tests := []struct {
60 name string
61 request SearchRequest
62 expectErr bool
63 errorMsg string
64 }{
65 {
66 name: "valid_request_minimal",
67 request: SearchRequest{
68 Query: "test query",
69 },
70 expectErr: false,
71 },
72 {
73 name: "valid_request_complete",
74 request: SearchRequest{
75 Query: "test query",
76 QueryLanguage: "poliqarp",
77 Corpus: "test-corpus",
78 Count: 100,
79 },
80 expectErr: false,
81 },
82 {
83 name: "empty_query",
84 request: SearchRequest{
85 Query: "",
86 },
87 expectErr: true,
88 errorMsg: "query is required and cannot be empty",
89 },
90 {
91 name: "whitespace_only_query",
92 request: SearchRequest{
93 Query: " ",
94 },
95 expectErr: true,
96 errorMsg: "query is required and cannot be empty",
97 },
98 {
Akron8db31c32025-06-17 12:22:41 +020099 name: "valid_poliqarp_language",
100 request: SearchRequest{
101 Query: "test query",
102 QueryLanguage: "poliqarp",
103 },
104 expectErr: false,
105 },
106 {
107 name: "valid_poliqarpplus_language",
108 request: SearchRequest{
109 Query: "test query",
110 QueryLanguage: "poliqarpplus",
111 },
112 expectErr: false,
113 },
114 {
115 name: "valid_cosmas2_language",
116 request: SearchRequest{
117 Query: "test query",
118 QueryLanguage: "cosmas2",
119 },
120 expectErr: false,
121 },
122 {
123 name: "valid_annis_language",
124 request: SearchRequest{
125 Query: "test query",
126 QueryLanguage: "annis",
127 },
128 expectErr: false,
129 },
130 {
131 name: "valid_cql_language",
132 request: SearchRequest{
133 Query: "test query",
134 QueryLanguage: "cql",
135 },
136 expectErr: false,
137 },
138 {
139 name: "valid_cqp_language",
140 request: SearchRequest{
141 Query: "test query",
142 QueryLanguage: "cqp",
143 },
144 expectErr: false,
145 },
146 {
147 name: "valid_fcsql_language",
148 request: SearchRequest{
149 Query: "test query",
150 QueryLanguage: "fcsql",
151 },
152 expectErr: false,
153 },
154 {
Akron81f709c2025-06-12 17:30:55 +0200155 name: "invalid_query_language",
156 request: SearchRequest{
157 Query: "test query",
158 QueryLanguage: "invalid",
159 },
160 expectErr: true,
Akron8db31c32025-06-17 12:22:41 +0200161 errorMsg: "invalid query language, must be one of: poliqarp, poliqarpplus, cosmas2, annis, cql, cqp, fcsql",
Akron81f709c2025-06-12 17:30:55 +0200162 },
163 {
164 name: "invalid_corpus_id",
165 request: SearchRequest{
166 Query: "test query",
Akron8db31c32025-06-17 12:22:41 +0200167 Corpus: "invalid@corpus#format",
Akron81f709c2025-06-12 17:30:55 +0200168 },
169 expectErr: true,
Akron8db31c32025-06-17 12:22:41 +0200170 errorMsg: "collection query contains invalid characters",
Akron81f709c2025-06-12 17:30:55 +0200171 },
172 {
173 name: "count_negative",
174 request: SearchRequest{
175 Query: "test query",
176 Count: -1,
177 },
178 expectErr: true,
179 errorMsg: "count must be between 0 and 10000",
180 },
181 {
182 name: "count_zero_valid",
183 request: SearchRequest{
184 Query: "test query",
185 Count: 0,
186 },
187 expectErr: false,
188 },
189 {
190 name: "count_too_high",
191 request: SearchRequest{
192 Query: "test query",
193 Count: 10001,
194 },
195 expectErr: true,
196 errorMsg: "count must be between 0 and 10000",
197 },
198 {
199 name: "unsafe_query_too_long",
200 request: SearchRequest{
201 Query: string(make([]byte, 10001)),
202 },
203 expectErr: true,
204 errorMsg: "query is too long",
205 },
206 {
207 name: "unsafe_query_url",
208 request: SearchRequest{
209 Query: "http://example.com",
210 },
211 expectErr: true,
212 errorMsg: "query appears to contain a URL",
213 },
214 {
215 name: "unsafe_query_unmatched_parens",
216 request: SearchRequest{
217 Query: "test (query",
218 },
219 expectErr: true,
220 errorMsg: "unmatched parentheses",
221 },
222 }
223
224 for _, tt := range tests {
225 t.Run(tt.name, func(t *testing.T) {
226 err := validator.ValidateSearchRequest(tt.request)
227 if tt.expectErr {
228 assert.Error(t, err)
229 assert.Contains(t, err.Error(), tt.errorMsg)
230 } else {
231 assert.NoError(t, err)
232 }
233 })
234 }
235}
236
237func TestValidateMetadataRequest(t *testing.T) {
238 logger := zerolog.Nop()
239 validator := New(logger)
240
241 tests := []struct {
242 name string
243 request MetadataRequest
244 expectErr bool
245 errorMsg string
246 }{
247 {
248 name: "valid_list_action",
249 request: MetadataRequest{
250 Action: "list",
251 },
252 expectErr: false,
253 },
254 {
255 name: "valid_statistics_action",
256 request: MetadataRequest{
257 Action: "statistics",
258 Corpus: "test-corpus",
259 },
260 expectErr: false,
261 },
262 {
263 name: "empty_action",
264 request: MetadataRequest{
265 Action: "",
266 },
267 expectErr: true,
268 errorMsg: "action is required and cannot be empty",
269 },
270 {
271 name: "whitespace_only_action",
272 request: MetadataRequest{
273 Action: " ",
274 },
275 expectErr: true,
276 errorMsg: "action is required and cannot be empty",
277 },
278 {
279 name: "invalid_action",
280 request: MetadataRequest{
281 Action: "invalid",
282 },
283 expectErr: true,
284 errorMsg: "invalid action",
285 },
286 {
287 name: "invalid_corpus_id",
288 request: MetadataRequest{
289 Action: "statistics",
Akron8db31c32025-06-17 12:22:41 +0200290 Corpus: "invalid@corpus#format",
Akron81f709c2025-06-12 17:30:55 +0200291 },
292 expectErr: true,
Akron8db31c32025-06-17 12:22:41 +0200293 errorMsg: "collection query contains invalid characters",
Akron81f709c2025-06-12 17:30:55 +0200294 },
295 }
296
297 for _, tt := range tests {
298 t.Run(tt.name, func(t *testing.T) {
299 err := validator.ValidateMetadataRequest(tt.request)
300 if tt.expectErr {
301 assert.Error(t, err)
302 assert.Contains(t, err.Error(), tt.errorMsg)
303 } else {
304 assert.NoError(t, err)
305 }
306 })
307 }
308}
309
310func TestValidateSearchResponse(t *testing.T) {
311 logger := zerolog.Nop()
312 validator := New(logger)
313
314 tests := []struct {
315 name string
316 response *service.SearchResponse
317 expectErr bool
318 errorMsg string
319 }{
320 {
321 name: "nil_response",
322 response: nil,
323 expectErr: true,
324 errorMsg: "search response is nil",
325 },
326 {
327 name: "valid_response",
328 response: &service.SearchResponse{
329 Meta: service.SearchMeta{
330 TotalResults: 100,
331 Count: 10,
332 StartIndex: 0,
333 ItemsPerPage: 10,
334 },
335 Query: service.SearchQuery{
336 Query: "test",
337 QueryLang: "poliqarp",
338 },
339 Matches: []service.SearchMatch{
340 {MatchID: "match1", TextSigle: "text1", Position: 0},
341 {MatchID: "match2", TextSigle: "text2", Position: 1},
342 },
343 },
344 expectErr: false,
345 },
346 {
347 name: "negative_total_results",
348 response: &service.SearchResponse{
349 Meta: service.SearchMeta{
350 TotalResults: -1,
351 Count: 10,
352 StartIndex: 0,
353 ItemsPerPage: 10,
354 },
355 },
356 expectErr: true,
357 errorMsg: "totalResults cannot be negative",
358 },
359 {
360 name: "negative_count",
361 response: &service.SearchResponse{
362 Meta: service.SearchMeta{
363 TotalResults: 100,
364 Count: -1,
365 StartIndex: 0,
366 ItemsPerPage: 10,
367 },
368 },
369 expectErr: true,
370 errorMsg: "count cannot be negative",
371 },
372 {
373 name: "negative_start_index",
374 response: &service.SearchResponse{
375 Meta: service.SearchMeta{
376 TotalResults: 100,
377 Count: 10,
378 StartIndex: -1,
379 ItemsPerPage: 10,
380 },
381 },
382 expectErr: true,
383 errorMsg: "startIndex cannot be negative",
384 },
385 {
386 name: "negative_items_per_page",
387 response: &service.SearchResponse{
388 Meta: service.SearchMeta{
389 TotalResults: 100,
390 Count: 10,
391 StartIndex: 0,
392 ItemsPerPage: -1,
393 },
394 },
395 expectErr: true,
396 errorMsg: "itemsPerPage cannot be negative",
397 },
398 {
399 name: "match_missing_id",
400 response: &service.SearchResponse{
401 Meta: service.SearchMeta{
402 TotalResults: 100,
403 Count: 10,
404 StartIndex: 0,
405 ItemsPerPage: 10,
406 },
407 Matches: []service.SearchMatch{
408 {MatchID: "", TextSigle: "text1", Position: 0},
409 },
410 },
411 expectErr: true,
412 errorMsg: "match ID is required",
413 },
414 {
415 name: "match_missing_text_sigle",
416 response: &service.SearchResponse{
417 Meta: service.SearchMeta{
418 TotalResults: 100,
419 Count: 10,
420 StartIndex: 0,
421 ItemsPerPage: 10,
422 },
423 Matches: []service.SearchMatch{
424 {MatchID: "match1", TextSigle: "", Position: 0},
425 },
426 },
427 expectErr: true,
428 errorMsg: "textSigle is required",
429 },
430 {
431 name: "match_negative_position",
432 response: &service.SearchResponse{
433 Meta: service.SearchMeta{
434 TotalResults: 100,
435 Count: 10,
436 StartIndex: 0,
437 ItemsPerPage: 10,
438 },
439 Matches: []service.SearchMatch{
440 {MatchID: "match1", TextSigle: "text1", Position: -1},
441 },
442 },
443 expectErr: true,
444 errorMsg: "position cannot be negative",
445 },
446 }
447
448 for _, tt := range tests {
449 t.Run(tt.name, func(t *testing.T) {
450 err := validator.ValidateSearchResponse(tt.response)
451 if tt.expectErr {
452 assert.Error(t, err)
453 assert.Contains(t, err.Error(), tt.errorMsg)
454 } else {
455 assert.NoError(t, err)
456 }
457 })
458 }
459}
460
461func TestValidateCorpusListResponse(t *testing.T) {
462 logger := zerolog.Nop()
463 validator := New(logger)
464
465 tests := []struct {
466 name string
467 response *service.CorpusListResponse
468 expectErr bool
469 errorMsg string
470 }{
471 {
472 name: "nil_response",
473 response: nil,
474 expectErr: true,
475 errorMsg: "corpus list response is nil",
476 },
477 {
478 name: "valid_response",
479 response: &service.CorpusListResponse{
480 Corpora: []service.CorpusInfo{
481 {
482 ID: "corpus1",
483 Name: "Test Corpus 1",
484 Documents: 100,
485 Tokens: 50000,
486 },
487 {
488 ID: "corpus2",
489 Name: "Test Corpus 2",
490 Documents: 200,
491 Tokens: 75000,
492 },
493 },
494 },
495 expectErr: false,
496 },
497 {
498 name: "empty_corpus_list",
499 response: &service.CorpusListResponse{
500 Corpora: []service.CorpusInfo{},
501 },
502 expectErr: false,
503 },
504 {
505 name: "corpus_missing_id",
506 response: &service.CorpusListResponse{
507 Corpora: []service.CorpusInfo{
508 {
509 ID: "",
510 Name: "Test Corpus",
511 Documents: 100,
512 Tokens: 50000,
513 },
514 },
515 },
516 expectErr: true,
517 errorMsg: "corpus ID is required",
518 },
519 {
520 name: "corpus_invalid_id",
521 response: &service.CorpusListResponse{
522 Corpora: []service.CorpusInfo{
523 {
Akron8db31c32025-06-17 12:22:41 +0200524 ID: "invalid@corpus#format",
Akron81f709c2025-06-12 17:30:55 +0200525 Name: "Test Corpus",
526 Documents: 100,
527 Tokens: 50000,
528 },
529 },
530 },
531 expectErr: true,
Akron8db31c32025-06-17 12:22:41 +0200532 errorMsg: "collection query contains invalid characters",
Akron81f709c2025-06-12 17:30:55 +0200533 },
534 {
535 name: "corpus_missing_name",
536 response: &service.CorpusListResponse{
537 Corpora: []service.CorpusInfo{
538 {
539 ID: "corpus1",
540 Name: "",
541 Documents: 100,
542 Tokens: 50000,
543 },
544 },
545 },
546 expectErr: true,
547 errorMsg: "corpus name is required",
548 },
549 {
550 name: "corpus_negative_documents",
551 response: &service.CorpusListResponse{
552 Corpora: []service.CorpusInfo{
553 {
554 ID: "corpus1",
555 Name: "Test Corpus",
556 Documents: -1,
557 Tokens: 50000,
558 },
559 },
560 },
561 expectErr: true,
562 errorMsg: "document count cannot be negative",
563 },
564 {
565 name: "corpus_negative_tokens",
566 response: &service.CorpusListResponse{
567 Corpora: []service.CorpusInfo{
568 {
569 ID: "corpus1",
570 Name: "Test Corpus",
571 Documents: 100,
572 Tokens: -1,
573 },
574 },
575 },
576 expectErr: true,
577 errorMsg: "token count cannot be negative",
578 },
579 }
580
581 for _, tt := range tests {
582 t.Run(tt.name, func(t *testing.T) {
583 err := validator.ValidateCorpusListResponse(tt.response)
584 if tt.expectErr {
585 assert.Error(t, err)
586 assert.Contains(t, err.Error(), tt.errorMsg)
587 } else {
588 assert.NoError(t, err)
589 }
590 })
591 }
592}
593
594func TestValidateStatisticsResponse(t *testing.T) {
595 logger := zerolog.Nop()
596 validator := New(logger)
597
598 tests := []struct {
599 name string
600 response *service.StatisticsResponse
601 expectErr bool
602 errorMsg string
603 }{
604 {
605 name: "nil_response",
606 response: nil,
607 expectErr: true,
608 errorMsg: "statistics response is nil",
609 },
610 {
611 name: "valid_response",
612 response: &service.StatisticsResponse{
613 Documents: 100,
614 Tokens: 50000,
615 Sentences: 2500,
616 Paragraphs: 500,
617 },
618 expectErr: false,
619 },
620 {
621 name: "negative_documents",
622 response: &service.StatisticsResponse{
623 Documents: -1,
624 Tokens: 50000,
625 },
626 expectErr: true,
627 errorMsg: "document count cannot be negative",
628 },
629 {
630 name: "negative_tokens",
631 response: &service.StatisticsResponse{
632 Documents: 100,
633 Tokens: -1,
634 },
635 expectErr: true,
636 errorMsg: "token count cannot be negative",
637 },
638 {
639 name: "negative_sentences",
640 response: &service.StatisticsResponse{
641 Documents: 100,
642 Tokens: 50000,
643 Sentences: -1,
644 },
645 expectErr: true,
646 errorMsg: "sentence count cannot be negative",
647 },
648 {
649 name: "negative_paragraphs",
650 response: &service.StatisticsResponse{
651 Documents: 100,
652 Tokens: 50000,
653 Paragraphs: -1,
654 },
655 expectErr: true,
656 errorMsg: "paragraph count cannot be negative",
657 },
658 }
659
660 for _, tt := range tests {
661 t.Run(tt.name, func(t *testing.T) {
662 err := validator.ValidateStatisticsResponse(tt.response)
663 if tt.expectErr {
664 assert.Error(t, err)
665 assert.Contains(t, err.Error(), tt.errorMsg)
666 } else {
667 assert.NoError(t, err)
668 }
669 })
670 }
671}
672
673func TestValidateQuerySafety(t *testing.T) {
674 logger := zerolog.Nop()
675 validator := New(logger)
676
677 tests := []struct {
678 name string
679 query string
680 expectErr bool
681 errorMsg string
682 }{
683 {
684 name: "valid_query",
685 query: "test query",
686 expectErr: false,
687 },
688 {
689 name: "query_too_long",
690 query: string(make([]byte, 10001)),
691 expectErr: true,
692 errorMsg: "query is too long",
693 },
694 {
695 name: "query_with_url",
696 query: "http://example.com",
697 expectErr: true,
698 errorMsg: "query appears to contain a URL",
699 },
700 {
701 name: "query_with_https_url",
702 query: "https://example.com",
703 expectErr: true,
704 errorMsg: "query appears to contain a URL",
705 },
706 {
707 name: "query_unmatched_open_paren",
708 query: "test (query",
709 expectErr: true,
710 errorMsg: "unmatched parentheses",
711 },
712 {
713 name: "query_unmatched_close_paren",
714 query: "test query)",
715 expectErr: true,
716 errorMsg: "unmatched parentheses",
717 },
718 {
719 name: "query_too_many_nested_parens",
720 query: "(" + string(make([]byte, 100)) + ")" + "(" + string(make([]byte, 100)) + ")",
721 expectErr: false, // This should be under the limit
722 },
723 }
724
725 for _, tt := range tests {
726 t.Run(tt.name, func(t *testing.T) {
727 err := validator.validateQuerySafety(tt.query)
728 if tt.expectErr {
729 assert.Error(t, err)
730 assert.Contains(t, err.Error(), tt.errorMsg)
731 } else {
732 assert.NoError(t, err)
733 }
734 })
735 }
736}
737
738func TestValidateCorpusID(t *testing.T) {
739 logger := zerolog.Nop()
740 validator := New(logger)
741
742 tests := []struct {
743 name string
744 corpusID string
745 expectErr bool
746 errorMsg string
747 }{
748 {
749 name: "valid_corpus_id",
750 corpusID: "test-corpus_1.0",
751 expectErr: false,
752 },
753 {
754 name: "empty_corpus_id",
755 corpusID: "",
756 expectErr: true,
757 errorMsg: "corpus ID cannot be empty",
758 },
759 {
760 name: "corpus_id_too_long",
761 corpusID: string(make([]byte, 101)),
762 expectErr: true,
763 errorMsg: "corpus ID is too long",
764 },
765 {
766 name: "corpus_id_invalid_chars",
Akron8db31c32025-06-17 12:22:41 +0200767 corpusID: "invalid@corpus#format",
Akron81f709c2025-06-12 17:30:55 +0200768 expectErr: true,
Akron8db31c32025-06-17 12:22:41 +0200769 errorMsg: "collection query contains invalid characters",
Akron81f709c2025-06-12 17:30:55 +0200770 },
771 {
772 name: "corpus_id_with_space",
773 corpusID: "corpus with space",
Akron8db31c32025-06-17 12:22:41 +0200774 expectErr: false, // Now allowed with updated regex
775 },
776 {
777 name: "corpus_id_with_boolean_operators",
778 corpusID: "corpus1 & corpus2",
779 expectErr: false, // Now allowed with updated regex
780 },
781 {
782 name: "collection_query_with_metadata",
783 corpusID: "textClass = \"politics\" & pubDate in 2020",
784 expectErr: false, // Collection query syntax
785 },
786 {
787 name: "collection_query_with_regex",
788 corpusID: "corpusSigle = \"DeReKo/WPD*\" & availability = /CC.*/",
789 expectErr: false, // Collection query with regex
790 },
791 {
792 name: "collection_query_complex",
793 corpusID: "(textType = \"news\" | textType = \"blog\") & textClass != \"fiction\"",
794 expectErr: false, // Complex collection query
Akron81f709c2025-06-12 17:30:55 +0200795 },
796 }
797
798 for _, tt := range tests {
799 t.Run(tt.name, func(t *testing.T) {
800 err := validator.validateCorpusID(tt.corpusID)
801 if tt.expectErr {
802 assert.Error(t, err)
803 assert.Contains(t, err.Error(), tt.errorMsg)
804 } else {
805 assert.NoError(t, err)
806 }
807 })
808 }
809}
810
811func TestSanitizeQuery(t *testing.T) {
812 logger := zerolog.Nop()
813 validator := New(logger)
814
815 tests := []struct {
816 name string
817 input string
818 expected string
819 }{
820 {
821 name: "trim_whitespace",
822 input: " test query ",
823 expected: "test query",
824 },
825 {
826 name: "remove_null_bytes",
827 input: "test\x00query",
828 expected: "testquery",
829 },
830 {
831 name: "normalize_whitespace",
832 input: "test query\t\nwith spaces",
833 expected: "test query with spaces",
834 },
835 {
836 name: "empty_string",
837 input: "",
838 expected: "",
839 },
840 {
841 name: "already_clean",
842 input: "test query",
843 expected: "test query",
844 },
845 }
846
847 for _, tt := range tests {
848 t.Run(tt.name, func(t *testing.T) {
849 result := validator.SanitizeQuery(tt.input)
850 assert.Equal(t, tt.expected, result)
851 })
852 }
853}
854
855func TestSanitizeCorpusID(t *testing.T) {
856 logger := zerolog.Nop()
857 validator := New(logger)
858
859 tests := []struct {
860 name string
861 input string
862 expected string
863 }{
864 {
865 name: "trim_whitespace",
866 input: " Test-Corpus ",
867 expected: "test-corpus",
868 },
869 {
870 name: "remove_null_bytes",
871 input: "test\x00corpus",
872 expected: "testcorpus",
873 },
874 {
875 name: "lowercase",
876 input: "Test-Corpus_1.0",
877 expected: "test-corpus_1.0",
878 },
879 {
880 name: "empty_string",
881 input: "",
882 expected: "",
883 },
884 {
885 name: "already_clean",
886 input: "test-corpus",
887 expected: "test-corpus",
888 },
889 }
890
891 for _, tt := range tests {
892 t.Run(tt.name, func(t *testing.T) {
893 result := validator.SanitizeCorpusID(tt.input)
894 assert.Equal(t, tt.expected, result)
895 })
896 }
897}