Improve parameter validation for query languages and collection queries
Change-Id: If2e7ec1b063a6e114a6c5582463af784b75c37b8
diff --git a/validation/validator.go b/validation/validator.go
index 66b77c8..343e820 100644
--- a/validation/validator.go
+++ b/validation/validator.go
@@ -65,15 +65,11 @@
// Regular expressions for validation
var (
- // Query language validation - KorAP supports poliqarp, cosmas2, annis
- validQueryLanguages = map[string]bool{
- "poliqarp": true,
- "cosmas2": true,
- "annis": true,
- }
+ // Query language validation
+ validQueryLanguages = []string{"poliqarp", "poliqarpplus", "cosmas2", "annis", "cql", "cqp", "fcsql"}
- // Corpus ID validation - alphanumeric with dots, hyphens, underscores
- corpusIDRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`)
+ // Corpus ID validation - accepts plain corpus sigles and KorAP collection queries (metadata fields, operators, quoted values, /regex/)
+ corpusIDRegex = regexp.MustCompile(`^[a-zA-Z0-9._\-\s&|!=<>()/*"']+$`)
// Action validation for metadata requests
validMetadataActions = map[string]bool{
@@ -105,15 +101,11 @@
}
// Validate query language if provided
- if req.QueryLanguage != "" && !validQueryLanguages[req.QueryLanguage] {
- var validLangs []string
- for lang := range validQueryLanguages {
- validLangs = append(validLangs, lang)
- }
+ if req.QueryLanguage != "" && !contains(validQueryLanguages, req.QueryLanguage) {
errors = append(errors, ValidationError{
Field: "query_language",
Value: req.QueryLanguage,
- Message: fmt.Sprintf("invalid query language, must be one of: %s", strings.Join(validLangs, ", ")),
+ Message: fmt.Sprintf("invalid query language, must be one of: %s", strings.Join(validQueryLanguages, ", ")),
})
}
@@ -404,7 +396,9 @@
return nil
}
-// validateCorpusID validates a corpus identifier
+// validateCorpusID validates a corpus identifier or collection query.
+// It accepts both simple corpus sigles (e.g., "DeReKo-2023-I") and complex
+// collection queries with metadata fields (e.g., "textClass = \"politics\" & pubDate in 2020").
func (v *Validator) validateCorpusID(corpusID string) error {
if len(corpusID) == 0 {
return fmt.Errorf("corpus ID cannot be empty")
@@ -415,7 +409,7 @@
}
if !corpusIDRegex.MatchString(corpusID) {
- return fmt.Errorf("corpus ID contains invalid characters (only alphanumeric, dots, hyphens, underscores allowed)")
+ return fmt.Errorf("collection query contains invalid characters (supports alphanumeric, dots, hyphens, underscores, spaces, quotes, operators & | ! = < > in, parentheses, and regex /pattern/)")
}
return nil
@@ -458,3 +452,13 @@
return sanitized
}
+
+// contains reports whether slice has an element exactly equal (==, case-sensitive) to item.
+func contains(slice []string, item string) bool {
+ for _, s := range slice { // linear scan over every element
+ if s == item {
+ return true // stop at the first match
+ }
+ }
+ return false // no element matched; also the result for a nil or empty slice
+}