| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 1 | grammar CollectionQuery; |
| 2 | |||||
| 3 | @header {package de.ids_mannheim.korap.query.serialize.util;} | ||||
| 4 | |||||
| 5 | /* | ||||
| 6 | -- author: jbingel | ||||
| Joachim Bingel | 787836a | 2014-08-07 14:50:18 +0000 | [diff] [blame] | 7 | -- date: 2014-05-11 |
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 8 | */ |
| 9 | |||||
| 10 | /* | ||||
| 11 | * LEXER SECTION | ||||
| 12 | */ | ||||
/*
 Regular expression flags
   /x  allows submatches, like /^.*?RE.*?$/
   /X  forces full matches
   /i  makes matching case-insensitive
   /I  forces case-sensitive matching
*/
// Flag token that starts with the match-scope letter (x or X), optionally
// followed by a case letter (i or I): "/x", "/X", "/xi", "/xI", "/Xi", "/XI".
FLAG_xi : '/' [xX] [iI]? ;
// Same flags with the case letter first: "/i", "/I", "/ix", "/iX", "/Ix", "/IX".
FLAG_ix : '/' [iI] [xX]? ;
| 22 | |||||
| 23 | |||||
// Round and square brackets.
LRB   : '(' ;
RRB   : ')' ;
LB    : '[' ;
RB    : ']' ;

// Comparison operators.
LT    : '<' ;
GT    : '>' ;
LEQ   : '<=' ;
GEQ   : '>=' ;
EQ    : '=' ;

// Boolean connectives: the symbol, or the English/German keyword written
// entirely in upper case or entirely in lower case.
AND
    : '&'
    | 'AND' | 'and'
    | 'UND' | 'und'
    ;
OR
    : '|'
    | 'OR' | 'or'
    | 'ODER' | 'oder'
    ;

// Single-character punctuation tokens.
NEG   : '!' ;
QMARK : '?' ;
SLASH : '/' ;
COLON : ':' ;
DASH  : '-' ;
TILDE : '~' ;
// Lower-case keywords consumed by the dateOp parser rule.
SINCE : 'since' ;
UNTIL : 'until' ;
IN    : 'in' ;
ON    : 'on' ;
// Runs of spaces, tabs, carriage returns and line feeds are dropped by the
// lexer and never reach the parser.
WS : [ \t\r\n]+ -> skip ;
// Any single character except space, tab and '/'.
// The '/' is written unescaped: "\/" is not among the escapes ANTLR 4 defines
// for character sets, and newer tool versions report it as an invalid escape
// sequence. The set of matched characters is unchanged.
// NOTE(review): no rule in the visible grammar references this fragment.
fragment NO_RE : ~[ \t/] ;
// A single character that may appear in a WORD: anything except whitespace
// and the characters that carry meaning elsewhere in the query syntax.
// '\r' and '\n' are excluded along with space and tab so that a WORD ends at
// a line break, matching what the WS rule treats as skippable whitespace.
fragment ALPHABET
    : ~( ' ' | '\t' | '\r' | '\n'
       | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
       | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\'
       | '!' | '=' | '~' | '&' | '^' | '<' | '>' )
    ;
// A single ASCII letter, upper or lower case.
// NOTE(review): not referenced by any active rule visible in this grammar.
fragment ALPHA : [A-Za-z] ;
| Joachim Bingel | 4bf1ec5 | 2014-10-08 08:18:03 +0000 | [diff] [blame] | 50 | |
| Joachim Bingel | 1f4c5ad | 2014-12-16 10:40:42 +0000 | [diff] [blame^] | 51 | |
// A single decimal digit. Declared as a real token (not a fragment) because
// the parser rule `value` refers to DIGIT directly.
// NOTE(review): WORD also accepts digits and the lexer prefers the longest
// match, so runs of two or more digits appear to be tokenized as WORD or DATE
// rather than as a DIGIT sequence -- confirm.
DIGIT : '0'..'9' ;
// Four digits, optionally followed by "-NN", optionally followed by a second
// "-NN" (for example 2014, 2014-05 or 2014-05-11). The digit values are not
// range-checked here.
DATE
    : DIGIT DIGIT DIGIT DIGIT
      ( DASH DIGIT DIGIT
        ( DASH DIGIT DIGIT )?
      )?
    ;
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 56 | |
// A single carriage return or line feed, skipped.
// NOTE(review): WS is declared earlier and already matches and skips these
// characters, so this rule looks unreachable; kept so the generated token
// names stay the same.
NL : ( '\r' | '\n' ) -> skip ;

// Parser rule: one or more WS tokens.
// NOTE(review): WS is skipped by the lexer, so no WS token should ever reach
// the parser and this rule presumably never matches -- confirm before relying
// on it.
ws : WS+ ;
| 59 | |||||
// One or more ALPHABET characters.
WORD : ALPHABET ALPHABET* ;
// Disabled stricter variant that requires at least one alphabetical
// (non-numeric) letter:
//WORD : ALPHABET* ALPHA ALPHABET*;
| 62 | |||||
| Joachim Bingel | 787836a | 2014-08-07 14:50:18 +0000 | [diff] [blame] | 63 | |
| Joachim Bingel | 43607ed | 2014-05-19 12:39:55 +0000 | [diff] [blame] | 64 | /* |
| Joachim Bingel | a3f51f7 | 2014-07-22 14:45:31 +0000 | [diff] [blame] | 65 | * Regular expressions |
| Joachim Bingel | 43607ed | 2014-05-19 12:39:55 +0000 | [diff] [blame] | 66 | */ |
// Brace-delimited occurrence quantifier; whitespace is tolerated between the
// parts. Accepted shapes:
//   {,n}  {m,n}   optional lower bound, comma, mandatory upper bound
//   {n}   {n,}    a count with an optional trailing comma
fragment FOCC
    : '{' WS*
      ( [0-9]* WS* ',' WS* [0-9]+
      | [0-9]+ WS* ','?
      )
      WS* '}'
    ;
// A single literal character inside a regular expression: anything except the
// quantifier and grouping metacharacters, the '/' delimiter, '|', quotes, ':'
// and the backslash. '.' is not excluded, so it is matched by this fragment.
fragment RE_char
    : ~( '*' | '?' | '+'
       | '{' | '}' | '[' | ']' | '(' | ')'
       | '/' | '|' | '"' | ':' | '\'' | '\\' )
    ;
// Regular-expression sub-structure.
//
// Nested constructs (groups, the right-hand side of an alternation) recurse
// into RE_expr, the undelimited regex body, rather than into REGEX. REGEX
// carries the surrounding '/' delimiters, so recursing into it would force
// every nested part to be slash-delimited again, and ordinary patterns such
// as /(ab)+/ or /a|b/ could not be matched as a single REGEX token.

// Alternation: one or more "<atom> '|' <body>" sequences.
fragment RE_alter   : ( (RE_char | RE_group | RE_chgroup) '|' RE_expr )+ ;
// Character class: one or more literal characters in square brackets.
fragment RE_chgroup : '[' RE_char+ ']' ;
// Quantified atom, optionally followed by '?'.
fragment RE_quant   : (RE_star | RE_plus | RE_occ) QMARK? ;
// Atom followed by '?', '*', '+' or a brace quantifier respectively.
fragment RE_opt     : (RE_char | RE_chgroup | RE_group) '?' ;
fragment RE_star    : (RE_char | RE_chgroup | RE_group) '*' ;
fragment RE_plus    : (RE_char | RE_chgroup | RE_group) '+' ;
fragment RE_occ     : (RE_char | RE_chgroup | RE_group) FOCC ;
// Parenthesised sub-expression.
fragment RE_group   : '(' RE_expr ')' ;
// Undelimited regex body; may be empty, so "//" is still a REGEX token.
fragment RE_expr
    : ( '.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group )*
    ;

// A complete regular expression literal: a body enclosed in slashes.
REGEX : SLASH RE_expr SLASH ;
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 79 | |
| 80 | /* | ||||
| 81 | * PARSER SECTION | ||||
| 82 | */ | ||||
| 83 | |||||
// Parser-level wrapper around a single REGEX token.
regex : REGEX ;
| 87 | |||||
// Parser-level wrapper around a single DATE token.
date : DATE ;
| 91 | |||||
// Keyword that relates a field to a date in a dateconstraint.
dateOp : ( SINCE | UNTIL | IN | ON ) ;
| 98 | |||||
// Comparison operator used in a constraint. Only EQ may be preceded by NEG.
operator
    : NEG? EQ
    | LT
    | GT
    | LEQ
    | GEQ
    | TILDE
    ;
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 101 | |
// A single query expression: a field constraint, a date constraint, or a
// bracketed token.
expr : constraint | dateconstraint | token ;
| 107 | |||||
// A field compared to a date through a date keyword: <field> <dateOp> <date>.
dateconstraint
    : field dateOp date
    // Disabled two-sided form:
    //| date dateOp field dateOp date
    ;
| 112 | |||||
// A field compared to a value: <field> <operator> <value>.
constraint : field operator value ;
| 116 | |||||
// A term, or a boolean group of terms, enclosed in square brackets.
token : LB ( term | termGroup ) RB ;
| 120 | |||||
// One term inside a token, in this order: any number of negations, an
// optional "foundry/" prefix, the layer, the term operator, the key, an
// optional ":value" suffix and an optional flag.
// NOTE(review): the lexer prefers the longest match, so a '/' directly
// followed by x, X, i or I is likely tokenized as a flag token rather than
// SLASH; a layer starting with one of those letters after "foundry/" may
// therefore fail to parse -- confirm.
term
    : NEG*
      ( foundry SLASH )?
      layer termOp key
      ( COLON value )?
      flag?
    ;
| 124 | |||||
// Operator between layer and key: one or two EQ, or one or two TILDE, each
// form optionally preceded by a single NEG.
termOp : NEG? ( EQ? EQ | TILDE? TILDE ) ;
| 128 | |||||
// Two operands joined by a boolean operator. The left operand is a term or a
// parenthesised group; the right operand may additionally be an
// unparenthesised group, which lets chains nest to the right.
termGroup
    : ( term | LRB termGroup RRB )
      booleanOp
      ( term | LRB termGroup RRB | termGroup )
    ;
| 132 | |||||
// The key of a term: a plain word or a regular expression.
key : WORD | regex ;
| 137 | |||||
// The foundry part of a term (the word before the slash).
foundry : WORD ;
| 141 | |||||
// The layer part of a term (the word before the term operator).
layer : WORD ;
| 145 | |||||
// Boolean connective between two operands.
booleanOp : ( AND | OR ) ;
| 150 | |||||
// A regex flag token, in either letter order.
flag : ( FLAG_xi | FLAG_ix ) ;
| 155 | |||||
// The field name on the left-hand side of a constraint or dateconstraint.
field : WORD ;
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 159 | |
// The right-hand side of a constraint (also used after ':' in a term): a
// word, a digit sequence, a DATE token, a quoted multiword, or a regex.
value
    : WORD
    | DIGIT+
    | DATE
    | multiword
    | regex
    ;
| 167 | |||||
// One or more words enclosed in double quotes.
// NOTE(review): only WORD tokens are accepted between the quotes. Input that
// the lexer turns into another token type (for example the keywords matched
// by AND, OR, IN or ON, or a DATE) is presumably rejected here -- confirm
// whether that is intended.
multiword : '"' WORD+ '"' ;
| 171 | |||||
// Two operands joined by a boolean operator. The right operand may itself be
// a relation, so unparenthesised chains nest to the right.
relation
    : ( expr | exprGroup )
      booleanOp
      ( expr | exprGroup | relation )
    ;
| Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 175 | |
// An expression, a nested group, or a relation enclosed in round brackets.
exprGroup
    : LRB
      ( expr | exprGroup | relation )
      RRB
    ;
| 179 | |||||
// Entry rule: exactly one expression, group or relation, followed by the end
// of input.
start : ( expr | exprGroup | relation ) EOF ;