Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 1 | grammar CollectionQuery; |
2 | |||||
3 | @header {package de.ids_mannheim.korap.query.serialize.util;} | ||||
4 | |||||
5 | /* | ||||
6 | -- author: jbingel | ||||
Joachim Bingel | 787836a | 2014-08-07 14:50:18 +0000 | [diff] [blame] | 7 | -- date: 2014-05-11 |
Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 8 | */ |
9 | |||||
10 | /* | ||||
11 | * LEXER SECTION | ||||
12 | */ | ||||
/*
 * Regular-expression flags (the optional trailing `flag` of a term):
 *   /x  allows submatches like /^.*?RE.*?$/
 *   /X  forces full matches
 *   /i  means case insensitivity
 *   /I  forces case sensitivity
 * Both orders are accepted (x-kind first or i-kind first); the second
 * flag letter is optional in either case.
 */
FLAG_xi : '/' [xX] [iI]? ;
FLAG_ix : '/' [iI] [xX]? ;
22 | |||||
23 | |||||
// --- brackets ---------------------------------------------------------
LRB       : '(' ;
RRB       : ')' ;
LB        : '[' ;
RB        : ']' ;

// --- comparison operators ---------------------------------------------
// The lexer prefers the longest match, so '<=' / '>=' win over '<' / '>'.
LT        : '<' ;
GT        : '>' ;
LEQ       : '<=' ;
GEQ       : '>=' ;
EQ        : '=' ;

// --- boolean operators (symbolic, English and German spellings) --------
AND       : '&' | 'AND' | 'and' | 'UND' | 'und' ;
OR        : '|' | 'OR'  | 'or'  | 'ODER' | 'oder' ;

// --- miscellaneous punctuation ----------------------------------------
NEG       : '!' ;
QMARK     : '?' ;
SLASH     : '/' ;
COLON     : ':' ;
DASH      : '-' ;
TILDE     : '~' ;
NEGTILDE  : '!~' ;

// --- date keywords ----------------------------------------------------
// Declared before WORD so that these spellings are not lexed as WORD.
SINCE     : 'since' ;
UNTIL     : 'until' ;
IN        : 'in' ;
ON        : 'on' ;

// Whitespace (including line breaks) is dropped from the token stream.
WS        : [ \t\r\n]+ -> skip ;
// Any character except blank, tab and slash.
// NOTE(review): not referenced by any rule visible in this grammar.
fragment NO_RE    : ~[ \t\/];

// Characters that may appear in a WORD: everything except whitespace and
// the characters that carry syntactic meaning elsewhere in this grammar.
// '\r' and '\n' are excluded as well: WS treats them as skippable
// whitespace, and without the exclusion a WORD would silently absorb
// line breaks adjacent to it (e.g. "Goethe\n&" would yield the WORD
// "Goethe\n" instead of "Goethe").
fragment ALPHABET : ~( '\t' | ' ' | '\r' | '\n' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                     | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );

// ASCII letters; only used by the commented-out stricter WORD variant.
fragment ALPHA    : [a-zA-Z];
Joachim Bingel | 4bf1ec5 | 2014-10-08 08:18:03 +0000 | [diff] [blame] | 51 | |
Joachim Bingel | 1f4c5ad | 2014-12-16 10:40:42 +0000 | [diff] [blame] | 52 | |
// A single decimal digit.
DIGIT : [0-9] ;

// Dates of the form YYYY, YYYY-MM or YYYY-MM-DD.
// DATE is declared before WORD, so on an equal-length match DATE wins.
DATE
    : DIGIT DIGIT DIGIT DIGIT
      ( DASH DIGIT DIGIT
        ( DASH DIGIT DIGIT )?
      )?
    ;

// NOTE(review): WS (declared earlier) already matches and skips '\r' and
// '\n', so NL appears to be unreachable, and the parser rule `ws` can never
// match because WS tokens are skipped. Both are kept so the generated
// token/rule set is unchanged.
NL : [\r\n] -> skip ;
ws : WS+ ;

// A run of one or more ALPHABET characters.
WORD : ALPHABET+ ;
//WORD : ALPHABET* ALPHA ALPHABET*; // needs to have at least one alphabetical letter (non-numeric)
63 | |||||
Joachim Bingel | 787836a | 2014-08-07 14:50:18 +0000 | [diff] [blame] | 64 | |
/*
 * Regular expressions
 *
 * A REGEX token is a slash-delimited pattern such as /Go+ethe/ or
 * /(Sturm|Drang)?/ . The slash-free pattern body is described by the
 * fragment RE_expr. Groups, alternations and quantified groups recurse
 * into RE_expr (not into REGEX), because the delimiting slashes belong
 * only to the outermost token; recursing into REGEX would require every
 * nested group to be written as "(/.../)".
 */

// Occurrence quantifier: {n}, {n,}, {n,m} or {,m}; inner blanks tolerated.
fragment FOCC       : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';

// Any single character without special meaning inside a pattern.
// NOTE(review): backslash is excluded and no escape rule exists, so
// metacharacters cannot be escaped with this grammar.
fragment RE_char    : ~('*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
                      | '(' | ')' | '|' | '"' | ':' | '\'' | '\\');

// Alternation: <atom> '|' <expression>, possibly repeated.
fragment RE_alter   : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;

// Character class, e.g. [abc].
fragment RE_chgroup : '[' RE_char+ ']';

// Quantified atom, optionally followed by '?' (e.g. a*? , b+? ).
fragment RE_quant   : (RE_star | RE_plus | RE_occ) QMARK?;
fragment RE_opt     : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '?';
fragment RE_star    : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '*';
fragment RE_plus    : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '+';
fragment RE_occ     : (RE_char | RE_chgroup | ( '(' RE_expr ')')) FOCC;

// Parenthesised sub-expression.
fragment RE_group   : '(' RE_expr ')';

// Pattern body without delimiters; may be empty so that "//" stays a
// valid REGEX, as before.
fragment RE_expr    : ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)*;

REGEX               : SLASH RE_expr SLASH;
Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 80 | |
81 | /* | ||||
82 | * PARSER SECTION | ||||
83 | */ | ||||
84 | |||||
// Parser-level wrapper around a REGEX token.
regex  : REGEX ;

// Parser-level wrapper around a DATE token.
date   : DATE ;

// Keyword relating a field to a date in a dateconstraint.
dateOp : SINCE | UNTIL | IN | ON ;
99 | |||||
// Comparison operator of a constraint: '=', '!=', '<', '>', '<=', '>=',
// '~' or '!~'.
operator
    : NEG? EQ
    | LT
    | GT
    | LEQ
    | GEQ
    | TILDE
    | NEGTILDE
    ;
Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 102 | |
// A single atomic expression: a field constraint, a date constraint or a
// bracketed token.
expr
    : constraint
    | dateconstraint
    | token
    ;

// <field> <since|until|in|on> <date>
dateconstraint
    : field dateOp date
    //| date dateOp field dateOp date
    ;

// <field> <operator> <value>
constraint
    : field operator value
    ;
117 | |||||
// A bracketed term or boolean group of terms: '[' ... ']'.
token
    : LB ( term | termGroup ) RB
    ;

// Structure: any number of '!' , optional "<foundry>/" prefix, layer,
// operator, key, optional ":<value>" suffix, optional regex flag.
term
    : NEG*
      ( foundry SLASH )?
      layer termOp key
      ( COLON value )?
      flag?
    ;

// '=', '==', '~' or '~~', each optionally preceded by '!'.
termOp
    : NEG? EQ? EQ
    | NEG? TILDE? TILDE
    ;

// Two operands joined by a boolean operator; the right-hand side may
// itself be an unparenthesised termGroup, which allows chains such as
// "a & b | c".
termGroup
    : ( term | LRB termGroup RRB )
      booleanOp
      ( term | LRB termGroup RRB | termGroup )
    ;
133 | |||||
// Key of a term: a plain word or a regular expression.
key       : WORD | regex ;

// Foundry prefix of a term (the part before the slash).
foundry   : WORD ;

// Layer name of a term.
layer     : WORD ;

// Boolean connective between terms or expressions.
booleanOp : AND | OR ;

// Regex flag in either letter order.
flag      : FLAG_xi | FLAG_ix ;

// Field name on the left-hand side of a constraint.
field     : WORD ;
Joachim Bingel | dbbde77 | 2014-05-12 15:26:10 +0000 | [diff] [blame] | 160 | |
// Right-hand side of a constraint (also used after ':' in a term).
// NOTE(review): because the lexer prefers the longest match, a run of
// several adjacent digits appears to be lexed as WORD (or as DATE when it
// is exactly four digits) rather than as repeated DIGIT tokens, so the
// DIGIT+ alternative presumably only applies to a lone digit or to digits
// separated by whitespace — confirm against the lexer behaviour.
value
    : WORD
    | DIGIT+
    | DATE
    | multiword
    | regex
    ;
168 | |||||
/*
 * A double-quoted sequence of one or more words, e.g. "Der Spiegel".
 *
 * The lexer assigns dedicated token types to boolean/date keywords
 * (and, or, und, oder, in, on, since, until, ...) and to digits/dates
 * before WORD is considered, so those token types must be accepted here
 * explicitly; otherwise a phrase such as "Sturm und Drang" or
 * "Berlin in 1989" is rejected although it is plainly quoted text.
 * Input that parsed before still yields the same tree.
 */
multiword
    : '"' ( WORD | DIGIT | DATE | AND | OR | SINCE | UNTIL | IN | ON )+ '"'
    ;
172 | |||||
// Two operands joined by a boolean operator; the right-hand operand may be
// another relation, which allows unparenthesised chains.
relation
    : ( expr | exprGroup )
      booleanOp
      ( expr | exprGroup | relation )
    ;

// Parenthesised expression, group or relation.
exprGroup
    : LRB ( expr | exprGroup | relation ) RRB
    ;

// Entry rule: exactly one expression, group or relation, then end of input.
start
    : ( expr | exprGroup | relation ) EOF
    ;