blob: da730819b907ffeadcbc400c7a105bc418f3bdb4 [file] [log] [blame]
Joachim Bingeldbbde772014-05-12 15:26:10 +00001grammar CollectionQuery;
2
3@header {package de.ids_mannheim.korap.query.serialize.util;}
4
5/*
6 -- author: jbingel
Joachim Bingel787836a2014-08-07 14:50:18 +00007 -- date: 2014-05-11
Joachim Bingeldbbde772014-05-12 15:26:10 +00008*/
9
10/*
11 * LEXER SECTION
12 */
/*
 * Regular-expression flags (consumed by the `flag` parser rule).
 *   /x  allows submatches like /^.*?RE.*?$/
 *   /X  forces full matches
 *   /i  means case insensitivity
 *   /I  forces case sensitivity
 * Either letter may lead and the second letter is optional; the two token
 * types only differ in which letter comes first.
 */
FLAG_xi
    : '/' [xX] [iI]?
    ;
FLAG_ix
    : '/' [iI] [xX]?
    ;
22
23
/* Brackets */
LRB      : '(' ;
RRB      : ')' ;
LB       : '[' ;
RB       : ']' ;

/*
 * Comparison operators. The lexer prefers the longest match, so '<=' and
 * '>=' become LEQ/GEQ rather than LT/GT followed by EQ.
 */
LT       : '<' ;
GT       : '>' ;
LEQ      : '<=' ;
GEQ      : '>=' ;
EQ       : '=' ;

/* Boolean connectives: symbol, English and German spellings. */
AND      : '&'
         | 'AND'
         | 'and'
         | 'UND'
         | 'und'
         ;
OR       : '|'
         | 'OR'
         | 'or'
         | 'ODER'
         | 'oder'
         ;

/* Remaining single- and double-character symbols. */
NEG      : '!' ;
QMARK    : '?' ;
SLASH    : '/' ;
COLON    : ':' ;
DASH     : '-' ;
TILDE    : '~' ;
// A contiguous "!~" is emitted as this single token (longest match),
// not as NEG followed by TILDE.
NEGTILDE : '!~' ;
/*
 * Date keywords used by the `dateOp` parser rule.
 * They are declared ahead of WORD: the same text would also match WORD
 * with equal length, and the earlier rule wins such ties.
 */
SINCE
    : 'since'
    ;
UNTIL
    : 'until'
    ;
IN
    : 'in'
    ;
ON
    : 'on'
    ;
// Runs of blanks, tabs and line breaks are discarded and never reach the parser.
WS
    : [ \t\r\n]+ -> skip
    ;
// Any character except blank, tab and slash.
// The slash is written unescaped: '\/' is not a recognised escape inside a
// lexer character set and newer ANTLR 4 tool versions report it.
// No rule visible in this grammar references this fragment.
fragment NO_RE : ~[ \t/];
/*
 * Characters that may appear inside a WORD: everything except whitespace
 * and the symbols that have a meaning of their own in the query syntax.
 * '\r' and '\n' are excluded together with blank and tab; otherwise a
 * WORD would swallow line breaks (e.g. "&\nauthor" would yield the WORD
 * "\nauthor") even though WS treats them as skippable whitespace.
 */
fragment ALPHABET : ~('\t' | ' ' | '\r' | '\n' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                    | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );
// One ASCII letter. Only the commented-out WORD variant refers to it.
fragment ALPHA
    : 'a'..'z'
    | 'A'..'Z'
    ;
Joachim Bingel4bf1ec52014-10-08 08:18:03 +000051
Joachim Bingel1f4c5ad2014-12-16 10:40:42 +000052
// A single decimal digit. Being a real token (not a fragment), it is what
// `value : DIGIT+` consumes; it also serves as the building block of DATE.
DIGIT
    : '0'..'9'
    ;
/*
 * Date literal: YYYY, YYYY-MM or YYYY-MM-DD.
 * Only the digit count is checked; month and day ranges are not validated.
 * Declared ahead of WORD so that text matching both with the same length
 * is emitted as DATE.
 */
DATE
    : DIGIT DIGIT DIGIT DIGIT           // year
      ( DASH DIGIT DIGIT                // optional month
        ( DASH DIGIT DIGIT )?           // optional day (only after a month)
      )?
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000057
// Single line-break character, discarded.
// NOTE(review): WS is declared earlier and already matches any run of
// '\r' / '\n', so this rule appears to be unreachable. It is kept so that
// the generated token vocabulary stays the same.
NL
    : ( '\r' | '\n' ) -> skip
    ;
// Parser rule for one or more WS tokens.
// NOTE(review): WS is declared with `-> skip`, so no WS token is ever
// delivered to the parser, and no other rule visible in this grammar
// references `ws`. Kept so the generated parser API stays the same.
ws
    : WS+
    ;
60
// Generic word: one or more ALPHABET characters (digits are allowed).
// Declared after the keyword, DIGIT and DATE tokens, which therefore win
// whenever they match text of the same length.
WORD
    : ALPHABET ALPHABET*
    ;
// Stricter variant, currently disabled:
//WORD : ALPHABET* ALPHA ALPHABET*; // needs to have at least one alphabetical letter (non-numeric)
63
Joachim Bingel787836a2014-08-07 14:50:18 +000064
Joachim Bingel43607ed2014-05-19 12:39:55 +000065/*
Joachim Bingela3f51f72014-07-22 14:45:31 +000066 * Regular expressions
Joachim Bingel43607ed2014-05-19 12:39:55 +000067 */
// Occurrence bounds in braces: {n}, {n,}, {,m} or {n,m}; blanks are tolerated.
fragment FOCC
    : '{' WS*
      ( [0-9]* WS* ',' WS* [0-9]+
      | [0-9]+ WS* ','?
      )
      WS* '}'
    ;

// Any character that carries no special meaning inside a regular expression.
fragment RE_char
    : ~( '*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
       | '(' | ')' | '|' | '"' | ':' | '\'' | '\\' )
    ;

// Parenthesised sub-expression.
// NOTE(review): the recursion goes through the complete REGEX token, so the
// nested expression has to carry its own pair of slashes; the same holds
// for the right-hand side of RE_alter. Confirm that this is intended.
fragment RE_group
    : '(' REGEX ')'
    ;

// Character class such as [abc].
fragment RE_chgroup
    : '[' RE_char+ ']'
    ;

// Alternation: an operand, '|', and a further REGEX; may repeat.
fragment RE_alter
    : ( ( RE_char | RE_group | RE_chgroup ) '|' REGEX )+
    ;

// Operand followed by '?', '*', '+' or an occurrence range, respectively.
fragment RE_opt
    : ( RE_char | RE_chgroup | RE_group ) '?'
    ;
fragment RE_star
    : ( RE_char | RE_chgroup | RE_group ) '*'
    ;
fragment RE_plus
    : ( RE_char | RE_chgroup | RE_group ) '+'
    ;
fragment RE_occ
    : ( RE_char | RE_chgroup | RE_group ) FOCC
    ;

// Quantified operand, optionally followed by '?'.
fragment RE_quant
    : ( RE_star | RE_plus | RE_occ ) QMARK?
    ;
// A regular expression literal: any sequence of the pieces below,
// enclosed in a pair of slashes. The sequence may be empty ("//").
REGEX
    : SLASH
      ( '.'
      | RE_char
      | RE_alter
      | RE_chgroup
      | RE_opt
      | RE_quant
      | RE_group
      )*
      SLASH
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000080
81/*
82 * PARSER SECTION
83 */
84
// Parser-level wrapper around a single REGEX token.
regex : REGEX ;
88
// Parser-level wrapper around a single DATE token.
date : DATE ;
92
// Keyword that relates a field to a date in a `dateconstraint`.
dateOp : SINCE | UNTIL | IN | ON ;
99
// Comparison operator of a `constraint`.
// Inequality is written as NEG followed by EQ ("!="); there is no
// dedicated token for it.
operator
    : ( NEG? EQ )
    | LT
    | GT
    | LEQ
    | GEQ
    | TILDE
    | NEGTILDE
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000102
// A single basic expression: field constraint, date constraint, or a
// bracketed token.
expr : constraint | dateconstraint | token ;
108
// Date restriction on a field: <field> <since|until|in|on> <date>.
dateconstraint
    : field dateOp date
    // Two-sided form, currently disabled:
    //| date dateOp field dateOp date
    ;
113
// Field restriction: <field> <operator> <value>.
constraint : field operator value ;
117
// A term, or a boolean group of terms, enclosed in square brackets.
token
    : LB
      ( term | termGroup )
      RB
    ;
121
/*
 * One term inside a token, in order:
 *   - any number of leading NEG ('!')
 *   - optional foundry followed by SLASH
 *   - layer, termOp and key
 *   - optional COLON plus value
 *   - optional flag
 */
term
    : NEG*
      ( foundry SLASH )?
      layer termOp key
      ( COLON value )?
      flag?
    ;
125
/*
 * Operator between the layer and the key of a term:
 *   =, ==, !=, !==   (EQ family)
 *   ~, ~~, !~, !~~   (TILDE family)
 * The lexer turns a contiguous "!~" into the single NEGTILDE token
 * (longest match), never into NEG followed by TILDE. The third
 * alternative accepts that token, so "!~" and "!~~" written without a
 * blank after '!' are recognised as well.
 */
termOp
: ( NEG? EQ? EQ
  | NEG? TILDE? TILDE
  | NEGTILDE TILDE?
  )
;
129
// Boolean combination of terms. The left operand is a term or a
// parenthesised group; the right operand may additionally be an
// unparenthesised group, so chains nest to the right.
termGroup
    : ( term
      | LRB termGroup RRB
      )
      booleanOp
      ( term
      | LRB termGroup RRB
      | termGroup
      )
    ;
133
// Key of a term: a plain word or a regular expression.
key : WORD | regex ;
138
// Foundry part of a term (the word in front of the SLASH).
foundry : WORD ;
142
// Layer part of a term (the word in front of the termOp).
layer : WORD ;
146
// Boolean connective joining two operands.
booleanOp : AND | OR ;
151
// Regular-expression flag in either letter order.
flag : FLAG_xi | FLAG_ix ;
156
// Name of the field a constraint applies to.
field : WORD ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000160
// Right-hand side of a constraint, or the value after COLON in a term:
// a word, one or more DIGIT tokens, a date, a quoted multiword, or a
// regular expression.
value
    : WORD
    | DIGIT+
    | DATE
    | multiword
    | regex
    ;
168
// One or more WORD tokens enclosed in double quotes.
multiword
    : '"'
      WORD+
      '"'
    ;
172
// Boolean combination of expressions. The right operand may itself be a
// relation, so unparenthesised chains nest to the right.
relation
    : ( expr | exprGroup )
      booleanOp
      ( expr | exprGroup | relation )
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000176
// Parenthesised expression, group or relation.
exprGroup
    : LRB
      ( expr | exprGroup | relation )
      RRB
    ;
180
// Entry rule: exactly one expression, group or relation, followed by the
// end of input.
start
    : ( expr
      | exprGroup
      | relation
      )
      EOF
    ;