blob: f46bbbfc851e31c7b6a015cf27313a08d8e20fcc [file] [log] [blame]
// Combined lexer/parser grammar named CollectionQuery.
grammar CollectionQuery;

// Text emitted at the top of every generated source file.
@header {package de.ids_mannheim.korap.query.serialize.util;}
4
/*
 -- author: jbingel
 -- date: 2014-05-11
*/
9
/*
 * LEXER SECTION
 */
/*
 Regular expression flags
   /x  allows submatches like /^.*?RE.*?$/
   /X  forces full matches
   /i  means case insensitivity
   /I  forces case sensitivity
 A flag token is a slash followed by one letter of either kind and,
 optionally, one letter of the other kind.
*/
FLAG_xi : '/' [xX] [iI]? ;
FLAG_ix : '/' [iI] [xX]? ;
22
23
// Brackets.
LRB   : '(' ;
RRB   : ')' ;
LB    : '[' ;
RB    : ']' ;

// Comparison symbols.
LT    : '<' ;
GT    : '>' ;
LEQ   : '<=' ;
GEQ   : '>=' ;
EQ    : '=' ;

// Boolean connectives: a symbol or one of several keyword spellings.
AND   : '&' | 'AND' | 'and' | 'UND'  | 'und' ;
OR    : '|' | 'OR'  | 'or'  | 'ODER' | 'oder' ;

// Remaining single-character tokens.
NEG   : '!' ;
QMARK : '?' ;
SLASH : '/' ;
COLON : ':' ;
DASH  : '-' ;
TILDE : '~' ;
// Keywords used by the dateOp parser rule.
SINCE : 'since' ;
UNTIL : 'until' ;
IN    : 'in' ;
ON    : 'on' ;
// Runs of blanks, tabs and line breaks separate tokens and are discarded.
WS : ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;

// Any character except blank, tab and slash. The slash needs no escape inside
// a character set, so it is written plainly.
// NOTE(review): no rule visible in this grammar references NO_RE.
fragment NO_RE : ~[ \t/];

// Characters allowed inside a WORD: everything except whitespace and the
// characters that have a meaning of their own in the query syntax.
// '\r' and '\n' are excluded together with blank and tab; otherwise a WORD
// would absorb line breaks (for example "foo\nbar" as one token) instead of
// leaving them to WS.
fragment ALPHABET : ~('\t' | ' ' | '\r' | '\n' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                    | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );

// An ASCII letter. Only the commented-out WORD variant refers to it.
fragment ALPHA : [a-zA-Z];
Joachim Bingel4bf1ec52014-10-08 08:18:03 +000050
Joachim Bingel1f4c5ad2014-12-16 10:40:42 +000051
// A single decimal digit.
DIGIT : '0'..'9' ;

// Four digits, optionally followed by "-" and two digits, optionally followed
// by another "-" and two digits (presumably year, year-month or
// year-month-day; the value ranges are not checked here).
DATE
    : DIGIT DIGIT DIGIT DIGIT
      ( DASH DIGIT DIGIT
        ( DASH DIGIT DIGIT )?
      )?
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000056
// A single line-break character, discarded.
// NOTE(review): WS is declared earlier and matches the same characters, so
// this rule looks unreachable; confirm before removing it.
NL : ( '\r' | '\n' ) -> skip ;

// One or more WS tokens.
// NOTE(review): WS carries "-> skip", so the parser presumably never receives
// such a token and this rule can never match; confirm before removing it.
ws : WS+ ;
59
// A word: one or more ALPHABET characters.
WORD : ALPHABET+ ;
// Disabled variant that needs at least one alphabetical (non-numeric) letter:
//WORD : ALPHABET* ALPHA ALPHABET*;
62
Joachim Bingel787836a2014-08-07 14:50:18 +000063
/*
 * Regular expressions
 *
 * A REGEX token is a body enclosed in slashes. Every nested construct
 * (group, alternation, quantified group) recurses into RE_body, i.e. the
 * expression WITHOUT the delimiting slashes. Recursing into REGEX itself
 * would demand a second pair of slashes inside each parenthesis or after
 * each '|', so that /(ab)+/ or /a|b/ could not be lexed as a single token.
 */
// Occurrence range in braces: {n}, {n,}, {n,m} or {,m}, with optional whitespace.
fragment FOCC       : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
// Any character that has no special meaning inside a regular expression.
fragment RE_char    : ~('*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
                      | '(' | ')' | '|' | '"' | ':' | '\'' | '\\');
// Alternation: an atom, '|', and the remainder of the expression.
fragment RE_alter   : ((RE_char | ('(' RE_body ')') | RE_chgroup) '|' RE_body )+;
// Character class in square brackets.
fragment RE_chgroup : '[' RE_char+ ']';
// Quantified atom, optionally followed by '?'.
fragment RE_quant   : (RE_star | RE_plus | RE_occ) QMARK?;
fragment RE_opt     : (RE_char | RE_chgroup | ( '(' RE_body ')')) '?';
fragment RE_star    : (RE_char | RE_chgroup | ( '(' RE_body ')')) '*';
fragment RE_plus    : (RE_char | RE_chgroup | ( '(' RE_body ')')) '+';
fragment RE_occ     : (RE_char | RE_chgroup | ( '(' RE_body ')')) FOCC;
// Parenthesised sub-expression.
fragment RE_group   : '(' RE_body ')';
// The expression between the slashes; may be empty.
fragment RE_body    : ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)*;
REGEX               : SLASH RE_body SLASH;
Joachim Bingeldbbde772014-05-12 15:26:10 +000079
/*
 * PARSER SECTION
 */
83
// A regular-expression literal: exactly one REGEX token.
regex : REGEX ;
87
// A date literal: exactly one DATE token.
date : DATE ;
91
// Keyword that relates a field to a date in a dateconstraint.
dateOp : SINCE | UNTIL | IN | ON ;
98
// Operator of a constraint: "=" (optionally preceded by "!"), "<", ">",
// "<=", ">=" or "~".
operator
    : NEG? EQ
    | LT
    | GT
    | LEQ
    | GEQ
    | TILDE
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000101
// A single expression: a constraint, a date constraint, or a bracketed token.
expr : constraint | dateconstraint | token ;
107
// A field, a date keyword and a date, in that order.
dateconstraint
    : field dateOp date
    // Disabled two-sided form:
    // | date dateOp field dateOp date
    ;
112
// A field, an operator and a value, in that order.
constraint : field operator value ;
116
// A term or a group of terms enclosed in square brackets.
token : LB ( term | termGroup ) RB ;
120
// One term inside a token, made of, in order:
//   any number of "!" negations,
//   an optional "foundry/" prefix,
//   the layer, the term operator and the key,
//   an optional ":value" suffix,
//   an optional flag.
term
    : NEG*
      ( foundry SLASH )?
      layer termOp key
      ( COLON value )?
      flag?
    ;
124
// Operator between layer and key: "=" or "==", or "~" or "~~", each
// optionally preceded by "!".
termOp
    : NEG? EQ? EQ
    | NEG? TILDE? TILDE
    ;
128
// Two or more terms joined by boolean operators. The left operand is a term
// or a parenthesised group; the right operand may additionally be another
// unparenthesised group, so chains nest to the right.
termGroup
    : ( term | LRB termGroup RRB )
      booleanOp
      ( term | LRB termGroup RRB | termGroup )
    ;
132
// The key of a term: a plain word or a regular expression.
key : WORD | regex ;
137
// The foundry part of a term: a single WORD.
foundry : WORD ;
141
// The layer part of a term: a single WORD.
layer : WORD ;
145
// Boolean connective between two operands.
booleanOp : AND | OR ;
150
// A flag token, in either letter order.
flag : FLAG_xi | FLAG_ix ;
155
// The field of a constraint: a single WORD.
field : WORD ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000159
// The value of a constraint or term: a word, digits, a date, a quoted
// multi-word string, or a regular expression.
// NOTE(review): a run of several digits is probably claimed by a longer lexer
// match (WORD or DATE), so DIGIT+ may only ever see a single DIGIT; confirm.
value : WORD | DIGIT+ | DATE | multiword | regex ;
167
// One or more WORD tokens enclosed in double quotes.
// NOTE(review): text that the lexer classifies as another token type (such as
// AND, IN or DATE) is not a WORD and is therefore rejected between the
// quotes; confirm this is intended.
multiword : '"' WORD+ '"' ;
171
// Two operands joined by a boolean operator. The left operand is an
// expression or a parenthesised group; the right operand may additionally be
// another relation, so chains nest to the right.
relation
    : ( expr | exprGroup )
      booleanOp
      ( expr | exprGroup | relation )
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000175
// An expression, a nested group or a relation enclosed in round brackets.
exprGroup : LRB ( expr | exprGroup | relation ) RRB ;
179
// Entry rule: one expression, group or relation, followed by end of input.
start : ( expr | exprGroup | relation ) EOF ;