blob: 3d6aaa8ae79794b1b53e1bbe3168dec4c08e2f88 [file] [log] [blame]
Joachim Bingeldbbde772014-05-12 15:26:10 +00001grammar CollectionQuery;
2
3@header {package de.ids_mannheim.korap.query.serialize.util;}
4
5/*
6 -- author: jbingel
Joachim Bingel787836a2014-08-07 14:50:18 +00007 -- date: 2014-05-11
Joachim Bingeldbbde772014-05-12 15:26:10 +00008*/
9
10/*
11 * LEXER SECTION
12 */
/*
 Regular expression flags
 /x allows submatches like /^.*?RE.*?$/
 /X forces full matches
 /i means case insensitivity
 /I forces case sensitivity
*/
// Flag token starting with the match-scope letter (x/X), optionally followed by a case letter (i/I).
FLAG_xi
    : '/' [xX] [iI]?
    ;

// Flag token starting with the case letter (i/I), optionally followed by a match-scope letter (x/X).
FLAG_ix
    : '/' [iI] [xX]?
    ;
22
23
/* Round and square brackets. */
LRB   : '(' ;
RRB   : ')' ;
LB    : '[' ;
RB    : ']' ;

/*
 Comparison operators. The lexer takes the longest match, so '<=' and '>='
 are emitted as LEQ / GEQ rather than as LT / GT followed by EQ.
*/
LT    : '<' ;
GT    : '>' ;
LEQ   : '<=' ;
GEQ   : '>=' ;
EQ    : '=' ;

/* Boolean connectives: symbol, English keyword or German keyword. */
AND
    : '&'
    | 'AND' | 'and'
    | 'UND' | 'und'
    ;
OR
    : '|'
    | 'OR' | 'or'
    | 'ODER' | 'oder'
    ;

/* Single-character punctuation. */
NEG   : '!' ;
QMARK : '?' ;
SLASH : '/' ;
COLON : ':' ;
DASH  : '-' ;
TILDE : '~' ;
/* Whitespace, including line breaks, only separates tokens and is dropped. */
WS : ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;

/* Any character other than space, tab or slash. Not referenced by the rules visible in this grammar. */
fragment NO_RE : ~[ \t\/];

/*
 A word character: anything that is neither whitespace nor one of the
 characters used as an operator, bracket, quote or regex metacharacter.
 '\r' and '\n' are excluded together with ' ' and '\t'; otherwise WORD
 (longest match wins) would absorb line breaks that WS is meant to skip,
 e.g. a value followed by a newline would become a WORD containing it.
*/
fragment ALPHABET : ~('\t' | ' ' | '\r' | '\n' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                    | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );

/*
 Rule order matters for equally long matches: a lone digit is emitted as
 DIGIT, a longer run of digits as NUMBER, and both take precedence over
 WORD for the same text because they are declared first.
*/
DIGIT : [0-9];
NUMBER : [0-9]+;

/* Line breaks are already covered by WS above, which is declared first; kept so the token type stays defined. */
NL : [\r\n] -> skip;

/* NOTE(review): WS is skipped by the lexer, so this parser rule presumably never matches; kept because `value` refers to it. */
ws : WS+;

/* One or more word characters. */
WORD : ALPHABET+;
Joachim Bingel43607ed2014-05-19 12:39:55 +000052
Joachim Bingel787836a2014-08-07 14:50:18 +000053
Joachim Bingel43607ed2014-05-19 12:39:55 +000054/*
Joachim Bingela3f51f72014-07-22 14:45:31 +000055 * Regular expressions
Joachim Bingel43607ed2014-05-19 12:39:55 +000056 */
/* Occurrence bounds in braces: {n}, {n,}, {,m} or {n,m}, with optional whitespace. */
fragment FOCC
    : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}'
    ;

/*
 A literal regex character: anything except the metacharacters, the slash
 delimiter, quotes, colon and backslash listed here.
 NOTE(review): whitespace, '=' and '&' are not excluded, so a REGEX token can
 stretch from one '/' to a later '/' across several words. Confirm this does
 not clash with the `foundry SLASH layer` syntax in `term`.
*/
fragment RE_char
    : ~( '*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
       | '(' | ')' | '|' | '"' | ':' | '\'' | '\\' )
    ;

/* Character group in square brackets. */
fragment RE_chgroup : '[' RE_char+ ']' ;

/* Parenthesised group; the content is a complete REGEX token, including its slashes. */
fragment RE_group   : '(' REGEX ')' ;

/* The unit that a quantifier or an alternation applies to. */
fragment RE_atom    : RE_char | RE_chgroup | RE_group ;

/* Alternation: an atom, a '|' and a complete REGEX, repeated. */
fragment RE_alter   : ( RE_atom '|' REGEX )+ ;

/* Quantified atoms. */
fragment RE_opt     : RE_atom '?' ;
fragment RE_star    : RE_atom '*' ;
fragment RE_plus    : RE_atom '+' ;
fragment RE_occ     : RE_atom FOCC ;

/* A star, plus or occurrence quantifier, optionally followed by '?'. */
fragment RE_quant   : ( RE_star | RE_plus | RE_occ ) QMARK? ;

/* A regular expression literal delimited by slashes. */
REGEX
    : SLASH
      ( '.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group )*
      SLASH
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000069
70/*
71 * PARSER SECTION
72 */
73
/* A regular expression literal as a parse-tree node. */
regex
    : REGEX
    ;

/*
 A date written as YYYY, YYYY-MM or YYYY-MM-DD, built from DIGIT and DASH tokens.
 NOTE(review): the lexer emits a run of several digits as NUMBER (or as WORD
 when dashes are attached), so four consecutive DIGIT tokens presumably only
 occur when the digits are separated by whitespace. Confirm that this rule is
 reachable for ordinary input.
*/
date
    : DIGIT DIGIT DIGIT DIGIT
      ( DASH DIGIT DIGIT
        ( DASH DIGIT DIGIT )?
      )?
    ;

/* Comparison operator of a metadata expression: '=', '!=', '<', '>', '<=' or '>='. */
operator
    : NEG? EQ
    | LT
    | GT
    | LEQ
    | GEQ
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000084
/* A single expression: either a metadata constraint or a bracketed token. */
expr
    : meta
    | token
    ;

/* Metadata constraint `field op value`, optionally preceded by `value op` to form a range. */
meta
    : ( value operator )? field operator value
    ;

/* A term or a group of terms enclosed in square brackets. */
token
    : LB ( term | termGroup ) RB
    ;

/*
 One term: any number of negations, an optional `foundry/` prefix, the layer,
 the term operator and the key, optionally followed by `:value` and a flag.
*/
term
    : NEG*
      ( foundry SLASH )?
      layer termOp key
      ( COLON value )?
      flag?
    ;

/* Term operator: '=' or '~', optionally doubled and optionally negated with '!'. */
termOp
    : NEG? EQ? EQ
    | NEG? TILDE? TILDE
    ;

/* Two or more terms joined by boolean operators; sub-groups may be parenthesised. */
termGroup
    : ( term | LRB termGroup RRB )
      booleanOp
      ( term | LRB termGroup RRB | termGroup )
    ;
109
/*
 Right-hand side of a term: a word, a regular expression or a number.
 DIGIT is accepted explicitly because the lexer declares DIGIT before NUMBER
 and WORD, so a lone digit such as `5` is emitted as DIGIT; without this
 alternative a single-digit key is a syntax error.
*/
key
: WORD
| regex
| NUMBER
| DIGIT
;
115
/* Name of the annotation foundry that precedes the slash in a term. */
foundry
    : WORD
    ;

/* Name of the annotation layer a term refers to. */
layer
    : WORD
    ;

/* Boolean connective between terms or expressions. */
booleanOp
    : AND
    | OR
    ;

/* Regular expression flag in either letter order. */
flag
    : FLAG_xi
    | FLAG_ix
    ;

/* Name of a metadata field. */
field
    : WORD
    ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000137
/*
 A value: a word, a number, a date, a double-quoted sequence of words or a
 regular expression.
 DIGIT is accepted explicitly because the lexer declares DIGIT before NUMBER
 and WORD, so a lone digit such as `5` is emitted as DIGIT; `date` needs four
 DIGIT tokens, so without this alternative a single-digit value is a syntax
 error.
*/
value
: WORD
| NUMBER
| DIGIT
| date
| '"' (WORD ws*)+'"'
| regex
;
145
/* Two or more expressions or groups joined by boolean operators. */
relation
    : ( expr | exprGroup )
      booleanOp
      ( expr | exprGroup | relation )
    ;

/* A parenthesised expression, group or relation. */
exprGroup
    : LRB ( expr | exprGroup | relation ) RRB
    ;

/* Entry rule: the whole input must be one expression, group or relation. */
start
    : ( expr | exprGroup | relation ) EOF
    ;