blob: d3b25b47b373b7416715aedb1e1e6a2bf654fe27 [file] [log] [blame]
Joachim Bingeldbbde772014-05-12 15:26:10 +00001grammar CollectionQuery;
2
3@header {package de.ids_mannheim.korap.query.serialize.util;}
4
5/*
6 -- author: jbingel
Joachim Bingel787836a2014-08-07 14:50:18 +00007 -- date: 2014-05-11
Joachim Bingeldbbde772014-05-12 15:26:10 +00008*/
9
10/*
11 * LEXER SECTION
12 */
/*
 Regular expression flags
   /x allows submatches like /^.*?RE.*?$/
   /X forces full matches
   /i means case insensitivity
   /I forces case sensitivity
 A flag token is a slash followed by one flag letter and, optionally,
 one letter of the other kind. Two tokens cover the two possible orders.
*/
FLAG_xi : '/' [xX] [iI]? ;   // match-mode letter first, case letter optional
FLAG_ix : '/' [iI] [xX]? ;   // case letter first, match-mode letter optional

22
23
// Round and square brackets.
LRB   : '(' ;
RRB   : ')' ;
LB    : '[' ;
RB    : ']' ;

// Comparison operators. The lexer prefers the longest match, so '<=' and
// '>=' are recognised as LEQ/GEQ even though LT and GT are declared first.
LT    : '<' ;
GT    : '>' ;
LEQ   : '<=' ;
GEQ   : '>=' ;
EQ    : '=' ;

// Boolean connectives: symbol form plus English and German keywords,
// each keyword in all-upper or all-lower case only.
AND   : '&' | 'AND' | 'and' | 'UND'  | 'und'  ;
OR    : '|' | 'OR'  | 'or'  | 'ODER' | 'oder' ;

// Remaining single-character punctuation.
NEG   : '!' ;
QMARK : '?' ;
SLASH : '/' ;
COLON : ':' ;
DASH  : '-' ;
TILDE : '~' ;

// Runs of blanks, tabs and line breaks are discarded by the lexer and
// therefore never reach the parser.
WS    : [ \t\r\n]+ -> skip ;
// Any character except blank, tab and slash.
// NOTE(review): not referenced by any rule visible in this grammar.
fragment NO_RE : ~[ \t\/];

// Characters that may appear inside a WORD: everything except whitespace
// and the characters that have a meaning as operators, brackets, quotes or
// regex syntax. Carriage return and line feed are excluded together with
// blank and tab; otherwise a WORD would run across a line break and swallow
// the following line, although WS treats line breaks as skippable whitespace.
fragment ALPHABET : ~( ' ' | '\t' | '\r' | '\n'
                     | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                     | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\'
                     | '!' | '=' | '~' | '&' | '^' | '<' | '>' );

// A single ASCII letter.
fragment ALPHA : [a-zA-Z];

// A single decimal digit. Declared as a regular token (not a fragment), so
// a digit that is not part of a longer token is emitted as DIGIT.
DIGIT : [0-9];
Joachim Bingel4bf1ec52014-10-08 08:18:03 +000047
// Date literal: YYYY, YYYY-MM or YYYY-MM-DD. Only the digit count is
// checked here; month and day values are not range-validated.
DATE : DIGIT DIGIT DIGIT DIGIT ( DASH DIGIT DIGIT ( DASH DIGIT DIGIT )? )? ;

// NOTE(review): WS is declared earlier and already matches '\r' and '\n',
// so this rule can never win a match. Kept so the token vocabulary is unchanged.
NL : [\r\n] -> skip ;

// NOTE(review): parser rule over WS tokens. WS is skipped by the lexer and
// no rule in this grammar refers to `ws`, so it appears to be unused.
ws : WS+ ;

// A word needs at least one alphabetical letter (i.e. it cannot be purely
// numeric); any ALPHABET characters may surround that letter.
WORD : ALPHABET* ALPHA ALPHABET* ;
Joachim Bingel787836a2014-08-07 14:50:18 +000056
/*
 * Regular expressions
 */

// Occurrence bounds in braces: {n}, {n,}, {,m} or {n,m}. Whitespace is
// permitted around the numbers and the comma.
fragment FOCC : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';

// Any character without a special meaning inside a regular expression.
// '.' is not in the excluded list, so RE_char already matches it.
fragment RE_char : ~( '*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
                    | '(' | ')' | '|' | '"' | ':' | '\'' | '\\' );

// Parenthesised sub-expression.
// NOTE(review): the body is a full REGEX, which includes its delimiting
// slashes, so as written a group has the form (/.../) - confirm this is intended.
fragment RE_group : '(' REGEX ')';

// Character class such as [abc]; must contain at least one character.
fragment RE_chgroup : '[' RE_char+ ']';

// One or more "left | right" alternations; each right-hand side is a full REGEX.
fragment RE_alter : ( ( RE_char | RE_group | RE_chgroup ) '|' REGEX )+;

// A single character, character class or group followed by a quantifier.
fragment RE_opt  : ( RE_char | RE_chgroup | RE_group ) '?';
fragment RE_star : ( RE_char | RE_chgroup | RE_group ) '*';
fragment RE_plus : ( RE_char | RE_chgroup | RE_group ) '+';
fragment RE_occ  : ( RE_char | RE_chgroup | RE_group ) FOCC;

// '*', '+' or {..} quantified item, optionally followed by '?'.
fragment RE_quant : ( RE_star | RE_plus | RE_occ ) QMARK?;

// Regular expression literal enclosed in slashes; the body may be empty.
REGEX : SLASH ( '.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group )* SLASH;
Joachim Bingeldbbde772014-05-12 15:26:10 +000072
73/*
74 * PARSER SECTION
75 */
76
// Parser-level wrapper around a REGEX token.
regex    : REGEX ;

// Parser-level wrapper around a DATE token.
date     : DATE ;

// Operator of a metadata expression: '=', '!=', '<', '>', '<=', '>=' or '~'.
operator : NEG? EQ
         | LT
         | GT
         | LEQ
         | GEQ
         | TILDE
         ;
Joachim Bingeldbbde772014-05-12 15:26:10 +000087
// A basic expression is either a metadata constraint or a bracketed token.
expr      : meta | token ;

// Metadata constraint "field op value", optionally preceded by a second
// "value op" pair (e.g. to bound a field from both sides).
meta      : ( value operator )? field operator value ;

// A single term or a boolean combination of terms in square brackets.
token     : LB ( term | termGroup ) RB ;

// Any number of negations, an optional "foundry/" prefix, then layer,
// operator and key, optionally followed by ":value" and a regex flag.
term      : NEG* ( foundry SLASH )? layer termOp key ( COLON value )? flag? ;

// '=' or '==' , '~' or '~~', each optionally preceded by '!'.
termOp    : NEG? EQ? EQ
          | NEG? TILDE? TILDE
          ;

// Boolean combination of terms. The left operand is a term or a
// parenthesised group; the right operand may also be an unparenthesised group.
termGroup : ( term | LRB termGroup RRB )
            booleanOp
            ( term | LRB termGroup RRB | termGroup )
          ;
112
// Key of a term: a plain word or a regular expression.
key       : WORD | regex ;

// Foundry name of a term (the part before the slash).
foundry   : WORD ;

// Layer name of a term.
layer     : WORD ;

// Boolean connective between terms or expressions.
booleanOp : AND | OR ;

// Regex flag in either letter order.
flag      : FLAG_xi | FLAG_ix ;
135
// Name of a metadata field.
field     : WORD ;

// Value of a metadata constraint or term: a single word, a quoted
// sequence of words, a date or a regular expression.
value     : WORD | multiword | date | regex ;

// One or more words enclosed in double quotes.
multiword : '"' WORD+ '"' ;
150
// Boolean combination of expressions. The right operand may itself be a
// relation, so chains such as "a & b | c" nest to the right.
relation  : ( expr | exprGroup ) booleanOp ( expr | exprGroup | relation ) ;

// Parenthesised expression, group or relation.
exprGroup : LRB ( expr | exprGroup | relation ) RRB ;

// Entry rule: exactly one expression, group or relation, followed by end of input.
start     : ( expr | exprGroup | relation ) EOF ;
Joachim Bingeldbbde772014-05-12 15:26:10 +0000163;