blob: da730819b907ffeadcbc400c7a105bc418f3bdb4 [file] [log] [blame]
grammar CollectionQuery;
@header {package de.ids_mannheim.korap.query.serialize.util;}
/*
-- author: jbingel
-- date: 2014-05-11
*/
/*
* LEXER SECTION
*/
/*
Regular expression
/x allows submatches like /^.*?RE.*?$/
/X forces full matches
/i means case insensitivity
/I forces case sensitivity
*/
FLAG_xi : '/' ( ('x'|'X') ('i'|'I')? );
FLAG_ix : '/' ( ('i'|'I') ('x'|'X')? );
LRB : '(';
RRB : ')';
LB : '[';
RB : ']';
LT : '<';
GT : '>';
LEQ : '<=';
GEQ : '>=';
EQ : '=';
AND : '&' | 'AND' | 'and' | 'UND' | 'und' ;
OR : '|' | 'OR' | 'or' | 'ODER' | 'oder' ;
NEG : '!';
QMARK : '?';
SLASH : '/';
COLON : ':';
DASH : '-';
TILDE : '~';
NEGTILDE : '!~';
SINCE : 'since';
UNTIL : 'until';
IN : 'in';
ON : 'on';
WS : ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;
fragment NO_RE : ~[ \t\/];
fragment ALPHABET : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );
fragment ALPHA : [a-zA-Z];
DIGIT : [0-9];
DATE
: DIGIT DIGIT DIGIT DIGIT (DASH DIGIT DIGIT (DASH DIGIT DIGIT)?)?
;
NL : [\r\n] -> skip;
ws : WS+;
WORD : ALPHABET+;
//WORD : ALPHABET* ALPHA ALPHABET*; // needs to have at least one alphabetical letter (non-numeric)
/*
* Regular expressions
*/
fragment FOCC : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
fragment RE_char : ~('*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
| '(' | ')' | '|' | '"' | ':' | '\'' | '\\');
fragment RE_alter : ((RE_char | ('(' REGEX ')') | RE_chgroup) '|' REGEX )+;
fragment RE_chgroup : '[' RE_char+ ']';
fragment RE_quant : (RE_star | RE_plus | RE_occ) QMARK?;
fragment RE_opt : (RE_char | RE_chgroup | ( '(' REGEX ')')) '?';
fragment RE_star : (RE_char | RE_chgroup | ( '(' REGEX ')')) '*';
fragment RE_plus : (RE_char | RE_chgroup | ( '(' REGEX ')')) '+';
fragment RE_occ : (RE_char | RE_chgroup | ( '(' REGEX ')')) FOCC;
fragment RE_group : '(' REGEX ')';
REGEX : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)* SLASH;
/*
* PARSER SECTION
*/
regex
: REGEX
;
date
: DATE
;
dateOp
: SINCE
| UNTIL
| IN
| ON
;
operator
: (NEG? EQ) | LT | GT | LEQ | GEQ | TILDE | NEGTILDE;
expr
: constraint
| dateconstraint
| token
;
dateconstraint
: field dateOp date
//| date dateOp field dateOp date
;
constraint
: field operator value
;
token
: LB (term|termGroup) RB
;
term
: NEG* (foundry SLASH)? layer termOp key (COLON value)? flag?
;
termOp
: (NEG? EQ? EQ | NEG? TILDE? TILDE)
;
termGroup
: (term | LRB termGroup RRB) booleanOp (term | LRB termGroup RRB | termGroup)
;
key
: WORD
| regex
;
foundry
: WORD
;
layer
: WORD
;
booleanOp
: AND
| OR
;
flag
: FLAG_xi
| FLAG_ix
;
field
: WORD
;
value
: WORD
| DIGIT+
| DATE
| multiword
| regex
;
multiword
: '"' WORD+ '"'
;
relation
: (expr|exprGroup) booleanOp (expr|exprGroup|relation)
;
exprGroup
: LRB (expr | exprGroup | relation) RRB
;
start
: expr EOF
| exprGroup EOF
| relation EOF
;