KorAP-querySerialization independent from - KorAP-PoliqarpParser - KorAP-AnnisParser - KorAP-Cosmas2Parser - KorAP-lucene-index grammars integrated into querySerialization still depends on KorAP-Entities (e.g. JsonUtils used in QuerySerializer and CollectionQueryBuilder)

commit: 6003b852456d1e7846fa68b3303cfa901cf038ca [log] [tgz]
author: Joachim Bingel <joa.bingel@gmail.com> Thu Dec 18 14:20:55 2014 +0000
committer: Joachim Bingel <joa.bingel@gmail.com> Thu Dec 18 14:20:55 2014 +0000
tree: 8a3df988e1f2fd1430136ab98540eb967ca0ae00
parent: 84395b2f3cd9c59177e20bf11e51434f225ec10c [diff]
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
new file mode 100644
index 0000000..cd3b823
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps.g

@@ -0,0 +1,239 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)			//
+// 	globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf).				//
+//	17.12.12/FB										//
+//      v-0.6											//
+// TODO:											//
+// - se1: Einsetzen des Default-Operators in den kumulierten AST.				//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps;
+
+options {output=AST; backtrack=true; }
+// Imaginary AST node types used by the rewrite rules of this grammar.
+// Each name is declared exactly once (OPLEM used to be listed twice); the
+// generated c2ps.tokens file numbers them in declaration order, so new node
+// types should be appended rather than inserted.
+tokens  {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX;
+	 ARG1; ARG2; 
+	 OPWF; OPANNOT;
+	 OPLABEL;
+	 OPIN; OPOV;
+	 OPAND;
+	 OPOR;
+	 OPNOT;
+	 OPEXPR1;
+	 OPMORPH; OPELEM;
+	}
+
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+@members {
+    // Optional sink for this parser's error messages; stays null until
+    // setErrorReporter() has been called.
+    private IErrorReporter errorReporter = null;
+
+    /** Installs the reporter that receives this parser's error messages. */
+    public void setErrorReporter(IErrorReporter errorReporter) {
+        this.errorReporter = errorReporter;
+    }
+
+    /**
+     * Forwards an error message to the installed reporter. When no reporter
+     * has been installed, the message is handed to the runtime's default
+     * implementation instead of failing with a NullPointerException while
+     * a syntax error is being reported.
+     */
+    public void emitErrorMessage(String msg) {
+        if (errorReporter == null) {
+            super.emitErrorMessage(msg);
+            return;
+        }
+        errorReporter.reportError(msg);
+    }
+}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS	:	(' '|'\r'|'\n')+ {skip();};
+
+// Suchoperator PROX:
+// ------------------
+
+fragment DISTVALUE
+	:	 ('0' .. '9')+ (':' ('0'..'9')+)? ;
+		
+fragment DIST
+	:	('+'|'-')? (DISTVALUE ('w'|'s'|'p'|'t') | ('w'|'s'|'p'|'t') DISTVALUE);
+	
+fragment GROUP
+	:	('min' | 'max');
+
+OP_PROX	:	('/' | '%') DIST (',' DIST)* (',' GROUP)? ;
+
+OP_IN	:	'#IN' | '#IN(' OP_IN_OPTS? ')' ; 
+
+OP_OV	:	'#OV' | '#OV(' OP_OV_OPTS? ')' ;
+
+// EAVEXPR wird hier eingesetzt für eine beliebige Sequenz von Zeichen bis zu ')'.
+fragment OP_IN_OPTS
+	:	EAVEXPR ;
+
+// EAVEXPR wird hier eingesetzt für eine beliebige Sequenz von Zeichen bis zu ')'.	
+fragment OP_OV_OPTS
+	:	EAVEXPR ;
+
+// OP_BED: #BED( searchExp , Bedingung )
+// OP_BED_END = ", Bedingung )" 
+// ungelöst: #BED(Jimi Hendrix, sa) -> Komma wird "Hendrix," zugeschlagen!
+// Umgehung: Blank vor dem Komma: #BED(Jimi Hendrix , sa) -> OK.
+
+OP_BED_END
+	:	',' ~(')')+ ')' ; 
+	
+// OP1: Operator with single argument:
+// (funktioniert nicht: fragment OP1 : OP1BEG | OP1END ...;)
+
+//OP1	:	'#BEG(' | '#END(' | '#ALL(' | '#NHIT(' ;	
+
+// Labels als Keywords für Suchbegriffe mit besonderer Bedeutung (Überschriften, etc.),
+// muss VOR SEARCHWORD1/2 deklariert werden.
+
+// Structure labels: <s>, <p> and the u-umlaut family (<ü>, <üd>, <üh>, <üu>,
+// <üz>, <ür>). The umlaut is written as the Unicode escape \u00FC so that the
+// literals match the real character regardless of the encoding in which this
+// grammar file is stored or read (the previous literals contained the
+// mis-decoded two-character sequence instead of the umlaut).
+SEARCHLABEL
+	:	('<s>' | '<p>' | '<\u00FC>' | '<\u00FCd>' | '<\u00FCh>' | '<\u00FCu>' | '<\u00FCz>' | '<\u00FCr>');
+
+// Search Word: 
+// spezialzeichen werden in "..." versteckt.
+// SEARCHWORD1: single or multiple words not enclosed in "...".
+// SEARCHWORD2: single or multiple words enclosed in "...".
+SEARCHLEMMA
+	:	'&' SEARCHWORD1 ; // rewrite rules funktionieren im lexer nicht: -> ^(OPLEM $SEARCHWORD1.text); 
+
+// SEARCHWORD2: schluckt Blanks. Diese müssen nachträglich als Wortdelimiter erkannt werden.
+SEARCHWORD1
+	:	~('"' | ' ' | '#' | ')' | '(' )+ ;
+
+// A search term enclosed in double quotes; the quotes are part of the token text.
+// NOTE(review): ~('"') also matches a backslash, so the '\\"' alternative
+// overlaps with the first alternative. It looks like an escaped quote may
+// therefore terminate the token early - confirm against the generated lexer.
+// EAVEXPR in this grammar excludes '\\' from its negated set, which avoids
+// the same overlap for single-quoted values.
+SEARCHWORD2
+	:	'"' (~('"') | '\\"')+ '"' ;
+
+// Annotationsoperator #ELEM( EAVEXPR ).
+// EAVEXPR = Element Attribut Value Expression.
+// alle Spezialzeichen vor dem Blank ausgeschlossen.
+// e.g. #ELEM(ANA='N pl'); #ELEM(HEAD, TYPE='DACHUEBERSCHRIFT');
+// e.g. #ELEM( ANA='N()' LEM='L\'été');
+
+fragment EAVEXPR
+	:	( ~( '(' | ')' | '\'' | ('\u0000'..'\u001F')) | ('\'' (~('\'' | '\\') | '\\' '\'')* '\'') )+ ;
+	
+fragment WORD
+	:	~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+                    | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' )+;
+
+// "#ELEM()" nur für Fehlerbehandlung, ansonsten sinnlose Anfrage.
+OP_ELEM	:	'#ELEM(' EAVEXPR ')' | '#ELEM(' ')';
+
+// EAVEXPR ist streng genommen nicht der korrekte Labelname für den Inhalt von MORPH(),
+// hat aber die gleiche Syntax und kann an dieser Stelle eingesetzt werden.
+
+fragment MORPHEXPR
+	: WORD (':' WORD)?
+	| WORD '!'? '=' WORD (':' WORD)?
+	| WORD '/' WORD '!'? '=' WORD (':' WORD)?
+	;
+	
+OP_MORPH:	'MORPH(' 
+				MORPHEXPR (' '* '&' ' '* MORPHEXPR)* 
+			')' ;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+// options {backtrack=true; k=5;}
+
+c2ps_query 
+	:	searchExpr EOF -> ^(C2PQ searchExpr);
+
+/* this rule is OK.
+searchExpr
+	:	(op1 | searchWord | searchLemma | searchAnnot | searchLabel | '('! searchExpr ')'!)+ (op2^ searchExpr)? ;
+*/
+// trying to modify the rule above for generating arg1 and arg2 in the resulting AST more easily.
+// notes: se1+=searchExpr1 is of type List. -> $se1+ (not {$se1+} !) is the AST of the list.
+searchExpr
+	:	(se1+=searchExpr1)+ (op2 se2=searchExpr)? 
+	
+		-> {$op2.tree != null}? ^({$op2.tree} ^(ARG1 $se1+) ^(ARG2 {$se2.tree}))
+		-> $se1+ ;
+
+searchExpr1
+	:	op1 		   -> {$op1.tree}
+	| 	searchWord 	   -> {$searchWord.tree}
+	| 	searchLemma 	   -> {$searchLemma.tree}
+	| 	searchAnnot 	   -> {$searchAnnot.tree}
+	| 	searchLabel        -> {$searchLabel.tree}
+	| 	'(' searchExpr ')' -> {$searchExpr.tree};
+
+// Suchbegriff = Suchwort in Hochkommata (word2) oder ohne (word1):
+// aufgegeben: word1+ | '"' word1+ '"' ; 
+
+searchWord
+	:	word1
+	|	word2;
+
+word1	:	SEARCHWORD1 -> {c2ps_opWF.check($SEARCHWORD1.text, false, false, $SEARCHWORD1.index)} ; 
+
+word2	:	SEARCHWORD2 -> {c2ps_opWF.check($SEARCHWORD2.text, true, false, $SEARCHWORD2.index)} ;
+	
+// Suchbegriff = Lemma:
+searchLemma
+	:	SEARCHLEMMA -> {c2ps_opWF.check($SEARCHLEMMA.text, false, true, $SEARCHLEMMA.index)} ; 
+
+// Suchbegriff = Annotationsoperator:
+// (damit Lexer den richtige Token erzeugt, muss OP_ELEM den gesamten
+// Annot-Ausdruck als 1 Token erkennen).
+searchAnnot
+	:	OP_ELEM  
+		-> ^({c2ps_opELEM.check($OP_ELEM.text,$OP_ELEM.index)})
+	| 	OP_MORPH 
+		-> ^(OPMORPH ^({new CommonTree(new CommonToken(OPMORPH, c2ps_opAnnot.strip($OP_MORPH.text)))}));
+
+// searchLabel: <s>, <p>, <ü> etc.
+
+searchLabel
+	:	SEARCHLABEL -> ^(OPLABEL SEARCHLABEL); 
+	
+// Suchoperatoren:
+// ---------------
+
+// OP2: Suchoperatoren mit 2 Argumenten:
+// -------------------------------------
+
+// Der von op2 zurückgelieferte AST ist automatisch derjenige vom geparsten Operator.
+
+op2	:	(opPROX | opIN | opOV | opAND | opOR | opNOT) ;
+		
+// AST with Options for opPROX is returned by c2ps_opPROX.check():
+opPROX	:	OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.index)} );
+
+opIN	: 	OP_IN -> {c2ps_opIN.check($OP_IN.text, $OP_IN.index)};
+
+opOV	:	OP_OV -> {c2ps_opOV.check($OP_OV.text, $OP_OV.index)};
+
+opAND	:	('und' | 'UND' | 'and' | 'AND')     -> ^(OPAND);
+
+opOR	:	('oder' | 'ODER' | 'or' | 'OR')     -> ^(OPOR);
+
+opNOT	:	('nicht' | 'NICHT' | 'not' | 'NOT') -> ^(OPNOT);
+
+// OP1: Suchoperatoren mit 1 Argument:
+// -----------------------------------
+
+op1	:	opBEG | opEND | opNHIT | opALL | opBED; 
+
+// #BED(searchExpr, B).
+// B muss nachträglich in einer lokalen Grammatik überprüft werden.
+
+opBED	:	'#BED(' searchExpr opBEDEnd -> ^(OPBED searchExpr ^(OPTS {$opBEDEnd.tree})) ;
+
+// c2ps_opBED.check() returns an AST that is returned by rule opBEDEnd.
+// for this action inside a rewrite rule, no ';' behind the function call.
+opBEDEnd:	OP_BED_END -> {c2ps_opBED.check($OP_BED_END.text, $OP_BED_END.index) };
+
+opBEG	:	( '#BEG(' | '#LINKS(' ) searchExpr ')'  -> ^(OPBEG searchExpr) ;
+
+opEND	:	( '#END(' | '#RECHTS(' ) searchExpr ')'  -> ^(OPEND searchExpr) ;
+
+opNHIT	:	( '#NHIT(' | '#INKLUSIVE(' ) searchExpr ')' -> ^(OPNHIT searchExpr) ;
+
+opALL	:	( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')'  -> ^(OPALL searchExpr) ;

diff --git a/src/main/antlr/cosmas/c2ps.tokens b/src/main/antlr/cosmas/c2ps.tokens
new file mode 100644
index 0000000..ae8ad67
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps.tokens

@@ -0,0 +1,78 @@
+OP_ELEM=41
+T__62=62
+OPEND=8
+OP_OV=34
+OPTS=6
+OPAND=20
+T__61=61
+T__60=60
+T__55=55
+T__56=56
+T__57=57
+C2PQ=4
+T__58=58
+OPALL=10
+T__51=51
+T__52=52
+OPNOT=22
+T__53=53
+T__54=54
+MORPHEXPR=42
+SEARCHWORD2=40
+T__59=59
+OPELEM=25
+OPOR=21
+OP_MORPH=43
+T__50=50
+OPPROX=12
+OPOV=19
+T__46=46
+EAVEXPR=35
+T__47=47
+T__44=44
+OPWF=15
+SEARCHLEMMA=39
+T__45=45
+T__48=48
+T__49=49
+OPLABEL=17
+OP_OV_OPTS=33
+DIST=28
+ARG1=13
+SEARCHWORD1=38
+OPNHIT=9
+ARG2=14
+SEARCHLABEL=37
+OP_IN=32
+OP_IN_OPTS=31
+GROUP=29
+OPLEM=11
+WS=26
+OPIN=18
+OPBED=5
+OP_BED_END=36
+OPBEG=7
+OPMORPH=24
+OPANNOT=16
+DISTVALUE=27
+OPEXPR1=23
+OP_PROX=30
+'#ALL('=62
+'NOT'=57
+'NICHT'=55
+'#NHIT('=61
+'OR'=53
+'and'=48
+'UND'=47
+'#END('=60
+'#BED('=58
+'#BEG('=59
+'not'=56
+'('=44
+'oder'=50
+'or'=52
+'nicht'=54
+')'=45
+'und'=46
+'AND'=49
+'ODER'=51

diff --git a/src/main/antlr/cosmas/c2ps_opBED.g b/src/main/antlr/cosmas/c2ps_opBED.g
new file mode 100644
index 0000000..e5d715e
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opBED.g

@@ -0,0 +1,44 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	Lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache			//
+//	Dez. 2012/FB										//
+//      v1.0											//
+//	lokale Grammatik für #BED(x, Opts).							//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opBED;
+
+options {output=AST;}
+tokens  {TPBEG; TPEND; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+TP_POS	:	('+'|'-')? ('sa'|'SA'|'se'|'SE'|'pa'|'PA'|'pe'|'PE'|'ta'|'TA'|'te'|'TE') ;
+
+WS	:	(' ')+ {skip();};
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+opBEDOpts
+	:	',' textpos ')' -> textpos ;
+	
+textpos	:	( tpBeg ('/' tpEnd)? | '/' tpEnd ) -> tpBeg? tpEnd?;
+
+tpBeg	:	tpExpr -> ^(TPBEG tpExpr);
+
+tpEnd	:	tpExpr -> ^(TPEND tpExpr);
+
+tpExpr	:	tpPos (',' tpPos)* -> tpPos*;
+
+tpPos	:	TP_POS; 

diff --git a/src/main/antlr/cosmas/c2ps_opBED.tokens b/src/main/antlr/cosmas/c2ps_opBED.tokens
new file mode 100644
index 0000000..69f86b5
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opBED.tokens

@@ -0,0 +1,10 @@
+TPBEG=4
+WS=7
+TP_POS=6
+T__10=10
+TPEND=5
+T__9=9
+T__8=8
+'/'=10
+','=8
+')'=9

diff --git a/src/main/antlr/cosmas/c2ps_opELEM.g b/src/main/antlr/cosmas/c2ps_opELEM.g
new file mode 100644
index 0000000..5a6730c
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opELEM.g

@@ -0,0 +1,52 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)			//
+// 	lokale Grammatik für #ELEM(Expr).							//
+//	08.01.13/FB										//
+//      v-0.2											//
+// TODO: -											//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opELEM;
+
+options {output=AST;}
+
+tokens {OPELEM; EMPTY;
+	ELNAME; 
+	EQ; NOTEQ;
+	}
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS  	:	( ' '|'\t'|'\r'|'\n')+ {$channel=HIDDEN;};
+
+// remove '#' from ID to avoid #ELEM(C) being tokenized as an ID;
+// stating '#' should not start an ID has no effect in ANTLR.
+// ID may contain an escaped ', e.g. l\'été.
+ID	:	(~('#'|'\''|' '|'='|'!'|'<'|'>'|')') | ('\\' '\''))+;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opELEM	:	'#ELEM(' ')'              -> ^(OPELEM EMPTY)
+	|	'#ELEM(' elem avExpr* ')' -> ^(OPELEM elem avExpr*)
+	|	'#ELEM(' avExpr+ ')'      -> ^(OPELEM avExpr+);
+
+elem	:	ID -> ^(ELNAME ID);
+
+avExpr	:	id1=ID op id2=ID            -> ^(op $id1 $id2)
+	|	id1=ID op '\'' id3+=ID+ '\'' -> ^(op $id1 $id3+);
+	
+op	:	'='             -> ^(EQ)
+	|	('<>' | '!=')   -> ^(NOTEQ);
+	
+	
\ No newline at end of file

diff --git a/src/main/antlr/cosmas/c2ps_opELEM.tokens b/src/main/antlr/cosmas/c2ps_opELEM.tokens
new file mode 100644
index 0000000..656fb67
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opELEM.tokens

@@ -0,0 +1,19 @@
+NOTEQ=8
+WS=9
+T__16=16
+T__15=15
+T__12=12
+T__11=11
+T__14=14
+T__13=13
+ELNAME=6
+OPELEM=4
+EQ=7
+ID=10
+EMPTY=5
+'!='=16
+'='=14
+'<>'=15
+'#ELEM('=11
+')'=12
+'\''=13

diff --git a/src/main/antlr/cosmas/c2ps_opIN.g b/src/main/antlr/cosmas/c2ps_opIN.g
new file mode 100644
index 0000000..da14af2
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opIN.g

@@ -0,0 +1,67 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)			//
+// 	lokale Grammatik für #IN() und #IN(Options).						//
+//	17.12.12/FB										//
+//      v-0.1											//
+//												//
+// Opts nimmt eine oder mehrere, durch Kommata getrennte Optionen auf:				//
+// - Bereichsoptionen (RANGE): ALL, HIT, -.							//
+// - Positionsoptionen (POS): L, R, F, FE, FI, N, -.						//
+// - Ausschließungsoptionen: %, -.								//
+// - Gruppenbildungsoptionen (GROUP): min, max, -.						//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opIN;
+
+options {output=AST;}
+tokens  {OPIN;
+	 RANGE; ALL; HIT; 
+	 POS; 
+	 EXCL; YES;
+	 GROUP; MIN; MAX; }
+
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+	 
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS	:	(' ')+ {skip();};
+
+POSTYP	:	('L'|'l'|'R'|'r'|'F'|'f'|'FE'|'fe'|'FI'|'fi'|'N'|'n' );
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opIN	:	'#IN' -> ^(OPIN)
+	|	'#IN(' opts? ')' -> ^(OPIN opts?);
+
+opts	:	opt (',' opt)*
+
+		-> opt*;
+
+opt	:	(optRange |optPos | optExcl | optGrp);
+
+// Bereich:
+optRange:	('ALL' | 'all') -> ^(RANGE ALL) 
+	| 	('HIT' | 'hit')	-> ^(RANGE HIT); 
+
+// Position:
+optPos	:	POSTYP
+
+		-> ^(POS POSTYP);
+
+optExcl	:	'%' 
+
+		-> ^(EXCL YES);
+
+optGrp	:	('MIN' | 'min') -> ^(GROUP MIN)
+	| 	('MAX' | 'max') -> ^(GROUP MAX) ;
+

diff --git a/src/main/antlr/cosmas/c2ps_opIN.tokens b/src/main/antlr/cosmas/c2ps_opIN.tokens
new file mode 100644
index 0000000..a49b6a9
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opIN.tokens

@@ -0,0 +1,38 @@
+T__28=28
+T__27=27
+T__26=26
+T__25=25
+T__24=24
+T__23=23
+T__22=22
+T__21=21
+T__20=20
+MAX=13
+YES=10
+RANGE=5
+EXCL=9
+MIN=12
+T__19=19
+GROUP=11
+WS=14
+T__16=16
+OPIN=4
+T__18=18
+T__17=17
+POS=8
+ALL=6
+HIT=7
+POSTYP=15
+'#IN('=17
+'HIT'=22
+'all'=21
+'MIN'=25
+'#IN'=16
+'hit'=23
+'min'=26
+','=19
+')'=18
+'ALL'=20
+'MAX'=27
+'max'=28
+'%'=24

diff --git a/src/main/antlr/cosmas/c2ps_opOV.g b/src/main/antlr/cosmas/c2ps_opOV.g
new file mode 100644
index 0000000..a80db9b
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opOV.g

@@ -0,0 +1,58 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)			//
+// 	lokale Grammatik für #OV() und #OV(Options).						//
+//	17.12.12/FB										//
+//      v-0.1											//
+//												//
+// Opts nimmt eine oder mehrere, durch Kommata getrennte Optionen auf:				//
+// - Positionsoptionen (POS): L, R, F, FE, FI, X, -.						//
+// - Ausschließungsoptionen (EXCL): %, -.							//
+// - Gruppenbildungsoptionen (GROUP): min, max, -.						//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opOV;
+
+options {output=AST;}
+tokens  {OPOV;
+	 POS; 
+	 EXCL; YES;
+	 GROUP; MIN; MAX; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+	 
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS	:	(' ')+ {skip();};
+
+POSTYP	:	'L'|'l'|'R'|'r'|'F'|'f'|'FE'|'fe'|'FI'|'fi'|'N'|'n'|'X'|'x' ;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opOV	:	'#OV'            -> ^(OPOV)
+	|	'#OV(' opts? ')' -> ^(OPOV opts?);
+
+opts	:	opt (',' opt)*   -> opt*;
+
+opt	:	(optPos | optExcl | optGrp);
+
+// Position:
+optPos	:	POSTYP 
+
+		-> ^(POS POSTYP);
+
+optExcl	:	'%' 
+
+		-> ^(EXCL YES);
+
+optGrp	:	('MIN' | 'min') -> ^(GROUP MIN)
+	| 	('MAX' | 'max') -> ^(GROUP MAX) ;
+

diff --git a/src/main/antlr/cosmas/c2ps_opOV.tokens b/src/main/antlr/cosmas/c2ps_opOV.tokens
new file mode 100644
index 0000000..5b66e6c
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opOV.tokens

@@ -0,0 +1,27 @@
+T__21=21
+T__20=20
+MAX=10
+YES=7
+EXCL=6
+MIN=9
+T__19=19
+GROUP=8
+WS=11
+T__16=16
+T__15=15
+T__18=18
+T__17=17
+POS=5
+T__14=14
+T__13=13
+POSTYP=12
+OPOV=4
+'#OV'=13
+'min'=19
+','=16
+')'=15
+'MAX'=20
+'MIN'=18
+'#OV('=14
+'max'=21
+'%'=17

diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g
new file mode 100644
index 0000000..f7a42f5
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opPROX.g

@@ -0,0 +1,78 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												//
+// 	lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps)		//
+//	für den Abstandsoperator /w... und %w...						//
+//	v-1.0 - 07.12.12/FB									//
+//												//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opPROX;
+
+options {output=AST;}
+
+tokens  { PROX_OPTS; 
+	  TYP; PROX; EXCL; 
+	  DIST_LIST; DIST; RANGE; VAL0; 
+	  MEAS; // measure
+	  DIR; PLUS; MINUS; BOTH;
+	  GRP; MIN; MAX; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						PROX-Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+DISTVALUE
+	:	('0' .. '9')+ ;
+	
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// 						PROX-Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+opPROX	:	proxTyp proxDist (',' proxDist)* (',' proxGroup)? 
+		
+		-> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree});
+	
+proxTyp	:	  '/' -> ^(TYP PROX)	// klassischer Abstand.
+		| '%' -> ^(TYP EXCL);	// ausschließender Abstand.
+
+// proxDist: e.g. +5w or -s0 or /w2:4 etc.
+// kein proxDirection? hier, weil der Default erst innerhalb von Regel proxDirection erzeugt werden kann.
+proxDist:	proxDirection (v1=proxDistValue m1=proxMeasure | m2=proxMeasure v2=proxDistValue)
+
+		-> {$v1.tree != null}? ^(DIST {$proxDirection.tree} {$v1.tree} {$m1.tree})
+		-> 		       ^(DIST {$proxDirection.tree} {$v2.tree} {$m2.tree});
+
+proxDirection
+	:	(p='+'|m='-')?	-> {$p != null}? ^(DIR PLUS)
+				-> {$m != null}? ^(DIR MINUS)
+				->               ^(DIR BOTH) ;
+/*
+proxDistValue	// proxDistMin ( ':' proxDistMax)? ;
+	:	(m1=proxDistMin -> ^(DIST_RANGE VAL0 $m1)) (':' m2=proxDistMax -> ^(DIST_RANGE $m1 $m2))? ;
+*/
+proxDistValue	// proxDistMin ( ':' proxDistMax)? ;
+	:	(m1=proxDistMin ) (':' m2=proxDistMax)? 
+	
+		-> {$m2.text != null}? ^(RANGE $m1  $m2)
+		->		       ^(RANGE VAL0 $m1);
+		
+proxMeasure
+	:	(m='w'|m='s'|m='p'|m='t') -> ^(MEAS $m);
+
+proxDistMin
+	:	DISTVALUE;
+	
+proxDistMax
+	:	DISTVALUE;
+	
+proxGroup
+	:	'min' -> ^(GRP MIN)
+	|	'max' -> ^(GRP MAX);
+	

diff --git a/src/main/antlr/cosmas/c2ps_opPROX.tokens b/src/main/antlr/cosmas/c2ps_opPROX.tokens
new file mode 100644
index 0000000..ff027fd
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opPROX.tokens

@@ -0,0 +1,39 @@
+MEAS=12
+T__29=29
+T__28=28
+T__27=27
+T__26=26
+T__25=25
+T__24=24
+T__23=23
+T__22=22
+T__21=21
+MAX=19
+DIST=9
+PROX=6
+RANGE=10
+TYP=5
+EXCL=7
+MIN=18
+MINUS=15
+PROX_OPTS=4
+DIST_LIST=8
+VAL0=11
+T__30=30
+T__31=31
+DIR=13
+GRP=17
+BOTH=16
+PLUS=14
+DISTVALUE=20
+'w'=27
+'p'=29
+'/'=22
+'s'=28
+':'=26
+'-'=25
+'min'=30
+'+'=24
+','=21
+'max'=31
+'%'=23

diff --git a/src/main/antlr/cosmas/c2ps_opWF.g b/src/main/antlr/cosmas/c2ps_opWF.g
new file mode 100644
index 0000000..200a21a
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opWF.g

@@ -0,0 +1,74 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//                                  								//
+//		COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)    		//
+//    		lokale Grammatik für Wortformen							// 
+// 		(Bsp: Hendrix, Hendrix:sa/-pe, :fi:Hendrix, etc. )				//
+// 		v-0.3 - 10.01.13/FB								//
+//                                  								//
+// Strategie: 											//
+// - Input string: :cccc:wwww:ppp								//
+// - diese Grammatik trennt ccc, www und ppp voneinander, ccc und ppp werden in weiteren 	//
+//   lokalen Grammatiken zerlegt.								//
+// - Begründung: weil die Tokens in ccc, www und ppp sich überschneiden, ist eine große 	//
+//   Grammatik unnötig umständlich.								//
+// - Bsp.: :FiOlDs:Würde:sa/-pe,-te -> c=FiOlDs + w=Würde + p=sa/-pe,-te.			//
+// Mögliche Werte für die Case-Optionen:							//
+// www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/GROSS_KLEIN.html //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opWF;
+
+options {output=AST;}
+
+tokens  { OPWF; OPLEM; OPTCASE; TPOS; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+//                   WF-Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+fragment STRING
+	:	( ~(':' | ' ') | '\\:' )+ ; 
+
+Case	:	':' STRING ':';
+
+TPos	:	':' STRING;
+
+WF	:	STRING;
+
+WS	:	(' ')+ {skip();};
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+//                   WF-Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ 
+searchWFs
+	:	searchWF+;
+	
+searchWF:	optCase? wordform tpos?
+
+		-> ^(OPWF wordform optCase? tpos? ) ;
+
+wordform:	WF -> {c2ps_opWF.encode($WF.text, OPWF)};
+
+// Case Options:
+optCase	:	Case 
+
+		-> {c2ps_optCase.check($Case.text, $Case.index)} ;
+
+// textposition Options:
+tpos	:	TPos 
+
+		-> ^(TPOS {c2ps_opBED.checkTPos($TPos.text, $TPos.index)});
+
+// analog für Lemmata, kein optCase:
+searchLEM
+	:	wordform tpos?
+	
+		-> ^(OPLEM wordform tpos?);
+		

diff --git a/src/main/antlr/cosmas/c2ps_opWF.tokens b/src/main/antlr/cosmas/c2ps_opWF.tokens
new file mode 100644
index 0000000..4f921af
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opWF.tokens

@@ -0,0 +1,9 @@
+OPLEM=5
+OPTCASE=6
+WS=12
+OPWF=4
+TPOS=7
+TPos=10
+WF=11
+Case=9
+STRING=8

diff --git a/src/main/antlr/cosmas/c2ps_optCase.g b/src/main/antlr/cosmas/c2ps_optCase.g
new file mode 100644
index 0000000..e24fa77
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_optCase.g

@@ -0,0 +1,51 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//                                  								//
+//		COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)    		//
+//    		lokale Grammatik für Option 'Case'.						// 
+// 		(Bsp: :fi: in :fi:Hendrix .							//
+// 		v-0.1 - 14.12.12/FB								//
+//                                  								//
+// Externer Aufruf: 										//
+// Mögliche Werte für die Case-Optionen:							//
+// www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/GROSS_KLEIN.html //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_optCase;
+
+options {output=AST;}
+
+tokens  {CASE; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+//                   Case-Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+// hier sind die 'englischen' Abkürzungen definiert. Es gibt noch die Entsprechungen in deut. Sprache.
+//CA	:	('fi'|'Fi'|'fu'|'Fu'|'fs'|'Fs'|'fl'|'Fl'|'Os'|'os'|'Oi'|'oi'|'Ou'|'ou'|'Ol'|'ol'|'Ds'|'ds'|'Di'|'di');
+
+fragment CA_FIRST
+	:	('F'|'f');
+fragment CA_OTHER
+	:	('O'|'o');
+fragment CA_HOW
+	:	('s'|'i'|'u'|'l');
+fragment CA_DIA
+	:	('D'|'d');
+		
+CA	:	((CA_FIRST|CA_OTHER) CA_HOW) | ( CA_DIA ('s'|'i') );
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+//                   Case-Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ 
+// optCase: one or more CA tokens, collected as children of a single CASE node.
+// The former list label on CA was never referenced (the rewrite refers to the
+// CA token stream directly), so it has been dropped.
+optCase	:	CA+ 
+
+		-> ^(CASE CA+ ) ;
+
+

diff --git a/src/main/antlr/cosmas/c2ps_optCase.tokens b/src/main/antlr/cosmas/c2ps_optCase.tokens
new file mode 100644
index 0000000..829aab0
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_optCase.tokens

@@ -0,0 +1,6 @@
+CA_HOW=7
+CA_OTHER=6
+CA=9
+CASE=4
+CA_DIA=8
+CA_FIRST=5
commit	6003b852456d1e7846fa68b3303cfa901cf038ca	[log] [tgz]
author	Joachim Bingel <joa.bingel@gmail.com>	Thu Dec 18 14:20:55 2014 +0000
committer	Joachim Bingel <joa.bingel@gmail.com>	Thu Dec 18 14:20:55 2014 +0000
tree	8a3df988e1f2fd1430136ab98540eb967ca0ae00
parent	84395b2f3cd9c59177e20bf11e51434f225ec10c [diff]