KorAP-querySerialization independent from
- KorAP-PoliqarpParser
- KorAP-AnnisParser
- KorAP-Cosmas2Parser
- KorAP-lucene-index
grammars integrated into querySerialization
still depends on KorAP-Entities (e.g. JsonUtils used in QuerySerializer and CollectionQueryBuilder)
diff --git a/src/main/antlr/annis/AqlLexer.g4 b/src/main/antlr/annis/AqlLexer.g4
new file mode 100644
index 0000000..a698a3b
--- /dev/null
+++ b/src/main/antlr/annis/AqlLexer.g4
@@ -0,0 +1,113 @@
+/*
+* Copyright 2013 SFB 632.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+lexer grammar AqlLexer;
+
+@header {package de.ids_mannheim.korap.query.parse.annis;}
+
+// additional tokens
+tokens {
+RANGE,
+ANNO,
+FROM_TO,
+DOM
+}
+TOK:'tok';
+NODE:'node';
+META:'meta';
+AND:'&';
+OR:'|';
+EQ_VAL:'==';
+EQ: '=';
+NEQ:'!=';
+DOMINANCE:'>';
+POINTING:'->';
+PRECEDENCE:'.';
+TEST:'%';
+IDENT_COV:'_=_';
+INCLUSION:'_i_';
+OVERLAP:'_o_';
+LEFT_ALIGN:'_l_';
+RIGHT_ALIGN:'_r_';
+LEFT_OVERLAP:'_ol_';
+RIGHT_OVERLAP:'_or_';
+LEFT_CHILD:'@l';
+RIGHT_CHILD:'@r';
+COMMON_PARENT:'$';
+IDENTITY:'_id_';
+ROOT:':root';
+ARITY:':arity';
+TOKEN_ARITY:':tokenarity';
+COMMA:',';
+STAR:'*';
+BRACE_OPEN:'(';
+BRACE_CLOSE:')';
+BRACKET_OPEN:'[';
+BRACKET_CLOSE:']';
+COLON:':';
+DOUBLECOLON:'::';
+SLASH:'/';
+QMARK : '?';
+
+/*
+ * Regular expressions (delimited by slashes in Annis)
+ */
+fragment FOCC : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}'; // occurrence braces: {n}, {n,}, {,m} or {n,m}
+fragment RE_char : ~('*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
+ | '(' | ')' | '|' | '"' | ':' | '\'' | '\\'); // any single character except the regex metacharacters, slash, quotes, colon and backslash listed here
+fragment RE_alter : ((RE_char | ('(' REGEX ')') | RE_chgroup) '|' REGEX )+; // alternation: an atom, then '|', then a REGEX; repeated one or more times
+fragment RE_chgroup : '[' RE_char+ ']'; // bracketed character group, e.g. [abc]
+fragment RE_quant : (RE_star | RE_plus | RE_occ) QMARK?; // a '*', '+' or {..} quantified atom, optionally followed by '?'
+fragment RE_opt : (RE_char | RE_chgroup | ( '(' REGEX ')')) '?'; // atom followed by '?'
+fragment RE_star : (RE_char | RE_chgroup | ( '(' REGEX ')')) '*'; // atom followed by '*'
+fragment RE_plus : (RE_char | RE_chgroup | ( '(' REGEX ')')) '+'; // atom followed by '+'
+fragment RE_occ : (RE_char | RE_chgroup | ( '(' REGEX ')')) FOCC; // atom followed by occurrence braces
+fragment RE_group : '(' REGEX ')'; // NOTE(review): parenthesised groups reuse REGEX, which itself requires the delimiting slashes -- confirm this is intended
+REGEX : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)* SLASH; // the only non-fragment rule here: a complete slash-delimited regular expression token
+
+
+WS : ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;
+
+VAR_DEF
+: ('a'..'z'|'A'..'Z') ( '0' .. '9'|'a'..'z'|'A'..'Z')* '#'
+;
+
+REF
+: '#' ( '0' .. '9'|'a'..'z'|'A'..'Z')+
+;
+
+ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'-')*;
+//ID : ('a'..'z'|'A'..'Z'|'_'|'['|']') ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'-'|'['|']')*;
+
+DIGITS : ('0'..'9')+;
+
+
+
+//START_TEXT_REGEX : '/' -> pushMode(IN_REGEX);
+START_TEXT_PLAIN:'"' -> pushMode(IN_TEXT);
+
+
+
+mode IN_REGEX; // NOTE: no active rule pushes this mode (START_TEXT_REGEX is commented out above), so TEXT_REGEX is currently unreachable
+
+//END_TEXT_REGEX : '/' -> popMode;
+TEXT_REGEX : (~'/'|'\\"')+;
+
+mode IN_TEXT;
+
+END_TEXT_PLAIN : '"' -> popMode;
+TEXT_PLAIN : (~'"'|'\\"')+;
+
+
diff --git a/src/main/antlr/annis/AqlParser.g4 b/src/main/antlr/annis/AqlParser.g4
new file mode 100644
index 0000000..af6a29e
--- /dev/null
+++ b/src/main/antlr/annis/AqlParser.g4
@@ -0,0 +1,177 @@
+/*
+* Copyright 2013 SFB 632.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+parser grammar AqlParser;
+
+@header {package de.ids_mannheim.korap.query.parse.annis;}
+
+options
+{
+language=Java;
+tokenVocab=AqlLexer;
+}
+
+
+// the following start rule should end with an EOF: "(exprTop | regex) EOF". However, this causes
+// trouble with "qName operator textSpec" specifications at the end of the input (see variableExpr rule), while "TOK operator textSpec"
+// works fine, for a strange reason. Until this is further investigated, go without EOF
+start
+: exprTop
+| regex
+;
+
+regex : REGEX;
+
+textSpec
+: START_TEXT_PLAIN END_TEXT_PLAIN # EmptyExactTextSpec
+| START_TEXT_PLAIN content=TEXT_PLAIN END_TEXT_PLAIN # ExactTextSpec
+| regex # RegexTextSpec
+//| START_TEXT_REGEX END_TEXT_REGEX #EmptyRegexTextSpec
+//| START_TEXT_REGEX content=TEXT_REGEX END_TEXT_REGEX # RegexTextSpec
+//| SLASH content=TEXT_REGEX SLASH # RegexTextSpec
+//| SLASH SLASH #EmptyRegexTextSpec
+;
+
+eqOperator
+: EQ
+| NEQ
+;
+
+rangeSpec
+: min=DIGITS (COMMA max=DIGITS)?
+;
+
+qName
+: (foundry '/')? layer
+;
+
+edgeAnno
+: ((foundry '/')? layer eqOperator)? textSpec
+;
+
+edgeSpec
+: BRACKET_OPEN (edgeAnno WS*)+ BRACKET_CLOSE // one or more edge annotations in [...]; NOTE: AqlLexer skips WS ("-> skip"), so WS* never matches a token here
+;
+
+refOrNode
+: REF # ReferenceRef
+| VAR_DEF? variableExpr # ReferenceNode
+;
+
+precedence
+: PRECEDENCE (layer)? # DirectPrecedence
+| PRECEDENCE (layer)? STAR # IndirectPrecedence
+| PRECEDENCE (layer COMMA?)? rangeSpec #RangePrecedence
+;
+
+dominance
+: DOMINANCE (qName)? (LEFT_CHILD | RIGHT_CHILD)? (anno=edgeSpec)? # DirectDominance // '>' with optional name, child position and edge annotation
+| DOMINANCE (qName)? STAR # IndirectDominance // '>' ... '*'
+| DOMINANCE (qName)? rangeSpec # RangeDominance // rangeSpec is mandatory (as in RangePrecedence/RangePointing); when optional, this alternative was ambiguous with DirectDominance
+;
+
+pointing
+: POINTING qName (anno=edgeSpec)? # DirectPointing
+| POINTING qName (anno=edgeSpec)? STAR # IndirectPointing
+| POINTING qName (anno=edgeSpec)? COMMA? rangeSpec # RangePointing
+;
+
+spanrelation
+: IDENT_COV # IdenticalCoverage
+| LEFT_ALIGN # LeftAlign
+| RIGHT_ALIGN # RightAlign
+| INCLUSION # Inclusion
+| OVERLAP # Overlap
+| RIGHT_OVERLAP # RightOverlap
+| LEFT_OVERLAP # LeftOverlap
+;
+
+commonparent
+: COMMON_PARENT (label)? # CommonParent
+;
+
+commonancestor
+: COMMON_PARENT (label)? STAR # CommonAncestor
+;
+
+identity
+: IDENTITY
+;
+
+equalvalue
+: EQ_VAL
+;
+
+notequalvalue
+: NEQ
+;
+
+operator
+: precedence
+| spanrelation
+| dominance
+| pointing
+| commonparent
+| commonancestor
+| identity
+| equalvalue
+| notequalvalue
+;
+
+foundry
+: ID;
+
+layer
+: ID;
+
+label
+: ID;
+
+n_ary_linguistic_term
+: refOrNode (operator refOrNode)+ # Relation
+;
+
+unary_linguistic_term
+: left=REF ROOT # RootTerm
+| left=REF ARITY EQ rangeSpec # ArityTerm
+| left=REF TOKEN_ARITY EQ rangeSpec # TokenArityTerm
+;
+
+variableExpr
+: qName eqOperator txt=textSpec # AnnoEqTextExpr
+| TOK eqOperator txt=textSpec # TokTextExpr
+| txt=textSpec # TextOnly // shortcut for tok="..."
+| qName # AnnoOnlyExpr
+| TOK # TokOnlyExpr
+| NODE # NodeExpr
+;
+
+expr
+: VAR_DEF variableExpr # NamedVariableTermExpr
+| variableExpr # VariableTermExpr
+| unary_linguistic_term # UnaryTermExpr
+| n_ary_linguistic_term # BinaryTermExpr
+| META DOUBLECOLON id=qName op=EQ txt=textSpec # MetaTermExpr
+;
+
+andTopExpr
+: ((expr (AND expr)*) | (BRACE_OPEN expr (AND expr)* BRACE_CLOSE)) # AndExpr
+;
+
+
+exprTop
+: andTopExpr (OR andTopExpr)* # OrTop
+;
\ No newline at end of file
diff --git a/src/main/antlr/annis/RawAqlPreParser.g4 b/src/main/antlr/annis/RawAqlPreParser.g4
new file mode 100644
index 0000000..9ba0370
--- /dev/null
+++ b/src/main/antlr/annis/RawAqlPreParser.g4
@@ -0,0 +1,36 @@
+/*
+* Copyright 2013 SFB 632.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+parser grammar RawAqlPreParser;
+
+@header {package de.ids_mannheim.korap.query.parse.annis;}
+
+options
+{
+language=Java;
+tokenVocab=AqlLexer;
+}
+
+start
+: expr EOF
+;
+
+expr
+: expr AND expr # AndExpr
+| expr OR expr # OrExpr
+| BRACE_OPEN expr BRACE_CLOSE # BraceExpr
+| ~(AND | OR | BRACE_OPEN | BRACE_CLOSE)+ # LeafExpr
+;
\ No newline at end of file
diff --git a/src/main/antlr/CollectionQuery.g4 b/src/main/antlr/collection/CollectionQuery.g4
similarity index 97%
rename from src/main/antlr/CollectionQuery.g4
rename to src/main/antlr/collection/CollectionQuery.g4
index da73081..869b759 100644
--- a/src/main/antlr/CollectionQuery.g4
+++ b/src/main/antlr/collection/CollectionQuery.g4
@@ -1,6 +1,6 @@
grammar CollectionQuery;
-@header {package de.ids_mannheim.korap.query.serialize.util;}
+@header {package de.ids_mannheim.korap.query.parse.collection;}
/*
-- author: jbingel
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
new file mode 100644
index 0000000..cd3b823
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps.g
@@ -0,0 +1,239 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
+// globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf). //
+// 17.12.12/FB //
+// v-0.6 //
+// TODO: //
+// - se1: Einsetzen des Default-Operators in den kummulierten AST. //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps;
+
+options {output=AST; backtrack=true; }
+tokens {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX; // imaginary token types (no lexer rule); most serve as AST node labels in rewrite rules
+ ARG1; ARG2;
+ OPWF; OPANNOT; // duplicate OPLEM removed: it is already declared in the first row of this block
+ OPLABEL;
+ OPIN; OPOV;
+ OPAND;
+ OPOR;
+ OPNOT;
+ OPEXPR1;
+ OPMORPH; OPELEM;
+ }
+
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+@members {
+    private IErrorReporter errorReporter = null; // optional sink for parser error messages; stays null until setErrorReporter is called
+    public void setErrorReporter(IErrorReporter errorReporter) { // installs the sink, or removes it when passed null
+        this.errorReporter = errorReporter;
+    }
+    public void emitErrorMessage(String msg) { // forwards to the sink; without a sink it uses the ANTLR default instead of throwing a NullPointerException
+        if (errorReporter != null) { errorReporter.reportError(msg); } else { super.emitErrorMessage(msg); }
+    }
+}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS : (' '|'\r'|'\n')+ {skip();};
+
+// Search operator PROX:
+// ---------------------
+
+fragment DISTVALUE
+ : ('0' .. '9')+ (':' ('0'..'9')+)? ; // a number, optionally followed by ':' and a second number
+
+fragment DIST
+ : ('+'|'-')? (DISTVALUE ('w'|'s'|'p'|'t') | ('w'|'s'|'p'|'t') DISTVALUE); // optional sign, then value+unit or unit+value (unit is one of w, s, p, t)
+
+fragment GROUP
+ : ('min' | 'max');
+
+OP_PROX : ('/' | '%') DIST (',' DIST)* (',' GROUP)? ; // '/' or '%', one or more comma-separated DISTs, optional trailing GROUP; lexed as a single token
+
+OP_IN : '#IN' | '#IN(' OP_IN_OPTS? ')' ; // #IN with or without a parenthesised option string, lexed as a single token
+
+OP_OV : '#OV' | '#OV(' OP_OV_OPTS? ')' ; // #OV with or without a parenthesised option string, lexed as a single token
+
+// EAVEXPR is used here to match an arbitrary sequence of characters up to ')'.
+fragment OP_IN_OPTS
+ : EAVEXPR ;
+
+// EAVEXPR is used here to match an arbitrary sequence of characters up to ')'.
+fragment OP_OV_OPTS
+ : EAVEXPR ;
+
+// OP_BED: #BED( searchExp , condition )
+// OP_BED_END = ", condition )"
+// unresolved: #BED(Jimi Hendrix, sa) -> the comma is absorbed into "Hendrix,"!
+// workaround: put a blank before the comma: #BED(Jimi Hendrix , sa) -> OK.
+
+OP_BED_END
+ : ',' ~(')')+ ')' ;
+
+// OP1: Operator with single argument:
+// (funktioniert nicht: fragment OP1 : OP1BEG | OP1END ...;)
+
+//OP1 : '#BEG(' | '#END(' | '#ALL(' | '#NHIT(' ;
+
+// Labels used as keywords for search terms with a special meaning (headings, etc.);
+// must be declared BEFORE SEARCHWORD1/2.
+
+SEARCHLABEL
+ : ('<s>' | '<p>' | '<ü>' | '<üd>' | '<üh>' | '<üu>' | '<üz>' | '<ür>');
+
+// Search Word:
+// special characters are hidden inside "...".
+// SEARCHWORD1: single or multiple words not enclosed in "...".
+// SEARCHWORD2: single or multiple words enclosed in "...".
+SEARCHLEMMA
+ : '&' SEARCHWORD1 ; // rewrite rules do not work in the lexer: -> ^(OPLEM $SEARCHWORD1.text);
+
+// SEARCHWORD2: swallows blanks. These must be recognised as word delimiters afterwards.
+SEARCHWORD1
+ : ~('"' | ' ' | '#' | ')' | '(' )+ ;
+
+SEARCHWORD2
+ : '"' (~('"') | '\\"')+ '"' ;
+
+// Annotation operator #ELEM( EAVEXPR ).
+// EAVEXPR = Element Attribute Value Expression.
+// all special characters before the blank are excluded.
+// e.g. #ELEM(ANA='N pl'); #ELEM(HEAD, TYPE='DACHUEBERSCHRIFT');
+// e.g. #ELEM( ANA='N()' LEM='L\'été');
+
+fragment EAVEXPR
+ : ( ~( '(' | ')' | '\'' | ('\u0000'..'\u001F')) | ('\'' (~('\'' | '\\') | '\\' '\'')* '\'') )+ ;
+
+fragment WORD
+ : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' )+;
+
+// "#ELEM()" is accepted only for error handling; otherwise it is a meaningless query.
+OP_ELEM : '#ELEM(' EAVEXPR ')' | '#ELEM(' ')';
+
+// Strictly speaking, EAVEXPR is not the correct label name for the content of MORPH(),
+// but it has the same syntax and can be used at this point. NOTE(review): OP_MORPH below uses MORPHEXPR, not EAVEXPR.
+
+fragment MORPHEXPR
+ : WORD (':' WORD)?
+ | WORD '!'? '=' WORD (':' WORD)?
+ | WORD '/' WORD '!'? '=' WORD (':' WORD)?
+ ;
+
+OP_MORPH: 'MORPH('
+ MORPHEXPR (' '* '&' ' '* MORPHEXPR)*
+ ')' ;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+// options {backtrack=true; k=5;}
+
+c2ps_query
+ : searchExpr EOF -> ^(C2PQ searchExpr);
+
+/* this rule is OK.
+searchExpr
+ : (op1 | searchWord | searchLemma | searchAnnot | searchLabel | '('! searchExpr ')'!)+ (op2^ searchExpr)? ;
+*/
+// trying to modify the rule above for generating arg1 and arg2 in the resulting AST more easily.
+// notes: se1+=searchExpr1 is of type List. -> $se1+ (not {$se1+} !) is the AST of the list.
+searchExpr
+ : (se1+=searchExpr1)+ (op2 se2=searchExpr)?
+
+ -> {$op2.tree != null}? ^({$op2.tree} ^(ARG1 $se1+) ^(ARG2 {$se2.tree}))
+ -> $se1+ ;
+
+searchExpr1
+ : op1 -> {$op1.tree}
+ | searchWord -> {$searchWord.tree}
+ | searchLemma -> {$searchLemma.tree}
+ | searchAnnot -> {$searchAnnot.tree}
+ | searchLabel -> {$searchLabel.tree}
+ | '(' searchExpr ')' -> {$searchExpr.tree};
+
+// Search term = search word in quotation marks (word2) or without them (word1):
+// abandoned: word1+ | '"' word1+ '"' ;
+
+searchWord
+ : word1
+ | word2;
+
+word1 : SEARCHWORD1 -> {c2ps_opWF.check($SEARCHWORD1.text, false, false, $SEARCHWORD1.index)} ;
+
+word2 : SEARCHWORD2 -> {c2ps_opWF.check($SEARCHWORD2.text, true, false, $SEARCHWORD2.index)} ;
+
+// Search term = lemma:
+searchLemma
+ : SEARCHLEMMA -> {c2ps_opWF.check($SEARCHLEMMA.text, false, true, $SEARCHLEMMA.index)} ;
+
+// Search term = annotation operator:
+// (for the lexer to produce the right token, OP_ELEM must recognise the whole
+// annotation expression as one token).
+searchAnnot
+ : OP_ELEM
+ -> ^({c2ps_opELEM.check($OP_ELEM.text,$OP_ELEM.index)})
+ | OP_MORPH
+ -> ^(OPMORPH ^({new CommonTree(new CommonToken(OPMORPH, c2ps_opAnnot.strip($OP_MORPH.text)))}));
+
+// searchLabel: <s>, <p>, <ü> etc.
+
+searchLabel
+ : SEARCHLABEL -> ^(OPLABEL SEARCHLABEL);
+
+// Search operators:
+// -----------------
+
+// OP2: search operators with 2 arguments:
+// ---------------------------------------
+
+// The AST returned by op2 is automatically the one of the parsed operator.
+
+op2 : (opPROX | opIN | opOV | opAND | opOR | opNOT) ;
+
+// AST with Options for opPROX is returned by c2ps_opPROX.check():
+opPROX : OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.index)} );
+
+opIN : OP_IN -> {c2ps_opIN.check($OP_IN.text, $OP_IN.index)};
+
+opOV : OP_OV -> {c2ps_opOV.check($OP_OV.text, $OP_OV.index)};
+
+opAND : ('und' | 'UND' | 'and' | 'AND') -> ^(OPAND);
+
+opOR : ('oder' | 'ODER' | 'or' | 'OR') -> ^(OPOR);
+
+opNOT : ('nicht' | 'NICHT' | 'not' | 'NOT') -> ^(OPNOT);
+
+// OP1: search operators with 1 argument:
+// --------------------------------------
+
+op1 : opBEG | opEND | opNHIT | opALL | opBED;
+
+// #BED(searchExpr, B).
+// B must be checked afterwards in a local grammar.
+
+opBED : '#BED(' searchExpr opBEDEnd -> ^(OPBED searchExpr ^(OPTS {$opBEDEnd.tree})) ;
+
+// c2ps_opBED.check() returns an AST that is returned by rule opBEDEnd.
+// for this action inside a rewrite rule, no ';' behind the function call.
+opBEDEnd: OP_BED_END -> {c2ps_opBED.check($OP_BED_END.text, $OP_BED_END.index) };
+
+opBEG : ( '#BEG(' | '#LINKS(' ) searchExpr ')' -> ^(OPBEG searchExpr) ;
+
+opEND : ( '#END(' | '#RECHTS(' ) searchExpr ')' -> ^(OPEND searchExpr) ;
+
+opNHIT : ( '#NHIT(' | '#INKLUSIVE(' ) searchExpr ')' -> ^(OPNHIT searchExpr) ;
+
+opALL : ( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')' -> ^(OPALL searchExpr) ;
diff --git a/src/main/antlr/cosmas/c2ps.tokens b/src/main/antlr/cosmas/c2ps.tokens
new file mode 100644
index 0000000..ae8ad67
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps.tokens
@@ -0,0 +1,78 @@
+OP_ELEM=41
+T__62=62
+OPEND=8
+OP_OV=34
+OPTS=6
+OPAND=20
+T__61=61
+T__60=60
+T__55=55
+T__56=56
+T__57=57
+C2PQ=4
+T__58=58
+OPALL=10
+T__51=51
+T__52=52
+OPNOT=22
+T__53=53
+T__54=54
+MORPHEXPR=42
+SEARCHWORD2=40
+T__59=59
+OPELEM=25
+OPOR=21
+OP_MORPH=43
+T__50=50
+OPPROX=12
+OPOV=19
+T__46=46
+EAVEXPR=35
+T__47=47
+T__44=44
+OPWF=15
+SEARCHLEMMA=39
+T__45=45
+T__48=48
+T__49=49
+OPLABEL=17
+OP_OV_OPTS=33
+DIST=28
+ARG1=13
+SEARCHWORD1=38
+OPNHIT=9
+ARG2=14
+SEARCHLABEL=37
+OP_IN=32
+OP_IN_OPTS=31
+GROUP=29
+OPLEM=11
+WS=26
+OPIN=18
+OPBED=5
+OP_BED_END=36
+OPBEG=7
+OPMORPH=24
+OPANNOT=16
+DISTVALUE=27
+OPEXPR1=23
+OP_PROX=30
+'#ALL('=62
+'NOT'=57
+'NICHT'=55
+'#NHIT('=61
+'OR'=53
+'and'=48
+'UND'=47
+'#END('=60
+'#BED('=58
+'#BEG('=59
+'not'=56
+'('=44
+'oder'=50
+'or'=52
+'nicht'=54
+')'=45
+'und'=46
+'AND'=49
+'ODER'=51
diff --git a/src/main/antlr/cosmas/c2ps_opBED.g b/src/main/antlr/cosmas/c2ps_opBED.g
new file mode 100644
index 0000000..e5d715e
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opBED.g
@@ -0,0 +1,44 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// Lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache //
+// Dez. 2012/FB //
+// v1.0 //
+// lokale Grammatik für #BED(x, Opts). //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opBED;
+
+options {output=AST;}
+tokens {TPBEG; TPEND; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+TP_POS : ('+'|'-')? ('sa'|'SA'|'se'|'SE'|'pa'|'PA'|'pe'|'PE'|'ta'|'TA'|'te'|'TE') ;
+
+WS : (' ')+ {skip();};
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+opBEDOpts
+ : ',' textpos ')' -> textpos ;
+
+textpos : ( tpBeg ('/' tpEnd)? | '/' tpEnd ) -> tpBeg? tpEnd?;
+
+tpBeg : tpExpr -> ^(TPBEG tpExpr);
+
+tpEnd : tpExpr -> ^(TPEND tpExpr);
+
+tpExpr : tpPos (',' tpPos)* -> tpPos*;
+
+tpPos : TP_POS;
diff --git a/src/main/antlr/cosmas/c2ps_opBED.tokens b/src/main/antlr/cosmas/c2ps_opBED.tokens
new file mode 100644
index 0000000..69f86b5
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opBED.tokens
@@ -0,0 +1,10 @@
+TPBEG=4
+WS=7
+TP_POS=6
+T__10=10
+TPEND=5
+T__9=9
+T__8=8
+'/'=10
+','=8
+')'=9
diff --git a/src/main/antlr/cosmas/c2ps_opELEM.g b/src/main/antlr/cosmas/c2ps_opELEM.g
new file mode 100644
index 0000000..5a6730c
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opELEM.g
@@ -0,0 +1,52 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
+// lokale Grammatik für #ELEM(Expr). //
+// 08.01.13/FB //
+// v-0.2 //
+// TODO: - //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opELEM;
+
+options {output=AST;}
+
+tokens {OPELEM; EMPTY;
+ ELNAME;
+ EQ; NOTEQ;
+ }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS : ( ' '|'\t'|'\r'|'\n')+ {$channel=HIDDEN;};
+
+// remove '#' from ID to avoid #ELEM(C) being tokenized as an ID;
+// stating '#' should not start an ID has no effect in ANTLR.
+// ID may contain an escaped ', e.g. l\'été.
+ID : (~('#'|'\''|' '|'='|'!'|'<'|'>'|')') | ('\\' '\''))+;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opELEM : '#ELEM(' ')' -> ^(OPELEM EMPTY)
+ | '#ELEM(' elem avExpr* ')' -> ^(OPELEM elem avExpr*)
+ | '#ELEM(' avExpr+ ')' -> ^(OPELEM avExpr+);
+
+elem : ID -> ^(ELNAME ID);
+
+avExpr : id1=ID op id2=ID -> ^(op $id1 $id2)
+ | id1=ID op '\'' id3+=ID+ '\'' -> ^(op $id1 $id3+);
+
+op : '=' -> ^(EQ)
+ | ('<>' | '!=') -> ^(NOTEQ);
+
+
\ No newline at end of file
diff --git a/src/main/antlr/cosmas/c2ps_opELEM.tokens b/src/main/antlr/cosmas/c2ps_opELEM.tokens
new file mode 100644
index 0000000..656fb67
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opELEM.tokens
@@ -0,0 +1,19 @@
+NOTEQ=8
+WS=9
+T__16=16
+T__15=15
+T__12=12
+T__11=11
+T__14=14
+T__13=13
+ELNAME=6
+OPELEM=4
+EQ=7
+ID=10
+EMPTY=5
+'!='=16
+'='=14
+'<>'=15
+'#ELEM('=11
+')'=12
+'\''=13
diff --git a/src/main/antlr/cosmas/c2ps_opIN.g b/src/main/antlr/cosmas/c2ps_opIN.g
new file mode 100644
index 0000000..da14af2
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opIN.g
@@ -0,0 +1,67 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
+// lokale Grammatik für #IN() und #IN(Options). //
+// 17.12.12/FB //
+// v-0.1 //
+// //
+// Opts nimmt eine oder mehrere, durch Kommata getrennte Optionen auf: //
+// - Bereichsoptionen (RANGE): ALL, HIT, -. //
+// - Positionsoptionen (POS): L, R, F, FE, FI, N, -. //
+// - Ausschließungsoptionen: %, -. //
+// - Gruppenbildungsoptionen (GROUP): min, max, -. //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opIN;
+
+options {output=AST;}
+tokens {OPIN;
+ RANGE; ALL; HIT;
+ POS;
+ EXCL; YES;
+ GROUP; MIN; MAX; }
+
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS : (' ')+ {skip();};
+
+POSTYP : ('L'|'l'|'R'|'r'|'F'|'f'|'FE'|'fe'|'FI'|'fi'|'N'|'n' );
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opIN : '#IN' -> ^(OPIN)
+ | '#IN(' opts? ')' -> ^(OPIN opts?);
+
+opts : opt (',' opt)*
+
+ -> opt*;
+
+opt : (optRange |optPos | optExcl | optGrp);
+
+// Bereich:
+optRange: ('ALL' | 'all') -> ^(RANGE ALL)
+ | ('HIT' | 'hit') -> ^(RANGE HIT);
+
+// Position:
+optPos : POSTYP
+
+ -> ^(POS POSTYP);
+
+optExcl : '%'
+
+ -> ^(EXCL YES);
+
+optGrp : ('MIN' | 'min') -> ^(GROUP MIN)
+ | ('MAX' | 'max') -> ^(GROUP MAX) ;
+
diff --git a/src/main/antlr/cosmas/c2ps_opIN.tokens b/src/main/antlr/cosmas/c2ps_opIN.tokens
new file mode 100644
index 0000000..a49b6a9
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opIN.tokens
@@ -0,0 +1,38 @@
+T__28=28
+T__27=27
+T__26=26
+T__25=25
+T__24=24
+T__23=23
+T__22=22
+T__21=21
+T__20=20
+MAX=13
+YES=10
+RANGE=5
+EXCL=9
+MIN=12
+T__19=19
+GROUP=11
+WS=14
+T__16=16
+OPIN=4
+T__18=18
+T__17=17
+POS=8
+ALL=6
+HIT=7
+POSTYP=15
+'#IN('=17
+'HIT'=22
+'all'=21
+'MIN'=25
+'#IN'=16
+'hit'=23
+'min'=26
+','=19
+')'=18
+'ALL'=20
+'MAX'=27
+'max'=28
+'%'=24
diff --git a/src/main/antlr/cosmas/c2ps_opOV.g b/src/main/antlr/cosmas/c2ps_opOV.g
new file mode 100644
index 0000000..a80db9b
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opOV.g
@@ -0,0 +1,58 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
+// lokale Grammatik für #OV() und #OV(Options). //
+// 17.12.12/FB //
+// v-0.1 //
+// //
+// Opts nimmt eine oder mehrere, durch Kommata getrennte Optionen auf: //
+// - Positionsoptionen (POS): L, R, F, FE, FI, X, -. //
+// - Ausschließungsoptionen (EXCL): %, -. //
+// - Gruppenbildungsoptionen (GROUP): min, max, -. //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opOV;
+
+options {output=AST;}
+tokens {OPOV;
+ POS;
+ EXCL; YES;
+ GROUP; MIN; MAX; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+WS : (' ')+ {skip();};
+
+POSTYP : 'L'|'l'|'R'|'r'|'F'|'f'|'FE'|'fe'|'FI'|'fi'|'N'|'n'|'X'|'x' ;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+opOV : '#OV' -> ^(OPOV)
+ | '#OV(' opts? ')' -> ^(OPOV opts?);
+
+opts : opt (',' opt)* -> opt*;
+
+opt : (optPos | optExcl | optGrp);
+
+// Position:
+optPos : POSTYP
+
+ -> ^(POS POSTYP);
+
+optExcl : '%'
+
+ -> ^(EXCL YES);
+
+optGrp : ('MIN' | 'min') -> ^(GROUP MIN)
+ | ('MAX' | 'max') -> ^(GROUP MAX) ;
+
diff --git a/src/main/antlr/cosmas/c2ps_opOV.tokens b/src/main/antlr/cosmas/c2ps_opOV.tokens
new file mode 100644
index 0000000..5b66e6c
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opOV.tokens
@@ -0,0 +1,27 @@
+T__21=21
+T__20=20
+MAX=10
+YES=7
+EXCL=6
+MIN=9
+T__19=19
+GROUP=8
+WS=11
+T__16=16
+T__15=15
+T__18=18
+T__17=17
+POS=5
+T__14=14
+T__13=13
+POSTYP=12
+OPOV=4
+'#OV'=13
+'min'=19
+','=16
+')'=15
+'MAX'=20
+'MIN'=18
+'#OV('=14
+'max'=21
+'%'=17
diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g
new file mode 100644
index 0000000..f7a42f5
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opPROX.g
@@ -0,0 +1,78 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps) //
+// für den Abstandsoperator /w... und %w... //
+// v-1.0 - 07.12.12/FB //
+// //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opPROX;
+
+options {output=AST;}
+
+tokens { PROX_OPTS;
+ TYP; PROX; EXCL;
+ DIST_LIST; DIST; RANGE; VAL0;
+ MEAS; // measure
+ DIR; PLUS; MINUS; BOTH;
+ GRP; MIN; MAX; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// PROX-Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+DISTVALUE
+ : ('0' .. '9')+ ;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// PROX-Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+opPROX : proxTyp proxDist (',' proxDist)* (',' proxGroup)?
+
+ -> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree});
+
+proxTyp : '/' -> ^(TYP PROX) // classic distance.
+ | '%' -> ^(TYP EXCL); // excluding distance.
+
+// proxDist: e.g. +5w or -s0 or /w2:4 etc.
+// no "proxDirection?" here, because the default can only be generated inside rule proxDirection.
+proxDist: proxDirection (v1=proxDistValue m1=proxMeasure | m2=proxMeasure v2=proxDistValue)
+
+ -> {$v1.tree != null}? ^(DIST {$proxDirection.tree} {$v1.tree} {$m1.tree})
+ -> ^(DIST {$proxDirection.tree} {$v2.tree} {$m2.tree});
+
+proxDirection
+ : (p='+'|m='-')? -> {$p != null}? ^(DIR PLUS)
+ -> {$m != null}? ^(DIR MINUS)
+ -> ^(DIR BOTH) ;
+/*
+proxDistValue // proxDistMin ( ':' proxDistMax)? ;
+ : (m1=proxDistMin -> ^(DIST_RANGE VAL0 $m1)) (':' m2=proxDistMax -> ^(DIST_RANGE $m1 $m2))? ;
+*/
+proxDistValue // proxDistMin ( ':' proxDistMax)? ;
+ : (m1=proxDistMin ) (':' m2=proxDistMax)?
+
+ -> {$m2.text != null}? ^(RANGE $m1 $m2)
+ -> ^(RANGE VAL0 $m1);
+
+proxMeasure
+ : (m='w'|m='s'|m='p'|m='t') -> ^(MEAS $m);
+
+proxDistMin
+ : DISTVALUE;
+
+proxDistMax
+ : DISTVALUE;
+
+proxGroup
+ : 'min' -> ^(GRP MIN)
+ | 'max' -> ^(GRP MAX);
+
diff --git a/src/main/antlr/cosmas/c2ps_opPROX.tokens b/src/main/antlr/cosmas/c2ps_opPROX.tokens
new file mode 100644
index 0000000..ff027fd
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opPROX.tokens
@@ -0,0 +1,39 @@
+MEAS=12
+T__29=29
+T__28=28
+T__27=27
+T__26=26
+T__25=25
+T__24=24
+T__23=23
+T__22=22
+T__21=21
+MAX=19
+DIST=9
+PROX=6
+RANGE=10
+TYP=5
+EXCL=7
+MIN=18
+MINUS=15
+PROX_OPTS=4
+DIST_LIST=8
+VAL0=11
+T__30=30
+T__31=31
+DIR=13
+GRP=17
+BOTH=16
+PLUS=14
+DISTVALUE=20
+'w'=27
+'p'=29
+'/'=22
+'s'=28
+':'=26
+'-'=25
+'min'=30
+'+'=24
+','=21
+'max'=31
+'%'=23
diff --git a/src/main/antlr/cosmas/c2ps_opWF.g b/src/main/antlr/cosmas/c2ps_opWF.g
new file mode 100644
index 0000000..200a21a
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opWF.g
@@ -0,0 +1,74 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
+// lokale Grammatik für Wortformen //
+// (Bsp: Hendrix, Hendrix:sa/-pe, :fi:Hendrix, etc. ) //
+// v-0.3 - 10.01.13/FB //
+// //
+// Strategie: //
+// - Input string: :cccc:wwww:ppp //
+// - diese Grammatik trennt ccc, www und ppp voneinander, ccc und ppp werden in weiteren //
+// lokalen Grammatiken zerlegt. //
+// - Begründung: weil die Tokens in ccc, www und ppp sich überschneiden, ist eine große //
+// Grammatik unnötig umständlich. //
+// - Bsp.: :FiOlDs:Würde:sa/-pe,-te -> c=FiOlDs + w=Würde + p=sa/-pe,-te. //
+// Mögliche Werte für die Case-Optionen: //
+// www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/GROSS_KLEIN.html //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_opWF;
+
+options {output=AST;}
+
+tokens { OPWF; OPLEM; OPTCASE; TPOS; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// WF-Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+fragment STRING
+ : ( ~(':' | ' ') | '\\:' )+ ;
+
+Case : ':' STRING ':';
+
+TPos : ':' STRING;
+
+WF : STRING;
+
+WS : (' ')+ {skip();};
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// WF-Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+searchWFs
+ : searchWF+;
+
+searchWF: optCase? wordform tpos?
+
+ -> ^(OPWF wordform optCase? tpos? ) ;
+
+wordform: WF -> {c2ps_opWF.encode($WF.text, OPWF)};
+
+// Case Options:
+optCase : Case
+
+ -> {c2ps_optCase.check($Case.text, $Case.index)} ;
+
+// textposition Options:
+tpos : TPos
+
+ -> ^(TPOS {c2ps_opBED.checkTPos($TPos.text, $TPos.index)});
+
+// same for lemmas, but without optCase:
+searchLEM
+ : wordform tpos?
+
+ -> ^(OPLEM wordform tpos?);
+
diff --git a/src/main/antlr/cosmas/c2ps_opWF.tokens b/src/main/antlr/cosmas/c2ps_opWF.tokens
new file mode 100644
index 0000000..4f921af
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_opWF.tokens
@@ -0,0 +1,9 @@
+OPLEM=5
+OPTCASE=6
+WS=12
+OPWF=4
+TPOS=7
+TPos=10
+WF=11
+Case=9
+STRING=8
diff --git a/src/main/antlr/cosmas/c2ps_optCase.g b/src/main/antlr/cosmas/c2ps_optCase.g
new file mode 100644
index 0000000..e24fa77
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_optCase.g
@@ -0,0 +1,51 @@
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// //
+// COSMAS II line-oriented query language (C2 plain syntax) //
+// local grammar for the 'Case' option. //
+// (e.g. :fi: in :fi:Hendrix). //
+// v-0.1 - 14.12.12/FB //
+// //
+// External call: via c2ps_optCase.check(). //
+// Possible values for the case options: //
+// www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/GROSS_KLEIN.html //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+grammar c2ps_optCase;
+
+options {output=AST;}
+
+tokens {CASE; }
+@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Lexer
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+// The 'English' abbreviations are defined here. Equivalents in German also exist.
+//CA : ('fi'|'Fi'|'fu'|'Fu'|'fs'|'Fs'|'fl'|'Fl'|'Os'|'os'|'Oi'|'oi'|'Ou'|'ou'|'Ol'|'ol'|'Ds'|'ds'|'Di'|'di');
+
+fragment CA_FIRST
+ : ('F'|'f');
+fragment CA_OTHER
+ : ('O'|'o');
+fragment CA_HOW
+ : ('s'|'i'|'u'|'l');
+fragment CA_DIA
+ : ('D'|'d');
+// CA: one option = F/f or O/o followed by s, i, u or l; or D/d followed by s or i.
+CA : ((CA_FIRST|CA_OTHER) CA_HOW) | ( CA_DIA ('s'|'i') );
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// Parser
+//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+// One or more CA tokens become the children of a CASE root node.
+optCase : ca+=CA+
+ // NOTE(review): no lexer rule matches ':', yet c2ps_opWF passes $Case.text (':' STRING ':') - confirm
+ -> ^(CASE CA+ ) ;
+
+
diff --git a/src/main/antlr/cosmas/c2ps_optCase.tokens b/src/main/antlr/cosmas/c2ps_optCase.tokens
new file mode 100644
index 0000000..829aab0
--- /dev/null
+++ b/src/main/antlr/cosmas/c2ps_optCase.tokens
@@ -0,0 +1,6 @@
+CA_HOW=7
+CA_OTHER=6
+CA=9
+CASE=4
+CA_DIA=8
+CA_FIRST=5
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
new file mode 100644
index 0000000..d34475a
--- /dev/null
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
@@ -0,0 +1,103 @@
+lexer grammar PoliqarpPlusLexer;
+
+@header {package de.ids_mannheim.korap.query.parse.poliqarpplus;}
+
+
+
+options
+{
+language=Java;
+}
+
+/*
+ -- author: Joachim Bingel
+ -- date: 14-06-27
+
+ Poliqarp Query Language lexer
+
+ Language documentation:
+ - Adam Przepiórkowski (2004):
+ "The IPI PAN Corpus -- preliminary version", pp. 44
+
+ Further information:
+ - http://korpus.pl/index.php?page=poliqarp
+ Statistical extension:
+ - http://nlp.ipipan.waw.pl/Poliqarp/
+ Based on CQP
+ - http://cwb.sourceforge.net/files/CQP_Tutorial/
+
+Todo: Some special characters aren't supported in REGEX and strings.
+Todo: tags can be split at ':' in case the fieldname is 'tag'
+
+*/
+// Keyword tokens. They are defined before WORD, so a keyword wins over WORD on an equal-length match.
+POSITION_OP : ('contains' | 'startswith' | 'startsWith' | 'endswith' | 'endsWith' | 'matches' | 'overlaps') ;
+RELATION_OP : ('dominates' | 'relatesTo');
+MATCH_OP : ('focus' | 'shrink' | 'split'); // submatch and shrink are deprecated!
+SUBMATCH_OP : 'submatch';
+META : 'meta';
+
+/*
+ Regular expression
+ /x allows submatches like /^.*?RE.*?$/
+ /X forces full matches
+ /i means case insensitivity
+ /I forces case sensitivity
+*/
+FLAG_xi : '/' ( ('x'|'X') ('i'|'I')? );
+FLAG_ix : '/' ( ('i'|'I') ('x'|'X')? );
+
+
+
+/** Simple strings and Simple queries */
+WS : [ \t] -> skip ;
+fragment FOCC : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}'; // {n,m}, {,m}, {n} or {n,}
+fragment NO_RE : ~[ \t\/]; // not referenced by any rule in this lexer
+fragment ALPHABET : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );
+NUMBER : [0-9]+; // defined before WORD: an all-digit string is a NUMBER, not a WORD
+
+NL : [\r\n] -> skip;
+
+
+WORD : ALPHABET+;
+
+
+/* Complex queries */
+LPAREN : '['; // note: LPAREN/RPAREN are the square brackets
+RPAREN : ']';
+LRPAREN : '('; // LRPAREN/RRPAREN are the round brackets
+RRPAREN : ')';
+NEG : '!';
+QMARK : '?';
+CONJ : '&';
+DISJ : '|';
+COMMA : ',';
+LT : '<';
+GT : '>';
+LBRACE : '{';
+RBRACE : '}';
+SLASH : '/';
+COLON : ':';
+TILDE : '~';
+EQ : '=';
+CARET : '^';
+STAR : '*';
+PLUS : '+';
+EMPTYREL : '@';
+
+/* Regular expressions and Regex queries */
+fragment RE_char : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '"' | ':' | '\'' | '\\');
+fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;
+fragment RE_chgroup : '[' RE_char+ ']';
+fragment RE_quant : (RE_star | RE_plus | RE_occ) QMARK?;
+fragment RE_opt : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '?';
+fragment RE_star : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '*';
+fragment RE_plus : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '+';
+fragment RE_occ : (RE_char | RE_chgroup | ( '(' RE_expr ')')) FOCC;
+fragment RE_group : '(' RE_expr ')';
+fragment RE_expr : ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)+;
+fragment RE_dquote : '"' (RE_expr | '\'' | ':' )* '"';
+fragment RE_squote : '\'' (RE_expr | '\"' | ':' )* '\'';
+REGEX : ( RE_dquote | RE_squote );
\ No newline at end of file
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
new file mode 100644
index 0000000..dc876d9
--- /dev/null
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
@@ -0,0 +1,228 @@
+parser grammar PoliqarpPlusParser;
+
+@header {package de.ids_mannheim.korap.query.parse.poliqarpplus;}
+
+options
+{
+language=Java;
+tokenVocab=PoliqarpPlusLexer;
+}
+/*
+ -- author: Joachim Bingel
+ -- date: 14-06-27
+
+ Poliqarp Query Language parser
+
+ Language documentation:
+ - Adam Przepiórkowski (2004):
+ "The IPI PAN Corpus -- preliminary version", pp. 44
+
+ Further information:
+ - http://korpus.pl/index.php?page=poliqarp
+ Statistical extension:
+ - http://nlp.ipipan.waw.pl/Poliqarp/
+ Based on CQP
+ - http://cwb.sourceforge.net/files/CQP_Tutorial/
+
+Todo: Some special characters aren't supported in REGEX and strings.
+Todo: tags can be split at ':' in case the fieldname is 'tag'
+
+*/
+
+
+flag:
+FLAG_xi | FLAG_ix
+;
+
+boolOp
+: CONJ | DISJ
+;
+
+regex
+: REGEX
+;
+
+key
+: WORD
+| regex
+| NUMBER
+;
+
+foundry
+: WORD
+;
+
+layer
+: WORD
+;
+
+value
+: WORD
+| NUMBER
+| regex
+;
+
+/* Fields */
+term
+: NEG* (foundry SLASH)? layer termOp key (COLON value)? flag?
+;
+
+termOp // accepts =, ==, !=, !==, ~, ~~, !~ and !~~
+: (NEG? EQ? EQ | NEG? TILDE? TILDE)
+;
+
+min
+: NUMBER
+;
+
+max
+: NUMBER
+;
+
+startpos
+: NUMBER
+;
+
+length
+: NUMBER
+;
+
+range
+: LBRACE
+ ( min COMMA max
+ | max // {n}: a single number is parsed as 'max'
+ | COMMA max
+ | min COMMA
+ )
+ RBRACE
+;
+
+emptyToken // []: LPAREN and RPAREN are the square brackets in the lexer
+: LPAREN RPAREN
+;
+
+termGroup
+: (term | LRPAREN termGroup RRPAREN) boolOp (term | LRPAREN termGroup RRPAREN | termGroup)
+;
+
+repetition
+: kleene
+| range
+;
+
+kleene
+: QMARK
+| STAR
+| PLUS
+;
+
+token
+: NEG*
+ ( LPAREN term RPAREN
+ | LPAREN termGroup RPAREN
+ | key flag?
+ )
+;
+
+span
+: LT ((foundry SLASH)? layer termOp)? key NEG* (LRPAREN term RRPAREN|LRPAREN termGroup RRPAREN)? GT
+| LT ((foundry SLASH)? layer termOp)? key NEG* (term|termGroup)? GT
+;
+
+position
+: POSITION_OP LRPAREN (segment|sequence) COMMA (segment|sequence) RRPAREN
+;
+
+relation
+: RELATION_OP LRPAREN ((EMPTYREL|relSpec)? repetition? COLON)? (segment|sequence) COMMA (segment|sequence) RRPAREN
+;
+
+relSpec
+: (foundry SLASH)? layer (termOp key)?
+;
+
+submatch
+: SUBMATCH_OP LRPAREN startpos COMMA (length)? COLON (segment|sequence) RRPAREN
+;
+
+matching // SUBMATCH_OP is accepted here as well as in rule submatch
+: (SUBMATCH_OP|MATCH_OP) LRPAREN spanclass_id? (segment|sequence)? RRPAREN
+;
+
+alignment
+: CARET (segment|sequence)
+;
+
+disjunction
+: (segment|sequence|group) (DISJ (segment|sequence|group))+
+;
+
+group
+: LRPAREN ( disjunction | sequence ) RRPAREN
+;
+
+spanclass_id
+: NUMBER (boolOp NUMBER)* COLON
+;
+
+emptyTokenSequence
+: (emptyToken repetition?)+
+;
+
+emptyTokenSequenceClass
+: LBRACE spanclass_id? emptyTokenSequence RBRACE // class defined around empty tokens
+;
+
+
+distance
+: emptyTokenSequence
+;
+
+spanclass
+: LBRACE spanclass_id? (segment|sequence) RBRACE
+;
+
+segment
+: ( position
+ | token
+ | span
+ | group
+ | spanclass
+ | matching
+ | submatch
+ | relation
+ | LRPAREN segment RRPAREN
+ )
+ repetition?
+;
+
+sequence
+: segment+ (emptyTokenSequence|emptyTokenSequenceClass) // ordering important! this subrule must precede any 'distance'-subrules to give precedence to repetition-interpretation of numbers in braces (could be mistaken for number tokens in spanclass), e.g. {2}.
+| segment segment+
+| (emptyTokenSequence|emptyTokenSequenceClass) (segment+ | sequence) (emptyTokenSequence|emptyTokenSequenceClass)?
+| segment (distance|emptyTokenSequenceClass) segment
+| segment (distance|emptyTokenSequenceClass)? sequence
+| segment+ alignment
+;
+
+
+/** Entry point for linguistic queries */
+
+query
+: segment | sequence | disjunction
+;
+
+
+/**
+ === META section ===
+ defines metadata filters on request
+*/
+
+meta : META metaTermGroup;
+metaTermGroup : ( term | termGroup )+;
+
+/**
+ Entry point for all requests. Linguistic query is obligatory, metadata filtering
+ is optional.
+*/
+request : query meta? EOF;
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/IErrorReporter.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/IErrorReporter.java
new file mode 100644
index 0000000..9f92c7b
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/IErrorReporter.java
@@ -0,0 +1,5 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+/** Sink for error messages given as plain text. NOTE(review): no implementor or caller is visible in this change - confirm usage. */
+public interface IErrorReporter {
+ void reportError(String error); // error: the message text to report
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opAnnot.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opAnnot.java
new file mode 100644
index 0000000..4bb62a2
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opAnnot.java
@@ -0,0 +1,37 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+/* COSMAS II Plain Syntax (c2ps).
+ * Helper for annotation expressions of the form MORPH(...).
+ * 12.12.12/FB
+ *
+ * strip(): MORPH(NP sg nom) -> NP sg nom.
+ */
+
+public class c2ps_opAnnot
+
+{
+    /** Removes a surrounding MORPH( ... ) wrapper; any other input is returned unchanged. */
+    public static String strip(String input)
+    {
+        // The ')' test matters: without it "MORPH(" alone threw and an unterminated wrapper lost its last character.
+        if( input.startsWith("MORPH(") && input.endsWith(")") )
+        {
+            input = input.substring(6, input.length()-1);
+        }
+        return input;
+    }
+
+    /*
+     * main: test program (currently empty):
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
new file mode 100644
index 0000000..a6dc19b
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
@@ -0,0 +1,101 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+// Parses the options (Opts) of #BED(x,Opts):
+
+public class c2ps_opBED
+
+{
+    /**
+     * Parses the option list of #BED(x,Opts), starting at parser rule opBEDOpts.
+     *
+     * @param input option text, e.g. ",sa,se,-ta,-te/pa,-pe)"
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opBEDLexer
+            lex = new c2ps_opBEDLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opBEDParser
+            g = new c2ps_opBEDParser(tokens);
+        c2ps_opBEDParser.opBEDOpts_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.opBEDOpts();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse #BED options: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+    /**
+     * Parses text position options, starting at parser rule textpos.
+     * Same procedure as check(), but with a different start rule.
+     *
+     * @param input text position options; the c2ps_opWF grammar passes the text
+     *              of its TPos token here
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the text position options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException
+     *         (kept as cause)
+     */
+    public static Tree checkTPos(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opBEDLexer
+            lex = new c2ps_opBEDLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opBEDParser
+            g = new c2ps_opBEDParser(tokens);
+        c2ps_opBEDParser.textpos_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.textpos();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse text position options: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+    // Test program: parses a few sample #BED option lists
+    // and prints the resulting ASTs.
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)"};
+
+        for (String opts : input)
+        {
+            Tree tree = check(opts, 0);
+            System.out.println("Parsing input: " + opts + ": " + tree.toStringTree());
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opELEM.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opELEM.java
new file mode 100644
index 0000000..9968e24
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opELEM.java
@@ -0,0 +1,79 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+// Parses the search expression inside #ELEM(...):
+
+public class c2ps_opELEM
+
+{
+
+    /**
+     * Parses the search expression of an #ELEM(...) operator, starting at parser
+     * rule opELEM.
+     *
+     * @param input the complete operator text, e.g. #ELEM(S), #ELEM(W ANA='DET ADJ'),
+     *              #ELEM(ANA <> 'V sg' TYP !=VP)
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the expression
+     * @throws IllegalArgumentException if parsing throws a RecognitionException
+     *         (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opELEMLexer
+            lex = new c2ps_opELEMLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opELEMParser
+            g = new c2ps_opELEMParser(tokens);
+        c2ps_opELEMParser.opELEM_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.opELEM();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse #ELEM expression: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+
+    /*
+     * main - test program for #ELEM(...)
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {"#ELEM()",
+                     "#ELEM( )",
+                     "#ELEM(S)",
+                     "#ELEM(W ANA='DET ADV')",
+                     "#ELEM( TITLE TYPE!=Unterüberschrift )",
+                     "#ELEM(v='a b c' w!='d e f' x=y )",
+                     "#ELEM(flexion='l\\'été' lemma='été')"};
+
+        // Each input is echoed before it is parsed, so the output shows
+        // which input was being processed if parsing fails.
+        for (String expr : input)
+        {
+            System.out.println("#ELEM input: " + expr);
+            Tree tree = check(expr, 0);
+            System.out.println("#ELEM AST : " + tree.toStringTree());
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opIN.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opIN.java
new file mode 100644
index 0000000..7d50a90
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opIN.java
@@ -0,0 +1,83 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+/* COSMAS II Plain Syntax (c2ps).
+ * Local grammar for the options of #IN(Opts).
+ * 12.12.12/FB
+ *
+ * check(input): input examples: "#IN", "#IN()", "#IN(L)", "#IN(L,min,%)", etc.
+ *
+ * Opts takes one or more comma-separated options:
+ * - range options: ALL, HIT, -.
+ * - position options: L, R, F, FE, FI, N, -.
+ * - exclusion options: %, -.
+ * - grouping options: min, max, -.
+ * Operator #IN is available for use without options.
+ */
+
+public class c2ps_opIN
+
+{
+    /**
+     * Parses the options of an #IN operator, starting at parser rule opIN.
+     *
+     * @param input operator text, e.g. "#IN", "#IN()", "#IN(L)", "#IN(L,min,%)"
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opINLexer
+            lex = new c2ps_opINLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opINParser
+            g = new c2ps_opINParser(tokens);
+        c2ps_opINParser.opIN_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.opIN();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse #IN options: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+    /*
+     * main: test program:
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {"#IN", "#IN()", "#IN(L)", "#IN(FE,min)", "#IN(R,%,max)", "#IN(FI,ALL)",
+                     "#IN(FE,ALL,%,MIN)"};
+
+        System.out.println("Tests von #IN-Optionen:\n");
+
+        for (String opts : input)
+        {
+            // Parsing comes first: an input that makes check() throw
+            // is therefore not echoed.
+            Tree tree = check(opts, 0);
+            System.out.println("#IN: input: " + opts);
+            System.out.println("#IN: AST : " + tree.toStringTree() + "\n");
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opOV.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opOV.java
new file mode 100644
index 0000000..c47160c
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opOV.java
@@ -0,0 +1,82 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+/* COSMAS II Plain Syntax (c2ps).
+ * Local grammar for the options of #OV(Opts).
+ * 12.12.12/FB
+ *
+ * Input examples: "#OV", "#OV()", "#OV(L)", "#OV(F,%,max)", etc.
+ *
+ * Opts takes one or more comma-separated options:
+ * - position options: L, R, F, FE, FI, X, -.
+ * - exclusion options: %, -.
+ * - grouping options: min, max, -.
+ * If no options are needed, the operator #OV can be used.
+ */
+
+public class c2ps_opOV
+
+{
+    /**
+     * Parses the options of an #OV operator, starting at parser rule opOV.
+     * @param input operator text, e.g. "#OV", "#OV()", "#OV(L)", "#OV(F,%,max)"
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opOVLexer
+            lex = new c2ps_opOVLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opOVParser
+            g = new c2ps_opOVParser(tokens);
+
+        try
+        {
+            return (Tree)g.opOV().getTree();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse #OV options: " + input, e);
+        }
+    }
+
+    /*
+     * main: test program:
+     */
+
+    // TODO: input "OV()" leads to an infinite loop... 19.12.12/FB
+    // TODO: input "#OV(FI,ALL)" -> loop, because ALL is not a known token...
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {"#OV", "#OV()", "#OV(L)", "#OV(FE,min)", "#OV(R,% , max)"};
+
+        System.out.println("Tests von #OV-Optionen:\n");
+
+        // Each input is echoed before it is parsed, so the last line printed
+        // identifies the input if parsing does not return (see the TODOs above).
+        for (String opts : input)
+        {
+            System.out.println("#OV: input: " + opts);
+            Tree tree = check(opts, 0);
+            System.out.println("#OV: AST : " + tree.toStringTree() + "\n");
+        }
+
+        System.out.println("Tests von #OV-Optionen: quit.\n");
+        System.out.flush();
+    } // main
+
+}
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
new file mode 100644
index 0000000..4d8561d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
@@ -0,0 +1,79 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+/*
+ * Parses the options (Opts) of PROX: /w3:4,s0,min or %w3:4,s0,min.
+ */
+
+public class c2ps_opPROX
+
+{
+    /**
+     * Parses the options of a PROX operator, starting at parser rule opPROX.
+     *
+     * @param input option text, e.g. "/w1:3", "%w5" or "/+w3,s0,max"
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opPROXLexer
+            lex = new c2ps_opPROXLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opPROXParser
+            g = new c2ps_opPROXParser(tokens);
+        c2ps_opPROXParser.opPROX_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.opPROX();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse PROX options: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+    /*
+     * main test program:
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {"/w1:3", "%w5", "/+w3,s0,max"};
+
+        System.out.println("Tests von PROX-Optionen:\n");
+
+        for (String opts : input)
+        {
+            // Parsing comes first: an input that makes check() throw
+            // is therefore not echoed.
+            Tree tree = check(opts, 0);
+            System.out.println("PROX: input: " + opts);
+            System.out.println("PROX: AST : " + tree.toStringTree() + "\n");
+
+            // Visualize AST Tree:
+            /*
+            DOTTreeGenerator gen = new DOTTreeGenerator();
+            StringTemplate st = gen.toDOT(tree);
+            System.out.println("DOTTREE: " + st);
+            */
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java
new file mode 100644
index 0000000..5400bec
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java
@@ -0,0 +1,126 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+/*
+ * Parses the prefixed and suffixed options of a search word form.
+ * E.g. :fi:Hendrix:sa/-pe.
+ */
+
+public class c2ps_opWF
+
+{
+    /**
+     * Parses a lemma or one or more word forms together with their prefixed and
+     * suffixed options and returns the AST.
+     * An input too short to carry both quotes is parsed as it is.
+     *
+     * @param input  a single lemma or word form, or a list of word forms
+     * @param bStrip true: 'input' contains "wort" -> the enclosing quotes are
+     *               stripped away -> wort;
+     *               false: 'input' contains no quotes -> nothing to strip
+     * @param bLem   true: 'input' contains a lemma; a leading '&' is removed and
+     *               the tree ^(OPLEM...) is generated;
+     *               false: 'input' contains word forms; the tree ^(OPWF...) is
+     *               generated
+     * @param index  not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced by rule searchLEM (bLem) or searchWFs (otherwise)
+     * @throws IllegalArgumentException if parsing throws a RecognitionException
+     *         (kept as cause)
+     */
+    public static Tree check(String input, boolean bStrip, boolean bLem, int index)
+    {
+        // Strip the quotes only when both can be present: for an input shorter than
+        // two characters substring() used to throw.
+        if( bStrip && input.length() >= 2 )
+        {
+            input = input.substring(1, input.length()-1);
+        }
+
+        // startsWith() also copes with an empty input, where charAt(0) used to throw.
+        if( bLem && input.startsWith("&") )
+        {
+            input = input.substring(1);
+        }
+
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_opWFLexer
+            lex = new c2ps_opWFLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_opWFParser
+            g = new c2ps_opWFParser(tokens);
+
+        try
+        {
+            // a lemma is parsed by rule searchLEM, word forms by rule searchWFs
+            Object ast = bLem ? g.searchLEM().getTree() : g.searchWFs().getTree();
+            return (Tree)ast;
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse word form expression: " + input, e);
+        }
+    }
+
+    /**
+     * Encodes a word form as a single tree node, e.g. to insert it into an AST.
+     * a) the text is put in double quotes: wf -> "wf".
+     * b) the escape character before ':' is removed: abc\: -> abc:.
+     *    A backslash that does not directly precede a ':' is kept.
+     *
+     * @param wf        the word form text
+     * @param tokenType token type assigned to the created node
+     * @return a CommonTree wrapping one CommonToken that carries the encoded text
+     * @throws NullPointerException if wf is null
+     */
+    public static Tree encode(String wf, int tokenType)
+    {
+        // b) String.replace() substitutes every "\:" pair from left to right, which
+        //    gives the same result as the former StringBuffer deleteCharAt() loop.
+        String unescaped = wf.replace("\\:", ":");
+
+        // a)
+        return new CommonTree(new CommonToken(tokenType, "\"" + unescaped + "\""));
+    }
+
+    /*
+     * main test program: inputs starting with '&' are parsed as lemmas,
+     * all others as word forms.
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {":fi:Hendrix:sa", ":FiOlDs:été:sa", "&Gitarre", "&Gitarre:sa/-pe",
+                     " \"Institut für \\:Deutsche\\: Sprache\" ",
+                     ":Fi:der:-sa Wilde:-se Western:/se" };
+
+        System.out.println("Tests von WF und Lemma-Optionen:\n");
+
+        for (String query : input)
+        {
+            boolean bLem = query.startsWith("&");
+
+            // Build the label on its own: '+' binds tighter than '?:', so the former
+            // bLem ? "LEM: " : "WF: " + ... expressions printed nothing but "LEM: "
+            // for a lemma.
+            String label = bLem ? "LEM: " : "WF: ";
+
+            System.out.println(label + "input: " + query);
+
+            // bStrip=false for every sample; bLem selects rule searchLEM or searchWFs.
+            Tree tree = check(query, false, bLem, 0);
+
+            System.out.println(label + "AST : " + tree.toStringTree() + "\n");
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_optCase.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_optCase.java
new file mode 100644
index 0000000..8f1ad28
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_optCase.java
@@ -0,0 +1,68 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import java.io.*;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.DebugEventSocketProxy;
+import org.antlr.runtime.tree.*;
+
+// Parses case options.
+
+public class c2ps_optCase
+
+{
+    /**
+     * Parses a sequence of case options, starting at parser rule optCase.
+     *
+     * @param input option text, e.g. "Fi" or "FiOsDi"
+     * @param index not used in this method; kept so existing callers compile unchanged
+     * @return the AST produced for the options
+     * @throws IllegalArgumentException if parsing throws a RecognitionException (kept as cause)
+     */
+    public static Tree check(String input, int index)
+    {
+        ANTLRStringStream
+            ss = new ANTLRStringStream(input);
+        c2ps_optCaseLexer
+            lex = new c2ps_optCaseLexer(ss);
+        CommonTokenStream
+            tokens = new CommonTokenStream(lex);
+        c2ps_optCaseParser
+            g = new c2ps_optCaseParser(tokens);
+        c2ps_optCaseParser.optCase_return
+            c2PQReturn;
+
+        try
+        {
+            c2PQReturn = g.optCase();
+        }
+        catch (RecognitionException e)
+        {
+            // Fail with the real cause. Before, the exception was only printed and
+            // the method then died with a NullPointerException on the unset result.
+            throw new IllegalArgumentException("cannot parse case options: " + input, e);
+        }
+
+        return (Tree)c2PQReturn.getTree();
+    }
+
+    /*
+     * Main test program.
+     *
+     */
+
+    public static void main(String args[]) throws Exception
+    {
+        String[]
+            input = {"Fi", "FiOsDi"};
+
+        // The samples carry no surrounding ':' characters, unlike the Case token
+        // text (':' STRING ':') that the c2ps_opWF grammar hands to check().
+        for (String opts : input)
+        {
+            Tree tree = check(opts, 0);
+            System.out.println("Parsing input: " + opts + ": " + tree.toStringTree());
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
index 624e42c..8232617 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -8,11 +8,10 @@
import java.util.Map;
import org.antlr.v4.runtime.tree.ParseTree;
-import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
public abstract class AbstractSyntaxTree {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
index 57c958f..58e5a16 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
@@ -20,10 +20,10 @@
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
-import de.ids_mannheim.korap.query.annis.AqlLexer;
-import de.ids_mannheim.korap.query.annis.AqlParser;
+import de.ids_mannheim.korap.query.parse.annis.AqlLexer;
+import de.ids_mannheim.korap.query.parse.annis.AqlParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
/**
* Map representation of ANNIS QL syntax tree as returned by ANTLR
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CQLTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CQLTree.java
index a9c1d2a..15e4f0e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CQLTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CQLTree.java
@@ -1,6 +1,6 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import org.z3950.zing.cql.*;
import java.io.IOException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder.java
index 3dfcb12..96b94a0 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder.java
@@ -4,6 +4,7 @@
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
+
import de.ids_mannheim.korap.resource.Relation;
import de.ids_mannheim.korap.utils.JsonUtils;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder2.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder2.java
index 5f37988..333fd30 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder2.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder2.java
@@ -1,7 +1,7 @@
package de.ids_mannheim.korap.query.serialize;
import com.fasterxml.jackson.databind.JsonNode;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import de.ids_mannheim.korap.utils.JsonUtils;
import java.io.IOException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder3.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder3.java
index 786ab94..f3a1bbd 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder3.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryBuilder3.java
@@ -1,6 +1,6 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import de.ids_mannheim.korap.utils.JsonUtils;
import java.io.IOException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
index 71a96fc..9909d25 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
@@ -1,10 +1,10 @@
package de.ids_mannheim.korap.query.serialize;
+import de.ids_mannheim.korap.query.parse.collection.CollectionQueryLexer;
+import de.ids_mannheim.korap.query.parse.collection.CollectionQueryParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener;
-import de.ids_mannheim.korap.query.serialize.util.CollectionQueryLexer;
-import de.ids_mannheim.korap.query.serialize.util.CollectionQueryParser;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
index c83dff7..162b17c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -1,11 +1,11 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
-import de.ids_mannheim.korap.query.cosmas2.c2psParser;
+import de.ids_mannheim.korap.query.parse.cosmas.c2psLexer;
+import de.ids_mannheim.korap.query.parse.cosmas.c2psParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener;
import de.ids_mannheim.korap.query.serialize.util.ResourceMapper;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.RecognitionException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 1429d9b..0c1da76 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -1,10 +1,10 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusLexer;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusParser;
+import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusLexer;
+import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ParseTree;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
deleted file mode 100644
index 614fcdd..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
+++ /dev/null
@@ -1,468 +0,0 @@
-package de.ids_mannheim.korap.query.serialize;
-
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.antlr.v4.runtime.ANTLRInputStream;
-import org.antlr.v4.runtime.BailErrorStrategy;
-import org.antlr.v4.runtime.CharStream;
-import org.antlr.v4.runtime.CommonTokenStream;
-import org.antlr.v4.runtime.Lexer;
-import org.antlr.v4.runtime.Parser;
-import org.antlr.v4.runtime.ParserRuleContext;
-import org.antlr.v4.runtime.tree.ParseTree;
-import org.slf4j.LoggerFactory;
-
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusLexer;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusParser;
-import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
-
-/**
- * Map representation of Poliqarp syntax tree as returned by ANTLR
- * @author joachim
- *
- */
-public class PoliqarpTree extends Antlr4AbstractSyntaxTree {
- private static org.slf4j.Logger log = LoggerFactory
- .getLogger(PoliqarpTree.class);
- /**
- * Top-level map representing the whole request.
- */
- LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
- /**
- * Keeps track of open node categories
- */
- LinkedList<String> openNodeCats = new LinkedList<String>();
- /**
- * Flag that indicates whether token fields or meta fields are currently being processed
- */
- boolean inMeta = false;
- /**
- * Parser object deriving the ANTLR parse tree.
- */
- static Parser poliqarpParser;
- /**
- * Keeps track of all visited nodes in a tree
- */
- List<ParseTree> visited = new ArrayList<ParseTree>();
-
- /**
- * Keeps track of active fields (like 'base=foo').
- */
- LinkedList<ArrayList<Object>> fieldStack = new LinkedList<ArrayList<Object>>();
- /**
- * Keeps track of active sequences.
- */
- LinkedList<LinkedHashMap<String,Object>> sequenceStack = new LinkedList<LinkedHashMap<String,Object>>();
- /**
- * Keeps track of active tokens.
- */
- LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
- /**
- * Keeps track of sequence/token/field groups.
- */
- LinkedList<ArrayList<Object>> groupStack = new LinkedList<ArrayList<Object>>();
- /**
- * Marks the currently active object (sequence/token/group...) in order to know where to add stuff like occurrence info etc.
- */
- LinkedHashMap<String,Object> curObject = new LinkedHashMap<String,Object>();
- /**
- * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
- */
- LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
-
- /**
- *
- * @param tree The syntax tree as returned by ANTLR
- * @param parser The ANTLR parser instance that generated the parse tree
- */
- public PoliqarpTree(String query) {
- prepareContext();
- process(query);
- System.out.println(">>> "+requestMap+" <<<");
- }
-
- private void prepareContext() {
- LinkedHashMap<String,Object> context = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> operands = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> relation = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> classMap = new LinkedHashMap<String,Object>();
-
- operands.put("@id", "korap:operands");
- operands.put("@container", "@list");
-
- relation.put("@id", "korap:relation");
- relation.put("@type", "korap:relation#types");
-
- classMap.put("@id", "korap:class");
- classMap.put("@type", "xsd:integer");
-
- context.put("korap", "http://korap.ids-mannheim.de/ns/query");
- context.put("@language", "de");
- context.put("operands", operands);
- context.put("relation", relation);
- context.put("class", classMap);
- context.put("query", "korap:query");
- context.put("filter", "korap:filter");
- context.put("meta", "korap:meta");
-
- requestMap.put("@context", context);
- }
-
- @Override
- public Map<String, Object> getRequestMap() {
- return requestMap;
- }
-
- @Override
- public void process(String query) {
- ParseTree tree = parsePoliqarpQuery(query);
- log.info("Processing Poliqarp query.");
- System.out.println("Processing Poliqarp");
- processNode(tree);
- log.info(requestMap.toString());
- }
-
- @SuppressWarnings("unchecked")
- private void processNode(ParseTree node) {
- // Top-down processing
- if (visited.contains(node)) return;
- else visited.add(node);
-
- String nodeCat = getNodeCat(node);
- openNodeCats.push(nodeCat);
-
-// System.out.println(openNodeCats);
-
- /*
- ****************************************************************
- ****************************************************************
- * Processing individual node categories *
- ****************************************************************
- ****************************************************************
- */
- if (nodeCat.equals("query")) {
- }
-
- // cq_segments/sq_segments: token group
- if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
- // disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
- if (node.getChildCount() > 0 && !node.getChild(0).toStringTree(poliqarpParser).equals(" ")) {
- LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
- curObject = sequence;
- // Step I: decide type of element (one or more elements? -> token or sequence)
- if (node.getChildCount()>1) {
- sequence.put("@type", "korap:sequence");
- ArrayList<Object> sequenceOperands = new ArrayList<Object>();
- sequence.put("operands", sequenceOperands);
- } else {
- // if only child, make the sequence a mere korap:token
- sequence.put("@type", "korap:token");
- tokenStack.push(sequence);
- }
- // Step II: decide where to put this element (top query node or embedded in super sequence?)
- if (openNodeCats.get(1).equals("query")) {
- requestMap.put("query", sequence);
- } else if (!groupStack.isEmpty()) {
- groupStack.getFirst().add(sequence);
- } else {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
- topSequenceOperands.add(sequence);
- }
- sequenceStack.push(sequence);
- }
- }
-
- // cq_segment
- if (nodeCat.equals("cq_segment")) {
- // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
- LinkedHashMap<String, Object> token;
- if (tokenStack.isEmpty()) {
- token = new LinkedHashMap<String, Object>();
- tokenStack.push(token);
- } else {
- // in case cq_segments has already added the token
- token = tokenStack.getFirst();
- }
- curObject = token;
- curToken = token;
-
- // Step II: start filling object and add to containing sequence
- token.put("@type", "korap:token");
- // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
- if (node.getParent().getChildCount()>1) {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
- topSequenceOperands.add(token);
- }
- }
-
- // disjoint cq_segments, like ([base=foo][base=bar])|[base=foobar]
- if (nodeCat.equals("cq_disj_segments")) {
- LinkedHashMap<String,Object> disjunction = new LinkedHashMap<String,Object>();
- curObject = disjunction;
- ArrayList<Object> disjOperands = new ArrayList<Object>();
- disjunction.put("@type", "korap:group");
- disjunction.put("relation", "or");
- disjunction.put("operands", disjOperands);
- groupStack.push(disjOperands);
-
- // decide where to put the disjunction
- if (openNodeCats.get(1).equals("query")) {
- requestMap.put("query", disjunction);
- } else if (openNodeCats.get(1).equals("cq_segments")) {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
- topSequenceOperands.add(disjunction);
- }
- }
-
- // field element (outside meta)
- if (nodeCat.equals("field")) {
- LinkedHashMap<String,Object> fieldMap = new LinkedHashMap<String,Object>();
-
- // Step I: extract info
- String featureName = node.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
- String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
- String value = "";
- ParseTree valNode = node.getChild(2);
- String valType = getNodeCat(valNode);
- fieldMap.put("@type", "korap:term");
- if (valType.equals("simple_query")) {
- value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
- } else if (valType.equals("re_query")) {
- value = valNode.getChild(0).toStringTree(poliqarpParser); //e.g. (re_query "bar*")
- fieldMap.put("@subtype", "korap:value#regex");
- }
- fieldMap.put("@value", featureName+":"+value);
- fieldMap.put("relation", relation);
-
- // Step II: decide where to put the field map (as the only value of a token or the meta filter or as a part of a group in case of coordinated fields)
- if (fieldStack.isEmpty()) {
- if (!inMeta) {
- tokenStack.getFirst().put("@value", fieldMap);
- } else {
- ((HashMap<String, Object>) requestMap.get("meta")).put("@value", fieldMap);
- }
- } else {
- fieldStack.getFirst().add(fieldMap);
- }
- visited.add(node.getChild(0));
- visited.add(node.getChild(1));
- visited.add(node.getChild(2));
- }
-
- // conj_field serves for both conjunctions and disjunctions
- if (nodeCat.equals("conj_field")) {
- LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>();
- ArrayList<Object> groupOperands = new ArrayList<Object>();
-
- group.put("@type", "korap:group");
- group.put("operands", groupOperands);
- fieldStack.push(groupOperands);
-
- // Step I: get operator (& or |)
- ParseTree operatorNode = node.getChild(1).getChild(0);
- String operator = getNodeCat(operatorNode);
- if (operator.equals("|")) {
- group.put("relation", "or");
- } else if (operator.equals("&")) {
- group.put("relation", "and");
- }
-
- // Step II: decide where to put the group (directly under token or in top meta filter section or embed in super group)
- if (openNodeCats.get(1).equals("cq_segment")) {
- tokenStack.getFirst().put("@value", group);
- } else if (openNodeCats.get(1).equals("meta_field_group")) {
- ((HashMap<String, Object>) requestMap.get("meta")).put("@value", group);
- } else {
- fieldStack.get(1).add(group);
- }
- // skip the operator
- visited.add(node.getChild(1));
- }
-
-
- if (nodeCat.equals("sq_segment")) {
- // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
- LinkedHashMap<String, Object> token;
- if (tokenStack.isEmpty()) {
- token = new LinkedHashMap<String, Object>();
- tokenStack.push(token);
- } else {
- // in case sq_segments has already added the token
- token = tokenStack.getFirst();
- }
- curObject = token;
- curToken = token;
- // Step II: fill object (token values) and put into containing sequence
- token.put("@type", "korap:token");
- String word = node.getChild(0).toStringTree(poliqarpParser);
- LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
- token.put("@value", tokenValues);
- tokenValues.put("orth", word);
- tokenValues.put("relation", "=");
-
- // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
- if (node.getParent().getChildCount()>1) {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
- topSequenceOperands.add(token);
- }
- }
-
- // repetition of token group
- if (nodeCat.equals("occ")) {
- ParseTree occChild = node.getChild(0);
- String repetition = occChild.toStringTree(poliqarpParser);
- curObject.put("repetition", repetition);
- visited.add(occChild);
- }
-
- // flags for case sensitivity and whole-word-matching
- if (nodeCat.equals("flag")) {
- String flag = getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
- // add to current token's value
- ((HashMap<String, Object>) curToken.get("@value")).put("flag", flag);
- }
-
- if (nodeCat.equals("meta")) {
- inMeta=true;
- LinkedHashMap<String,Object> metaFilter = new LinkedHashMap<String,Object>();
- requestMap.put("meta", metaFilter);
- metaFilter.put("@type", "korap:meta");
- }
-
-
-
- if (nodeCat.equals("within")) {
- ParseTree domainNode = node.getChild(2);
- String domain = getNodeCat(domainNode);
-// queryOperands.add("within:"+domain);
- curObject.put("within", domain);
- visited.add(node.getChild(0));
- visited.add(node.getChild(1));
- visited.add(domainNode);
- }
-
- /*
- ****************************************************************
- ****************************************************************
- * recursion until 'request' node (root of tree) is processed *
- * **************************************************************
- ****************************************************************
- */
- for (int i=0; i<node.getChildCount(); i++) {
- ParseTree child = node.getChild(i);
- processNode(child);
- }
-
- // Stuff that happens when leaving a node (taking it off the stack)
- if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
- // exclude whitespaces analysed as empty cq_segments
- if (node.getChildCount() > 0 && !getNodeCat(node.getChild(0)).equals(" ")) {
- sequenceStack.pop();
- }
- }
-
- if (nodeCat.equals("cq_disj_segments")) {
- groupStack.pop();
- }
-
- if (nodeCat.equals("cq_segment") || nodeCat.equals("sq_segment")){
- tokenStack.pop();
- }
-
- if (nodeCat.equals("conj_field")) {
- fieldStack.pop();
- }
-
- openNodeCats.pop();
-
- }
-
-// /**
-// * Returns the category (or 'label') of the root of a ParseTree.
-// * @param node
-// * @return
-// */
-// public String getNodeCat(ParseTree node) {
-// String nodeCat = node.toStringTree(poliqarpParser);
-// Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
-// Matcher m = p.matcher(node.toStringTree(poliqarpParser));
-// if (m.find()) {
-// nodeCat = m.group(1);
-// }
-// return nodeCat;
-// }
-
- private static ParserRuleContext parsePoliqarpQuery (String p) {
- Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream)null);
- ParserRuleContext tree = null;
- // Like p. 111
- try {
-
- // Tokenize input data
- ANTLRInputStream input = new ANTLRInputStream(p);
- poliqarpLexer.setInputStream(input);
- CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
- poliqarpParser = new PoliqarpPlusParser(tokens);
-
- // Don't throw out erroneous stuff
- poliqarpParser.setErrorHandler(new BailErrorStrategy());
- poliqarpParser.removeErrorListeners();
-
- // Get starting rule from parser
- Method startRule = PoliqarpPlusParser.class.getMethod("request");
- tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
- }
-
- // Some things went wrong ...
- catch (Exception e) {
- log.error(e.getMessage());
- System.err.println( e.getMessage() );
- }
-
- // Return the generated tree
- return tree;
- }
-
- public static void main(String[] args) {
- /*
- * For testing
- */
- String[] queries = new String[] {
-// "[base=foo]|([base=foo][base=bar])*",
-// "([base=foo]|[base=bar])[base=foobar]",
-// "[base=foo]([base=bar]|[base=foobar/i])",
-// "[base=bar|base=foo]",
-// "[base=bar]",
-// "[base=foo][base=bar]",
-// "[(base=bar|base=foo)&orth=wee]",
-// "[base=foo/i][base=bar]{2,4}",
-// "foo bar/i"
- "[base=foo] meta author=Goethe&year=1885",
- "[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
- "[base=foo]*",
- };
- for (String q : queries) {
- try {
- System.out.println(q);
- System.out.println(PoliqarpTree.parsePoliqarpQuery(q).toStringTree(PoliqarpTree.poliqarpParser));
- @SuppressWarnings("unused")
- PoliqarpTree pt = new PoliqarpTree(q);
- System.out.println(PoliqarpTree.parsePoliqarpQuery(q).toStringTree(PoliqarpTree.poliqarpParser));
- System.out.println();
-
- } catch (NullPointerException npe) {
- npe.printStackTrace();
- System.out.println("null\n");
- }
- }
- }
-
-}
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index 635b2c8..fb8104f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -4,7 +4,7 @@
import com.fasterxml.jackson.databind.JsonMappingException;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import de.ids_mannheim.korap.utils.JsonUtils;
import de.ids_mannheim.korap.utils.KorAPLogger;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index 2f5c237..537e578 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -1,9 +1,5 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.util.QueryException;
-
-import org.apache.commons.lang.StringUtils;
-
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.HashMap;
@@ -11,7 +7,6 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Matcher;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
index 7030f5c..d64e983 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
@@ -15,9 +15,9 @@
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
-import de.ids_mannheim.korap.query.annis.AqlLexer;
-import de.ids_mannheim.korap.query.annis.AqlParser;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.parse.annis.AqlLexer;
+import de.ids_mannheim.korap.query.parse.annis.AqlParser;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
/**
* Map representation of syntax tree as returned by ANTLR
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
index 2a0fb63..6ae78b9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
@@ -5,7 +5,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import de.ids_mannheim.korap.query.cosmas2.IErrorReporter;
+import de.ids_mannheim.korap.query.parse.cosmas.IErrorReporter;
import de.ids_mannheim.korap.query.serialize.QueryUtils;
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/QueryException.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/QueryException.java
new file mode 100644
index 0000000..ff63f79
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/QueryException.java
@@ -0,0 +1,36 @@
+package de.ids_mannheim.korap.query.serialize.util;
+
+/** Checked exception of the query serialization package that can carry a numeric error code. */
+public class QueryException extends Exception {
+
+    /** Numeric error code; remains 0 unless set through the (int, String) constructor or setErrorCode. */
+    int errorCode;
+
+    public QueryException() {
+        super();
+    }
+
+    public QueryException(String message) {
+        super(message);
+    }
+
+    public QueryException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    public QueryException(Throwable cause) {
+        super(cause);
+    }
+
+    /** Creates an exception with the given detail message and error code. */
+    public QueryException(int code, String message) {
+        super(message);
+        this.errorCode = code; // assigned directly: a constructor must not call the overridable setter
+    }
+
+    /** Returns the error code (0 if none was set). */
+    public int getErrorCode() { return errorCode; }
+
+    /** Replaces the error code. */
+    public void setErrorCode(int errorCode) { this.errorCode = errorCode; }
+}
diff --git a/src/test/java/AqlTreeTest.java b/src/test/java/AqlTreeTest.java
index a65d2b1..77d9ec5 100644
--- a/src/test/java/AqlTreeTest.java
+++ b/src/test/java/AqlTreeTest.java
@@ -10,7 +10,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import de.ids_mannheim.korap.query.serialize.QuerySerializer;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
/**
* Tests for JSON-LD serialization of ANNIS QL queries.
diff --git a/src/test/java/CQLTest.java b/src/test/java/CQLTest.java
index 0018103..abe8703 100644
--- a/src/test/java/CQLTest.java
+++ b/src/test/java/CQLTest.java
@@ -10,7 +10,7 @@
import de.ids_mannheim.korap.query.serialize.CQLTree;
import de.ids_mannheim.korap.query.serialize.CosmasTree;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
public class CQLTest {
diff --git a/src/test/java/CollectionQueryTreeTest.java b/src/test/java/CollectionQueryTreeTest.java
index ee7b554..a2be38b 100644
--- a/src/test/java/CollectionQueryTreeTest.java
+++ b/src/test/java/CollectionQueryTreeTest.java
@@ -4,7 +4,7 @@
import java.util.ArrayList;
import de.ids_mannheim.korap.query.serialize.QuerySerializer;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import org.junit.Test;
@@ -108,7 +108,7 @@
assertEquals("2014", res.at("/collection/operands/1/value").asText());
assertEquals(true, res.at("/collection/operands/1/type").isMissingNode());
assertEquals("match:eq", res.at("/collection/operands/1/match").asText());
- assertEquals("", res.at("/warnings/0/"));
+ assertTrue(res.at("/warnings/0/0").asText().startsWith("The collection query contains a value that looks like a date"));
}
@Test
diff --git a/src/test/java/CosmasTreeTest.java b/src/test/java/CosmasTreeTest.java
index 03ab986..62a663a 100644
--- a/src/test/java/CosmasTreeTest.java
+++ b/src/test/java/CosmasTreeTest.java
@@ -6,7 +6,7 @@
import org.junit.Test;
import de.ids_mannheim.korap.query.serialize.QuerySerializer;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
diff --git a/src/test/java/MetaQuerySerializationTest.java b/src/test/java/MetaQuerySerializationTest.java
index 93cca72..acde5a6 100644
--- a/src/test/java/MetaQuerySerializationTest.java
+++ b/src/test/java/MetaQuerySerializationTest.java
@@ -98,39 +98,39 @@
System.out.println("THE RESULTING QUERY: " + s.toJSON());
}
- // @Test
- public void testGenerator() throws QueryException {
- /*
- * just for testing...
- */
- QuerySerializer jg = new QuerySerializer();
- int i = 0;
- String[] queries;
- queries = new String[]{
- "shrink({[base=foo]})",
- "shrink({[base=foo]}[orth=bar])",
- "shrink(1:[base=Der]{1:[base=Mann]})",
- };
-
- for (String q : queries) {
- i++;
- try {
- System.out.println(q);
- jg.run(q, "poliqarp", System.getProperty("user.home") + "/bsp" + i + ".json");
- System.out.println();
- } catch (NullPointerException npe) {
- npe.printStackTrace();
- System.out.println("null\n");
- System.out.println();
- } catch (JsonGenerationException e) {
- e.printStackTrace();
- } catch (JsonMappingException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
+// // @Test
+// public void testGenerator() throws QueryException {
+// /*
+// * just for testing...
+// */
+// QuerySerializer jg = new QuerySerializer();
+// int i = 0;
+// String[] queries;
+// queries = new String[]{
+// "shrink({[base=foo]})",
+// "shrink({[base=foo]}[orth=bar])",
+// "shrink(1:[base=Der]{1:[base=Mann]})",
+// };
+//
+// for (String q : queries) {
+// i++;
+// try {
+// System.out.println(q);
+// jg.run(q, "poliqarp", System.getProperty("user.home") + "/bsp" + i + ".json");
+// System.out.println();
+// } catch (NullPointerException npe) {
+// npe.printStackTrace();
+// System.out.println("null\n");
+// System.out.println();
+// } catch (JsonGenerationException e) {
+// e.printStackTrace();
+// } catch (JsonMappingException e) {
+// e.printStackTrace();
+// } catch (IOException e) {
+// e.printStackTrace();
+// }
+// }
+// }
@Test
public void testLists() {
diff --git a/src/test/java/PoliqarpPlusTreeTest.java b/src/test/java/PoliqarpPlusTreeTest.java
index 3dc2429..06ac536 100644
--- a/src/test/java/PoliqarpPlusTreeTest.java
+++ b/src/test/java/PoliqarpPlusTreeTest.java
@@ -12,7 +12,7 @@
import de.ids_mannheim.korap.query.serialize.QuerySerializer;
-import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.query.serialize.util.QueryException;
/**
* Tests for JSON-LD serialization of PoliqarpPlus queries.