opProx feature (Cosmas 2)
Squashed commit consisting of
- verbosity can be switched on/off on command line.
- Prox: parsing %-w1 and %+w1 correctly.
- opPROX: correcting order of Prox options: WIP.
- beliebige Reihenfolge der Abstands-Optionen: WIP.
- Prox: beliebige Reihenfolge der Optionen: OK.
- Prox: beliebige Reihenfolge der Optionen: OK.
- opPROX: grammar should accept any order of prox. options: WIP.
- PROX: return exact error message about prox options.
- PROX: emit a meaningful error message: wip.
- PROX: emit a meaningful error message: WIP.
- write parsing error to AST.
- trying to write error message into an error node of the AST.
- PROX: Fehlermeldung in KoralQuery schreiben funktioniert.
- Prox...
- Error detection inside Prox done. Returning a precise error message through JSON: done.
- using addError() for error messages in PROX: WIP.
- Prox: reporting exact error messages: works.
- PROX: Tests with RecognitionExceptions removed. All Error Codes in StatusCodes.java.
- Prox: error messages for wrong prox. options.
- Prox: debug output deactivated.
- Prox: deleted debug output.
- Prox: Test added: WiP.
- Prox: 1 working tests added.
- Prox: 3 more tests added.
Change-Id: I8802becaf840660a1512281b3477762a422f8b4f
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
index 2a63bb7..269f27f 100644
--- a/src/main/antlr/cosmas/c2ps.g
+++ b/src/main/antlr/cosmas/c2ps.g
@@ -74,14 +74,37 @@
fragment DISTVALUE
: ('0' .. '9')+ (':' ('0'..'9')+)? ;
-
+
+fragment DISTTYPE // 30.11.23/FB
+ : ('w'|'s'|'p'|'t');
+
+fragment DISTDIR // 30.11.23/FB
+ : ('+'|'-');
+
+/* old version (before 30.11.23/FB)
fragment DIST
: ('+'|'-')? (DISTVALUE ('w'|'s'|'p'|'t') | ('w'|'s'|'p'|'t') DISTVALUE);
-
+*/
+
+// accept these 3 options in any order.
+// afterwards, we will have to check if any of them is missing.
+// 30.11.23/FB
+
+fragment DIST // 30.11.23/FB
+ : (DISTDIR | DISTTYPE | DISTVALUE )+;
+
fragment GROUP
: ('min' | 'max');
-OP_PROX : ('/' | '%') DIST (',' DIST)* (',' GROUP)? ;
+// version (12.01.24/FB):
+// accept correct and incorrect chars till the next blank, that way the incorrect chars
+// are submitted to the sub-grammar c2ps_opPROX where they are detected and an appropriate
+// error message is inserted:
+OP_PROX : ('/' | '%') DIST (~' ')*;
+
+// old version: accepts only correctly formulated options, so the incorrect
+// chars/options are hard to detect:
+// OP_PROX : ('/' | '%') DIST (',' DIST)* (',' GROUP)? ;
OP_IN : '#IN' | '#IN(' OP_IN_OPTS? ')' ;
@@ -260,7 +283,7 @@
op2 : (opPROX | opIN | opOV | opAND | opOR | opNOT) ;
// AST with Options for opPROX is returned by c2ps_opPROX.check():
-opPROX : OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.index)} );
+opPROX : OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.pos)} );
opIN : OP_IN -> {c2ps_opIN.check($OP_IN.text, $OP_IN.index)};
@@ -295,4 +318,3 @@
opALL : ( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')' -> ^(OPALL searchExpr) ;
opREG : OP_REG -> ^(OPREG {c2ps_opREG.encode($OP_REG.text, OPREG)}) ;
-
diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g
index f7a42f5..1569d1a 100644
--- a/src/main/antlr/cosmas/c2ps_opPROX.g
+++ b/src/main/antlr/cosmas/c2ps_opPROX.g
@@ -1,9 +1,10 @@
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-// //
-// lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps) //
-// für den Abstandsoperator /w... und %w... //
-// v-1.0 - 07.12.12/FB //
-// //
+//
+// lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps)
+// für den Abstandsoperator /w... und %w...
+// v-1.0 - 07.12.12/FB
+// v-1.1 - 30.11.23/FB opPROX accepts any order of direction, measure and value.
+//
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
grammar c2ps_opPROX;
@@ -15,10 +16,15 @@
DIST_LIST; DIST; RANGE; VAL0;
MEAS; // measure
DIR; PLUS; MINUS; BOTH;
- GRP; MIN; MAX; }
-@header {package de.ids_mannheim.korap.query.parse.cosmas;}
+ GRP; MIN; MAX;
+ }
+
+@header {package de.ids_mannheim.korap.query.parse.cosmas;
+ import de.ids_mannheim.korap.util.C2RecognitionException;}
+
@lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;}
+
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
//
// PROX-Lexer
@@ -28,6 +34,12 @@
DISTVALUE
: ('0' .. '9')+ ;
+// trying to catch everything (at the end of the option sequence) that should not appear inside the prox. options:
+// e.g. /w5umin -> remain = 'umin'.
+
+PROX_REMAIN
+ : (',')? ('b'..'h'|'j'..'l'|'n'|'o'|'q'|'r'|'u'|'v'|'y'|'z'|'B'..'H'|'J'..'L'|'N'|'O'|'Q'|'R'|'U'|'V'|'Y'|'Z') (~ ' ')* ;
+
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
//
// PROX-Parser
@@ -35,36 +47,49 @@
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-opPROX : proxTyp proxDist (',' proxDist)* (',' proxGroup)?
+opPROX[int pos] : proxTyp proxDist[$pos] (',' proxDist[$pos])* (',' proxGroup)? (proxRemain[$pos])?
- -> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree});
+ -> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree} {$proxRemain.tree});
-proxTyp : '/' -> ^(TYP PROX) // klassischer Abstand.
- | '%' -> ^(TYP EXCL); // ausschließender Abstand.
+proxRemain[int pos] : PROX_REMAIN
+
+ -> { c2ps_opPROX.checkRemain(DIST, $PROX_REMAIN.text, $pos) };
+
+proxTyp : '/' -> ^(TYP PROX) // klassischer Abstand.
+ | '%' -> ^(TYP EXCL); // ausschließender Abstand.
// proxDist: e.g. +5w or -s0 or /w2:4 etc.
// kein proxDirection? hier, weil der Default erst innerhalb von Regel proxDirection erzeugt werden kann.
-proxDist: proxDirection (v1=proxDistValue m1=proxMeasure | m2=proxMeasure v2=proxDistValue)
- -> {$v1.tree != null}? ^(DIST {$proxDirection.tree} {$v1.tree} {$m1.tree})
- -> ^(DIST {$proxDirection.tree} {$v2.tree} {$m2.tree});
+// new rule: accepts options in any order:
+// count each option type and find out if any one is missing or occurs multiple times.
+// 28.11.23/FB
+
+proxDist[int pos]
+@init{ int countM=0; int countD=0; int countV=0;}
+ :
+ ((m=proxMeasure {countM++;})|(d=proxDirection {countD++;})|(v=proxDistValue {countV++;}) )+
+
+ -> {c2ps_opPROX.encodeDIST(DIST, DIR, $d.tree, $m.tree, $v.tree, $proxDist.text, countD, countM, countV, $pos)};
+
+
+// new rule accepts only '+' and '-'; default tree for direction is
+// set in c2ps_opPROX.encodeDIST() now.
+// 28.11.23/FB
proxDirection
- : (p='+'|m='-')? -> {$p != null}? ^(DIR PLUS)
- -> {$m != null}? ^(DIR MINUS)
- -> ^(DIR BOTH) ;
-/*
-proxDistValue // proxDistMin ( ':' proxDistMax)? ;
- : (m1=proxDistMin -> ^(DIST_RANGE VAL0 $m1)) (':' m2=proxDistMax -> ^(DIST_RANGE $m1 $m2))? ;
-*/
-proxDistValue // proxDistMin ( ':' proxDistMax)? ;
- : (m1=proxDistMin ) (':' m2=proxDistMax)?
+ : '+' -> ^(DIR PLUS)
+ | '-' -> ^(DIR MINUS);
+
+proxDistValue : (m1=proxDistMin ) (':' m2=proxDistMax)?
-> {$m2.text != null}? ^(RANGE $m1 $m2)
- -> ^(RANGE VAL0 $m1);
-
+ -> ^(RANGE VAL0 $m1);
+
+// mentioning >1 measures will be checked/rejected in c2ps_opPROX.encodeDIST().
+
proxMeasure
- : (m='w'|m='s'|m='p'|m='t') -> ^(MEAS $m);
+ : (meas='w'|meas='s'|meas='p'|meas='t') -> ^(MEAS $meas) ;
proxDistMin
: DISTVALUE;
@@ -73,6 +98,8 @@
: DISTVALUE;
proxGroup
- : 'min' -> ^(GRP MIN)
- | 'max' -> ^(GRP MAX);
+ : ('min'|'MIN') -> ^(GRP MIN)
+ | ('max'|'MAX') -> ^(GRP MAX);
+
+
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
index 2a5b163..6229719 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
@@ -3,29 +3,177 @@
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
+import de.ids_mannheim.korap.query.serialize.Antlr3AbstractQueryProcessor;
+import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener;
+import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
+import de.ids_mannheim.korap.util.*;
+
/*
* parses Opts of PROX: /w3:4,s0,min or %w3:4,s0,min.
*/
-public class c2ps_opPROX
+public class c2ps_opPROX
{
+ final static boolean bDebug = false;
+
+ // type of an Error CommonToken:
+ final static int typeERROR = 1;
+ // Prox error codes defined in StatusCodes.java.
+
+ private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, int pos)
+
+ {
+ CommonTree
+ errorTree = new CommonTree(new CommonToken(typeDIST, "DIST"));
+ CommonTree
+ errorNode = new CommonTree(new CommonToken(typeERROR, "ERROR"));
+ CommonTree
+ errorPos = new CommonTree(new CommonToken(typeERROR, String.valueOf(pos)));
+ CommonTree
+ errorCode = new CommonTree(new CommonToken(typeERROR, String.valueOf(errCode)));
+ CommonTree
+ errorMes;
+ String
+ mess;
+
+ switch( errCode )
+ {
+ case StatusCodes.ERR_PROX_MEAS_NULL:
+ mess = String.format("Abstandsoperator an der Stelle '%s' es fehlt eine der folgenden Angaben: w,s,p!", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
+ mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " +
+ "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: /+w2,s0).", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ case StatusCodes.ERR_PROX_VAL_NULL:
+ mess = String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. /+w5)! ", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ case StatusCodes.ERR_PROX_VAL_TOOGREAT:
+ mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. /+w5)! ", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ case StatusCodes.ERR_PROX_DIR_TOOGREAT:
+ mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ case StatusCodes.ERR_PROX_WRONG_CHARS:
+ mess = String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ break;
+ default:
+ mess = String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: /+w2 oder /w10,s0.", text);
- public static Tree check (String input, int index) {
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+ }
+
+ errorTree.addChild(errorNode);
+ errorNode.addChild(errorPos);
+ errorNode.addChild(errorCode);
+ errorNode.addChild(errorMes);
+
+ return errorTree;
+ }
+
+ /* encodeDIST():
+ * - returns a CommonTree built from the Direction/Measure/Distance value.
+ * - accepts options in any order.
+ * - creates CommonTree in that order: Direction .. Distance value .. Measure.
+ * - sets default direction to BOTH if not set yet.
+ * - unfortunately, in ANTLR3 it seems that there is no way inside the Parser Grammar to get
+ * the absolute token position from the beginning of the query. Something like $ProxDist.pos or
+ * $start.pos is not available, so we have no info in this function about the position at which
+ * an error occurs.
+ * - For multiple prox options, e.g. /w2,s2,p0, this function is called 3 times.
+ * Arguments:
+ * countD : how many occurrences of direction: + or - or nothing. If 0, the default BOTH is inserted; should be at most 1.
+ * countM : how many occurrences of measure: w,s,p,t: should be 1.
+ * countV : how many occurrences of distance value: should be 1.
+ * 28.11.23/FB
+ */
+
+ public static Object encodeDIST(int typeDIST, int typeDIR, Object ctDir, Object ctMeas, Object ctVal, String text,
+ int countD, int countM, int countV, int pos)
+
+ {
+ CommonTree tree1 = (CommonTree)ctDir;
+ CommonTree tree2 = (CommonTree)ctMeas;
+ CommonTree tree3 = (CommonTree)ctVal;
+
+ if( bDebug )
+ System.err.printf("Debug: encodeDIST: scanned input='%s' countM=%d countD=%d countV=%d pos=%d.\n",
+ text, countM, countD, countV, pos);
+
+ if( countM == 0 )
+ return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_NULL, typeDIST, pos);
+ if( countM > 1 )
+ return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_TOOGREAT, typeDIST, pos);
+ if( countV == 0 )
+ return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_NULL, typeDIST, pos);
+ if( countV > 1 )
+ return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_TOOGREAT, typeDIST, pos);
+
+ if( countD == 0 )
+ {
+ // if direction is not specified (ctDir == null or countD==0), return default = BOTH:
+ CommonTree treeDIR = new CommonTree(new CommonToken(typeDIR, (String)"DIR"));
+ CommonTree treeBOTH = new CommonTree(new CommonToken(typeDIR, "BOTH"));
+ treeDIR.addChild(treeBOTH);
+
+ if( bDebug )
+ System.err.printf("Debug: encodeDIST: tree for DIR: '%s'.\n", treeDIR.toStringTree());
+ tree1 = treeDIR;
+ }
+ else if( countD > 1 )
+ return buildErrorTree(text, StatusCodes.ERR_PROX_DIR_TOOGREAT, typeDIST, pos);
+
+ // create DIST tree:
+ CommonTree
+ tree = new CommonTree(new CommonToken(typeDIST, "DIST"));
+
+ tree.addChild(tree1);
+ tree.addChild(tree3); // tree3 before tree2 expected by serialization.
+ tree.addChild(tree2);
+
+ if( bDebug )
+ System.err.printf("Debug: encodeDIST: returning '%s'.\n", tree.toStringTree());
+
+ return tree;
+ } // encodeDIST
+
+ /* checkRemain:
+ *
+ * - the chars in proxRemain are not allowed in prox. options.
+ * - return an error tree.
+ * 12.01.24/FB
+ */
+
+ public static Object checkRemain(int typeDIST, String proxRemain, int pos)
+
+ {
+ if( bDebug )
+ System.out.printf("Debug: checkRemain: '%s' at pos %d.\n", proxRemain, pos);
+
+ return buildErrorTree(proxRemain, StatusCodes.ERR_PROX_WRONG_CHARS, typeDIST, pos);
+ }
+
+ public static Tree check (String input, int pos) throws RecognitionException
+ {
ANTLRStringStream ss = new ANTLRStringStream(input);
c2ps_opPROXLexer lex = new c2ps_opPROXLexer(ss);
CommonTokenStream tokens = new CommonTokenStream(lex);
c2ps_opPROXParser g = new c2ps_opPROXParser(tokens);
c2ps_opPROXParser.opPROX_return c2PQReturn = null;
- /*
- System.out.println("check opPROX:" + index + ": " + input);
- System.out.flush();
- */
+ if( bDebug )
+ System.out.printf("check opPROX: pos=%d input='%s'.\n", pos, input);
try {
- c2PQReturn = g.opPROX();
- }
+ c2PQReturn = g.opPROX(pos);
+ }
catch (RecognitionException e) {
e.printStackTrace();
}
@@ -37,7 +185,19 @@
return tree;
}
-
+ public static boolean checkFalse()
+ {
+
+ return false; // testwise
+ }
+
+ public static boolean checkMeasure( Object measure)
+ {
+ System.err.printf("Debug: checkMeasure: measure = %s.\n",
+ measure == null ? "null" : "not null");
+ return true;
+ }
+
/*
* main testprogram:
*/
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index 68f5f7c..285a3e7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -1,5 +1,6 @@
package de.ids_mannheim.korap.query.serialize;
+import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; // error codes.
import de.ids_mannheim.korap.query.object.ClassRefCheck;
import de.ids_mannheim.korap.query.object.ClassRefOp;
import de.ids_mannheim.korap.query.object.CosmasPosition;
@@ -18,12 +19,16 @@
import de.ids_mannheim.korap.util.StringUtils;
import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.FailedPredicateException;
import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.ParseTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
@@ -128,6 +133,83 @@
public static Pattern wildcardPlusPattern = Pattern.compile("([+])");
public static Pattern wildcardQuestionPattern = Pattern.compile("([?])");
+ /**
+ * reportErrorsinTree:
+ * - traverse the AST tree and search for nodes of type ERROR, they contain
+ * the errCode, the error message and the error char position.
+ * - returns true if an error node is found in the tree referenced by 'node'.
+ * - adds error code, error position and error message to the error list.
+ * Arguments:
+ * node : might be null if it has been reset previously by another error handler.
+ * @param node
+ * @return: true: error node was found,
+ * false; no error node found.
+ * 19.12.23/FB
+ */
+
+ private boolean reportErrorsinTree(Tree node)
+
+ {
+ final String func = "reportErrorsinTree";
+
+ //System.err.printf("Debug: %s: '%s' has %d children.\n",
+ // func, node.getText(), node.getChildCount());
+ if( node == null )
+ {
+ // System.err.printf("Warning: %s: node == null: no action requested.\n", func);
+ return false;
+ }
+
+ if( node.getType() == 1 && node.getText().compareTo("ERROR") == 0 )
+ {
+ // error node found:
+ // child[0] : error pos.
+ // child[1] : error code.
+ // child[2] : error message, containing offending string.
+ /*
+ System.err.printf("Debug: %s: child[0]='%s' child[1]='%s' child[2]='%s'.\n", func,
+ node.getChild(0) != null ? node.getChild(0).getText() : "???",
+ node.getChild(1) != null ? node.getChild(1).getText() : "???",
+ node.getChild(2) != null ? node.getChild(2).getText() : "???");
+ */
+
+ int
+ errPos = node.getChild(0) != null ? Integer.parseInt(node.getChild(0).getText()) : 0;
+ int
+ errCode = node.getChild(1) != null ? Integer.parseInt(node.getChild(1).getText()) : StatusCodes.ERR_PROX_UNKNOWN;
+ String
+ errMess = node.getChild(2) != null ? node.getChild(2).getText() : "Genaue Fehlermeldung nicht auffindbar.";
+
+ ArrayList<Object>
+ errorSpecs = new ArrayList<Object>();
+
+ errorSpecs.add(errCode);
+ errorSpecs.add(errMess);
+ errorSpecs.add(errPos);
+ addError(errorSpecs);
+ return true;
+ }
+
+ for(int i=0; i<node.getChildCount(); i++)
+ {
+ Tree
+ son = node.getChild(i);
+
+ /* System.err.printf(" node: text='%s' type=%d start=%d end=%d.\n",
+ son.getText(),
+ son.getType(),
+ son.getTokenStartIndex(),
+ son.getTokenStopIndex());
+ */
+ // return the first error found only:
+ if( reportErrorsinTree(son) )
+ return true; // error found, stop here.
+ }
+
+ // no error node:
+ return false;
+ } // reportErrorsinTree
+
/**
* @param tree
* The syntax tree as returned by ANTLR
@@ -140,28 +222,40 @@
KoralObjectGenerator.setQueryProcessor(this);
this.query = query;
process(query);
- if (DEBUG) {
- log.debug(">>> " + requestMap.get("query") + " <<<");
- System.out.printf("Cosmas2QueryProcessor: >>%s<<.\n", requestMap.get("query"));
- }
- }
+ if (verbose)
+ {
+ //log.debug(">>> " + requestMap.get("query") + " <<<");
+ try {
+ // query from requestMap is unformatted JSON. Make it pretty before displaying:
+ ObjectMapper mapper = new ObjectMapper();
+ String jsonQuery = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(requestMap.get("query"));
+ System.out.printf("Cosmas2QueryProcessor: JSON output:\n%s\n\n", jsonQuery);
+ }
+ catch (JsonProcessingException e)
+ {
+ System.out.printf("Cosmas2QueryProcessor: >>%s<<.\n", requestMap.get("query"));
+ //e.printStackTrace();
+ }
+ }
+ }
@Override
public void process (String query) {
Tree tree = null;
tree = parseCosmasQuery(query);
- if (DEBUG) {
+ if (DEBUG)
+ {
System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query);
log.debug("Processing CosmasII query: " + query);
- }
+ }
+
if (tree != null)
{
-
- if (DEBUG) {
- log.debug("ANTLR parse tree: " + tree.toStringTree());
- System.out.printf("\nANTLR parse tree: %s.\n\n", tree.toStringTree());
- }
+ if (verbose) {
+ log.debug("ANTLR parse tree: " + tree.toStringTree());
+ System.out.printf("\nANTLR parse tree: %s.\n\n", tree.toStringTree());
+ }
processNode(tree);
}
@@ -181,11 +275,13 @@
stackedObjects = 0;
stackedToWrap = 0;
- if (verbose) {
+ /*
+ if (verbose) {
System.err.println(" " + objectStack);
System.out.println(openNodeCats);
}
-
+ */
+
/* ***************************************
* Processing individual node categories *
* ***************************************
@@ -716,11 +812,13 @@
@SuppressWarnings("unchecked")
private void processOPPROX (Tree node) {
+
// collect info
Tree prox_opts = node.getChild(0);
Tree typ = prox_opts.getChild(0);
Tree dist_list = prox_opts.getChild(1);
- // Step I: create group
+
+ // Step I: create group
Map<String, Object> group =
KoralObjectGenerator.makeGroup(KoralOperation.SEQUENCE);
@@ -1746,30 +1844,41 @@
org.antlr.runtime.CommonTokenStream tokens =
new org.antlr.runtime.CommonTokenStream(lex); // v3
- // System.out.printf("parseCosmasQuery: tokens = %d\n", tokens.size());
- // System.out.printf("parseCosmasQuery: tokens = %s\n", tokens.toString());
-
parser = new c2psParser(tokens);
// Use custom error reporters
lex.setErrorReporter(errorListener);
((c2psParser) parser).setErrorReporter(errorListener);
-
+
c2psParser.c2ps_query_return c2Return =
((c2psParser) parser).c2ps_query(); // statt t().
// AST Tree anzeigen:
tree = (Tree) c2Return.getTree();
- if (DEBUG) log.debug(tree.toStringTree());
+
+ if (DEBUG)
+ {
+ System.out.printf("Debug: parseCosmasQuery: tree = '%s'.\n", tree.toStringTree());
+ log.debug(tree.toStringTree());
+ }
}
+ catch (FailedPredicateException fe)
+ { // unused so far - 11.01.24/FB
+ System.out.printf("parseCosmasQuery: FailedPredicateException!\n");
+ addError(StatusCodes.MALFORMED_QUERY,
+ "failed predicate on prox something.");
+ }
catch (RecognitionException e) {
+ // unused so far - 11.01.24/FB
+ System.out.printf("Debug: out: parseCosmasQuery: RecognitionException!\n");
log.error(
"Could not parse query. Please make sure it is well-formed.");
addError(StatusCodes.MALFORMED_QUERY,
"Could not parse query. Please make sure it is well-formed.");
}
- String treestring = tree.toStringTree();
+ String treestring = tree.toStringTree();
+
boolean erroneous = false;
if (parser.failed() || parser.getNumberOfSyntaxErrors() > 0) {
erroneous = true;
@@ -1778,10 +1887,28 @@
if (erroneous || treestring.contains("<mismatched token")
|| treestring.contains("<error")
- || treestring.contains("<unexpected")) {
- log.error(errorListener.generateFullErrorMsg().toString());
+ || treestring.contains("<unexpected"))
+ {
+ //System.err.printf("Debug: parseCosmasQuery: tree: '%s'.\n", treestring);
+ //System.err.printf("Debug: parseCosmasQuery: FullErrorMsg: '%s'.\n", errorListener.generateFullErrorMsg().toString());
+ log.error(errorListener.generateFullErrorMsg().toString());
addError(errorListener.generateFullErrorMsg());
}
+
+ // collect and report errors found by other functions than the lexer/parser:
+ // tree might already be null if another error was reported above.
+ if( reportErrorsinTree(tree) == true )
+ {
+ if( DEBUG )
+ System.out.printf("Debug: parseCosmasQuery: reportErrorsinTree at least 1 error message found. Setting tree = null.\n");
+ return null;
+ }
+ else
+ {
+ if(DEBUG)
+ System.out.printf("Debug: parseCosmasQuery: reportErrorsinTree has found no error messages.\n");
+ }
+
return tree;
- }
+ } // parseCosmasQuery
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index 94bf15d..edc527e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -18,6 +18,7 @@
import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
+import com.fasterxml.jackson.core.JsonPointer;
/**
* Main class for Koral, serializes queries from concrete QLs to KoralQuery
@@ -33,7 +34,7 @@
private String version = "Unknown";
private String name = "Unknown";
private static Properties info;
-
+ private boolean bDebug = false;
{
loadInfo();
@@ -104,24 +105,31 @@
int i = 0;
String[] queries = null;
String ql = "poliqarpplus";
- boolean bDebug = true;
+ boolean
+ bDebug = false;
if (args.length < 2) {
- System.err
- .println("Usage: QuerySerializer \"query\" queryLanguage");
+ System.err.println("\nUsage: QuerySerializer \"query\" queryLanguage [-show]");
System.exit(1);
}
else {
queries = new String[] { args[0] };
ql = args[1];
}
+ if( args.length >= 3 )
+ {
+ if( args[2].compareToIgnoreCase("-show") == 0 )
+ bDebug = true;
+ }
+
for (String q : queries) {
i++;
try {
- if( bDebug ) System.out.printf("QuerySerialize: query = >>%s<< lang = %s.\n", q, ql);
-
- jg.run(q, ql);
- System.out.println();
+ if( bDebug )
+ System.out.printf("QuerySerialize: query = >>%s<< lang = %s.\n", q, ql);
+
+ jg.run(q, ql, bDebug);
+ System.out.println();
}
catch (NullPointerException npe) {
npe.printStackTrace();
@@ -145,9 +153,9 @@
* 'poliqarpplus', 'cqp', 'cosmas2', 'annis' or 'cql'.
* @throws IOException
*/
- public void run (String query, String queryLanguage) throws IOException {
+ public void run (String query, String queryLanguage, boolean bDebug) throws IOException {
- ast.verbose = DEBUG ? true : false; // debugging: 01.09.23/FB
+ ast.verbose = bDebug; // debugging: 01.09.23/FB
if (queryLanguage.equalsIgnoreCase("poliqarp")) {
ast = new PoliqarpPlusQueryProcessor(query);
@@ -174,7 +182,9 @@
throw new IllegalArgumentException(
queryLanguage + " is not a supported query language!");
}
- System.out.println(this.toJSON());
+
+ if( bDebug )
+ System.out.println(this.toJSON());
}
public QuerySerializer setQuery (String query, String ql, String version) {
@@ -230,7 +240,7 @@
public final String toJSON () {
String ser;
try {
- ser = mapper.writeValueAsString(raw());
+ ser = mapper.writeValueAsString(raw());
// System.out.println(ser);
}
catch (JsonProcessingException e) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
index 087ae32..2618d57 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
@@ -167,7 +167,7 @@
// Some things went wrong ...
catch (Exception e) {
- System.err.println(e.getMessage());
+ System.err.println("parseQuery: " + e.getMessage());
}
// Return the generated tree
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
index 6e574fd..e5f5d71 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java
@@ -31,7 +31,8 @@
@Override
public void reportError (String error) {
- String charPositionStr = null;
+
+ String charPositionStr = null;
String offendingSymbol = null;
String expected = null;
Pattern p = Pattern
@@ -52,7 +53,8 @@
public ArrayList<Object> generateFullErrorMsg () {
- ArrayList<Object> errorSpecs = new ArrayList<Object>();
+
+ ArrayList<Object> errorSpecs = new ArrayList<Object>();
String msg = getDetailedErrorMessage();
errorSpecs.add(StatusCodes.MALFORMED_QUERY);
errorSpecs.add(msg);
@@ -62,7 +64,14 @@
private String getDetailedErrorMessage () {
- // default message, in case no detailed info is available;
+
+ /*
+ System.err.printf("Debug: getDetailedErrorMessage: pos=%d expected='%s' offend='%s' query='%s'.\n",
+ charPosition, expected != null ? expected : "null", offendingSymbol != null ? offendingSymbol : "null",
+ query != null ? query : "null");
+ */
+
+ // default message, in case no detailed info is available;
String msg = "Malformed query. Could not parse.";
char offendingSymbol = query.charAt(0);
if (query.length() > charPosition)
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
index 656228d..b8c0765 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
@@ -14,4 +14,13 @@
public final static int QUERY_TOO_COMPLEX = 311;
public final static int UNKNOWN_QUERY_ERROR = 399;
public final static int SERIALIZATION_FAILED = 300;
+
+ // error codes for PROX syntax errors:
+ final public static int ERR_PROX_UNKNOWN = 320;
+ public final static int ERR_PROX_MEAS_NULL = 321;
+ public final static int ERR_PROX_MEAS_TOOGREAT = 322;
+ public final static int ERR_PROX_VAL_NULL = 323;
+ public final static int ERR_PROX_VAL_TOOGREAT = 324;
+ public final static int ERR_PROX_DIR_TOOGREAT = 325;
+ public final static int ERR_PROX_WRONG_CHARS = 326;
}
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java
new file mode 100644
index 0000000..92ba9ef
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java
@@ -0,0 +1,27 @@
+package de.ids_mannheim.korap.util;
+
+import org.antlr.runtime.*;
+
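+/* RecognitionException subclass that carries the text of the mismatched token.
+ * NOTE(review): the earlier header ("general String manipulation functions moved from
+ * c2ps_opREG.java and Cosmas2QueryProcessor.java") looks copied from another file. 24.10.23/FB
+ */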
+
+public final class C2RecognitionException extends RecognitionException {
+
+ private static final boolean DEBUG = false;
+ public String mismatchedToken;
+
+ public C2RecognitionException(String mismatchedToken)
+
+ {
+ this.mismatchedToken = mismatchedToken;
+
+ } // constructor C2RecognitionException
+
+ public String getMismatchedToken()
+ {
+ return this.mismatchedToken;
+ }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
index e3d1ea8..bb4319c 100644
--- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
@@ -629,6 +629,32 @@
assertEquals("Mond", res.at("/query/operands/1/operands/0/wrap/key")
.asText());
assertFalse(res.at("/query/inOrder").asBoolean());
+
+ // 15.01.24/FB: checking syntax error detection:
+
+ query = "Sonne /+w Mond"; // distance value missing.
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
+
+ query = "Sonne /+2sw Mond"; // 2 distance types instead of 1.
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
+
+ query = "Sonne /+2s- Mond"; // 2 distance directions instead of 1.
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
+
+ query = "Sonne /+2s7 Mond"; // 2 distance values instead of 1.
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
}