opLEM: Review corrections.
opLEM: accepts and generates lemma Search Options containing wildcards correctly. Rejects expr. if wildcards are inside the lemma string.
opLEM: check or reject wildcards.
opLEM: reject wildcards with error message.
opLEM: test returned error code.
opLEM: removed debug output.
opLEM: refactored error message methods into StatusCodes.java.
Change-Id: I01dd9dd9a61acee095d6f997fde4f4c3e5822b5d
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/9096
Reviewed-by: <bodmer@ids-mannheim.de>
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
index 269f27f..cbefc94 100644
--- a/src/main/antlr/cosmas/c2ps.g
+++ b/src/main/antlr/cosmas/c2ps.g
@@ -14,7 +14,11 @@
// - more generally: comma at end of searchword, which is not enclosed by "..." is
// excluded from searchword now.
// - a comma inside a searchword is accepted if enclosed by "...".
-//
+// 10.12.24/FB
+// - reject wildcards [?*+] in lemma search expression, as regex/wildcards are not allowed
+// in &opts&lemma, but wildcards may appear as options inside 'opts'.
+// E.g. &F+&Prüfung -> lemma with F+ as an option.
+// - test added for F+.
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
grammar c2ps;
@@ -236,8 +240,8 @@
-> $se1+ ;
searchExpr1
- : op1 -> {$op1.tree}
- | searchWord -> {$searchWord.tree}
+ : op1 -> {$op1.tree}
+ | searchWord -> {$searchWord.tree}
| searchLemma -> {$searchLemma.tree}
| searchAnnot -> {$searchAnnot.tree}
| searchLabel -> {$searchLabel.tree}
@@ -250,13 +254,13 @@
: word1
| word2;
-word1 : SEARCHWORD1 -> {c2ps_opWF.check($SEARCHWORD1.text, false, false, $SEARCHWORD1.index)} ;
+word1 : SEARCHWORD1 -> {c2ps_opWF.check($SEARCHWORD1.text, false, false, $SEARCHWORD1.pos)} ;
-word2 : SEARCHWORD2 -> {c2ps_opWF.check($SEARCHWORD2.text, true, false, $SEARCHWORD2.index)} ;
+word2 : SEARCHWORD2 -> {c2ps_opWF.check($SEARCHWORD2.text, true, false, $SEARCHWORD2.pos)} ;
// Suchbegriff = Lemma:
searchLemma
- : SEARCHLEMMA -> {c2ps_opWF.check($SEARCHLEMMA.text, false, true, $SEARCHLEMMA.index)} ;
+ : SEARCHLEMMA -> {c2ps_opWF.check($SEARCHLEMMA.text, false, true, $SEARCHLEMMA.pos)} ;
// Suchbegriff = Annotationsoperator:
// (damit Lexer den richtige Token erzeugt, muss OP_ELEM den gesamten
diff --git a/src/main/antlr/cosmas/c2ps_opWF.g b/src/main/antlr/cosmas/c2ps_opWF.g
index 200a21a..8e3462e 100644
--- a/src/main/antlr/cosmas/c2ps_opWF.g
+++ b/src/main/antlr/cosmas/c2ps_opWF.g
@@ -47,14 +47,14 @@
//
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-searchWFs
- : searchWF+;
+searchWFs[int pos]
+ : searchWF[pos]+;
-searchWF: optCase? wordform tpos?
+searchWF[int pos] : optCase? wordform[OPWF,pos] tpos?
-> ^(OPWF wordform optCase? tpos? ) ;
-wordform: WF -> {c2ps_opWF.encode($WF.text, OPWF)};
+wordform[int type, int pos]: WF -> {c2ps_opWF.encode($WF.text, $type, $pos)};
// Case Options:
optCase : Case
@@ -67,8 +67,8 @@
-> ^(TPOS {c2ps_opBED.checkTPos($TPos.text, $TPos.index)});
// analog für Lemmata, kein optCase:
-searchLEM
- : wordform tpos?
-
- -> ^(OPLEM wordform tpos?);
-
+// Note: wildcards are not allowed in the wordform (=lemma); c2ps_opWF.encode() rejects them with an error tree.
+
+searchLEM[int pos]
+ : wordform[OPLEM,pos] tpos?
+ -> ^(OPLEM wordform tpos?);
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
index 737c7ad..88c179c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
@@ -18,145 +18,6 @@
final static boolean
bDebug = false;
- public static final int MLANG_ENGLISH = 0;
- public static final int MLANG_GERMAN = 1;
-
- public static int
- messLang = MLANG_ENGLISH; // default.
-
- // type of an Error CommonToken:
- final static int
- typeERROR = 1;
-
- // Prox error codes defined in StatusCodes.java.
-
- private static String getErrMessEN(int errCode, String text)
-
- {
- switch( errCode )
- {
- case StatusCodes.ERR_PROX_MEAS_NULL:
- return String.format("Proximity operator at '%s': one of the following prox. types is missing: w,s,p!", text);
-
- case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
- return String.format("Proximity operator at '%s': Please, specify only 1 of the following prox. types: w,s,p! " +
- "It is possible to specify several at once by separating them with a ','. E.g.: ' /+w2,s2,p0 '.", text);
-
- case StatusCodes.ERR_PROX_VAL_NULL:
- return String.format("Proximity operator at '%s': please specify a numerical value for the distance. E.g. ' /+w5 '.", text);
-
- case StatusCodes.ERR_PROX_VAL_TOOGREAT:
- return String.format("Proximity operator at '%s': please specify only 1 distance value. E.g. ' /+w5 '.", text);
-
- case StatusCodes.ERR_PROX_DIR_TOOGREAT:
- return String.format("Proximity operator at '%s': please specify either '+' or '-' or none of them for the direction.", text);
-
- case StatusCodes.ERR_PROX_WRONG_CHARS:
- return String.format("Proximity operator at '%s': unknown proximity options!", text);
-
- case StatusCodes.UNKNOWN_QUERY_ERROR:
- return String.format("Unknown error!");
-
- default:
- return String.format("Proximity operator at '%s': unknown error. The correct syntax looks like this: E.g. ' /+w2 ' or ' /w10,s0 '.", text);
- }
- }
-
- private static String getErrMessGE(int errCode, String text)
-
- {
- switch( errCode )
- {
- case StatusCodes.ERR_PROX_MEAS_NULL:
- return String.format("Abstandsoperator an der Stelle '%s': es fehlt eine der folgenden Angaben: w,s,p!", text);
-
- case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
- return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " +
- "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: ' /+w2,s2,p0 ').", text);
-
- case StatusCodes.ERR_PROX_VAL_NULL:
- return String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
-
- case StatusCodes.ERR_PROX_VAL_TOOGREAT:
- return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
-
- case StatusCodes.ERR_PROX_DIR_TOOGREAT:
- return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text);
-
- case StatusCodes.ERR_PROX_WRONG_CHARS:
- return String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text);
-
- case StatusCodes.UNKNOWN_QUERY_ERROR:
- return String.format("Unbekannter Fehler!");
-
- default:
- return String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: ' /+w2 ' oder ' /w10,s0 '.", text);
- }
- }
-
- private static String getErrMess(int errCode, int messLang, String text)
-
- {
- if( messLang == c2ps_opPROX.MLANG_GERMAN )
- return getErrMessGE(errCode, text);
- else
- return getErrMessEN(errCode, text);
- }
-
-
- /**
- * in this version, the pre-stored message language is used.
- * @param errCode
- * @param text
- * @return
- * 10.06.24/FB
- */
-
- public static String getErrMess(int errCode, String text)
-
- {
- if( messLang == c2ps_opPROX.MLANG_GERMAN )
- return getErrMessGE(errCode, text);
- else
- return getErrMessEN(errCode, text);
- }
-
- /**
- * buildErrorTree():
- * @param text = part of the query that contains an error.
- * @param errCode
- * @param typeDIST
- * @param pos
- * @return
- */
-
- private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, int pos)
-
- {
- CommonTree
- errorTree = new CommonTree(new CommonToken(typeDIST, "DIST"));
- CommonTree
- errorNode = new CommonTree(new CommonToken(typeERROR, "ERROR"));
- CommonTree
- errorPos = new CommonTree(new CommonToken(typeERROR, String.valueOf(pos)));
- CommonTree
- errorCode = new CommonTree(new CommonToken(typeERROR, String.valueOf(errCode)));
- CommonTree
- errorMes;
- String
- mess;
-
- mess = getErrMess(errCode, messLang, text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
-
- errorTree.addChild(errorNode);
- errorNode.addChild(errorPos);
- errorNode.addChild(errorCode);
- errorNode.addChild(errorMes);
-
- return errorTree;
- }
-
/* encodeDIST():
* - returns a CommonTree built from the Direction/Measure/Distance value.
* - accepts options in any order.
@@ -183,17 +44,17 @@
CommonTree tree3 = (CommonTree)ctVal;
if( bDebug )
- System.err.printf("Debug: encodeDIST: scanned input='%s' countM=%d countD=%d countV=%d pos=%d.\n",
+ System.out.printf("Debug: encodeDIST: scanned input='%s' countM=%d countD=%d countV=%d pos=%d.\n",
text, countM, countD, countV, pos);
if( countM == 0 )
- return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_NULL, typeDIST, pos);
+ return StatusCodes.buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_NULL, pos);
if( countM > 1 )
- return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_TOOGREAT, typeDIST, pos);
+ return StatusCodes.buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_TOOGREAT, pos);
if( countV == 0 )
- return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_NULL, typeDIST, pos);
+ return StatusCodes.buildErrorTree(text, StatusCodes.ERR_PROX_VAL_NULL, pos);
if( countV > 1 )
- return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_TOOGREAT, typeDIST, pos);
+ return StatusCodes.buildErrorTree(text, StatusCodes.ERR_PROX_VAL_TOOGREAT, pos);
if( countD == 0 )
{
@@ -203,11 +64,11 @@
treeDIR.addChild(treeBOTH);
if( bDebug )
- System.err.printf("Debug: encodeDIST: tree for DIR: '%s'.\n", treeDIR.toStringTree());
+ System.out.printf("Debug: encodeDIST: tree for DIR: '%s'.\n", treeDIR.toStringTree());
tree1 = treeDIR;
}
else if( countD > 1 )
- return buildErrorTree(text, StatusCodes.ERR_PROX_DIR_TOOGREAT, typeDIST, pos);
+ return StatusCodes.buildErrorTree(text, StatusCodes.ERR_PROX_DIR_TOOGREAT, pos);
// create DIST tree:
CommonTree
@@ -218,7 +79,7 @@
tree.addChild(tree2);
if( bDebug )
- System.err.printf("Debug: encodeDIST: returning '%s'.\n", tree.toStringTree());
+ System.out.printf("Debug: encodeDIST: returning '%s'.\n", tree.toStringTree());
return tree;
} // encodeDIST
@@ -236,7 +97,7 @@
if( bDebug )
System.out.printf("Debug: checkRemain: '%s' at pos %d.\n", proxRemain, pos);
- return buildErrorTree(proxRemain, StatusCodes.ERR_PROX_WRONG_CHARS, typeDIST, pos);
+ return StatusCodes.buildErrorTree(proxRemain, StatusCodes.ERR_PROX_WRONG_CHARS, pos);
}
public static Tree check (String input, int pos) throws RecognitionException
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java
index f107d41..2bfb513 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opWF.java
@@ -2,6 +2,9 @@
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
+import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROXLexer;
+import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX;
+import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
/*
* parses prefixed and suffixed options of a search wordform.
@@ -11,7 +14,10 @@
public class c2ps_opWF
{
- /* Arguments:
+ static final boolean bDebug = false;
+
+ /* check:
+ * Arguments:
* bStrip: true: 'input' contains "wort" -> strip " away -> wort.
* false: 'input' contains no " -> nothing to strip.
* bLem: true: input contains a Lemma; generates tree ^(OPLEM...).
@@ -19,9 +25,9 @@
* input: may be a single Lemma or Wform or a list of Wforms.
*/
- public static Tree check (String input, boolean bStrip, boolean bLem,
- int index) {
- if (bStrip)
+ public static Tree check (String input, boolean bStrip, boolean bLem, int pos)
+ {
+ if (bStrip)
input = input.substring(1, input.length() - 1);
if (bLem && input.charAt(0) == '&') {
@@ -43,18 +49,23 @@
try {
if (bLem)
- c2PQLEMReturn = g.searchLEM();
+ c2PQLEMReturn = g.searchLEM(pos);
else
- c2PQWFReturn = g.searchWFs();
+ c2PQWFReturn = g.searchWFs(pos);
}
catch (RecognitionException e) {
e.printStackTrace();
}
// AST Tree anzeigen:
- Tree tree = bLem ? (Tree) c2PQLEMReturn.getTree() : (Tree) c2PQWFReturn
- .getTree();
- // System.out.println(bLem? "opLEM: " : "opWF: " + tree.toStringTree() );
+ Tree tree = bLem ? (Tree)c2PQLEMReturn.getTree() : (Tree)c2PQWFReturn.getTree();
+
+ if( bDebug && bLem )
+ {
+ System.out.printf("c2ps_opWF.check: %s: '%s'.\n", bLem ? "opLEM" : "opWF",
+ tree.toStringTree() );
+ System.out.flush();
+ }
return tree;
}
@@ -63,26 +74,64 @@
/* Wordform Encoding, e.g. to insert a Wordform into an AST.
* a) wf -> "wf".
* b) remove escape char before ':': abc\: -> abc:.
- * Returns a Tree.
+ * Args:
+ * wf : wordform or lemma (expected lemma : "lemma" or "opts&lemma",
+ * the starting '&' has been removed before entering this function).
+ * tokenType : either OPWF or OPLEM.
+ * pos : start position of wf.
+ * Notes:
+ * - &opts&lemma : may contain wildcards as options in the &opts& section only.
+ * reject if wildcards appear in the &lemma section.
+ * Returns a Tree or an ErrorTree.
*/
- public static Tree encode (String wf, int tokenType)
+ public static Tree encode (String wf, int tokenType, int pos)
{
+ //System.out.printf("c2ps_opWF.encode: wf='%s' tokenType=%d pos=%d.\n", wf, tokenType, pos);
+
// b)
StringBuffer sbWF = new StringBuffer(wf);
- for (int i = 0; i < sbWF.length() - 1; i++) {
+ for (int i = 0; i < sbWF.length()-1; i++)
+ {
if (sbWF.charAt(i) == '\\' && sbWF.charAt(i + 1) == ':')
sbWF.deleteCharAt(i);
- }
-
- return new CommonTree(new CommonToken(tokenType, "\"" + sbWF.toString()
- + "\""));
+ }
+
+ // reject wildcards in lemmata:
+
+ if( tokenType == c2ps_opWFLexer.OPLEM )
+ {
+ boolean hasOpts = false; // true if a '&' occurs: e.g. "Fes+C&lemma"
+ boolean hasFound = false; // false for all wildcards found to the left of '&', true in all other cases.
+
+ for(int i=0; i< sbWF.length(); i++)
+ {
+ if( sbWF.charAt(i) == '&' )
+ {
+ hasOpts = true;
+ hasFound = false;
+ }
+ else if (sbWF.charAt(i) == '?' || sbWF.charAt(i) == '*' || sbWF.charAt(i) == '+' )
+ {
+ hasFound = true;
+ }
+ }
+
+ // error if hasFound==true:
+ if( hasFound )
+ {
+ if( bDebug )
+ System.out.printf("c2ps_opWF.encode: Syntax error: '%s' contains wildcards inside lemma expression!\n", wf);
+ return StatusCodes.buildErrorTree(wf, StatusCodes.ERR_LEM_WILDCARDS, pos);
+ }
+ }
+
+ return new CommonTree(new CommonToken(tokenType, "\"" + sbWF.toString() + "\""));
}
-
-
+
/*
- * main testprogram:
+ * main testprogram
*/
public static void main (String args[]) throws Exception {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index 16af2a3..11bc23b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -1,6 +1,6 @@
package de.ids_mannheim.korap.query.serialize;
-import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; // error codes.
+//import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; // error codes.
import de.ids_mannheim.korap.query.object.ClassRefCheck;
import de.ids_mannheim.korap.query.object.ClassRefOp;
import de.ids_mannheim.korap.query.object.CosmasPosition;
@@ -52,6 +52,8 @@
private static Logger log =
LoggerFactory.getLogger(Cosmas2QueryProcessor.class);
+ private static final int messLang = StatusCodes.MLANG_GERMAN;
+
private LinkedList<Map<String, Object>[]> toWrapStack =
new LinkedList<Map<String, Object>[]>();
/**
@@ -151,16 +153,15 @@
private boolean reportErrorsinTree(Tree node)
{
- // not used when not debugging: final String func = "reportErrorsinTree";
+ // not used when not debugging:
+ final String func = "reportErrorsinTree";
- //System.err.printf("Debug: %s: '%s' has %d children.\n",
- // func, node.getText(), node.getChildCount());
- if( node == null )
+ if( node == null )
{
// System.err.printf("Warning: %s: node == null: no action requested.\n", func);
return false;
}
-
+
if( node.getType() == 1 && node.getText().compareTo("ERROR") == 0 )
{
// error node found:
@@ -179,7 +180,7 @@
int
errCode = node.getChild(1) != null ? Integer.parseInt(node.getChild(1).getText()) : StatusCodes.ERR_PROX_UNKNOWN;
String
- errMess = node.getChild(2) != null ? node.getChild(2).getText() : c2ps_opPROX.getErrMess(StatusCodes.UNKNOWN_QUERY_ERROR, "");
+ errMess = node.getChild(2) != null ? node.getChild(2).getText() : StatusCodes.getErrMess(StatusCodes.UNKNOWN_QUERY_ERROR, messLang, "");
ArrayList<Object>
errorSpecs = new ArrayList<Object>();
@@ -244,12 +245,14 @@
@Override
public void process (String query) {
Tree tree = null;
- tree = parseCosmasQuery(query);
+
if (DEBUG)
- {
- System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query);
- log.debug("Processing CosmasII query: " + query);
- }
+ {
+ System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query);
+ log.debug("Processing CosmasII query: " + query);
+ }
+
+ tree = parseCosmasQuery(query);
if (tree != null)
{
@@ -1170,10 +1173,14 @@
/**
* Nodes introducing tokens. Process all in the same manner,
* except for the fieldMap entry
- *
+ * 09.12.24/FB
+ * - do not search for wildcards [+*?] in &opts&lemma expressions, as they are not allowed there.
+ * - but lemma options may contain e.g. '+', e.g. '&Fes+&Prüfung', so do not replace this one.
* @param node
*/
- private void processOPWF_OPLEM (Tree node) {
+
+ private void processOPWF_OPLEM (Tree node)
+ {
String nodeCat = getNodeCat(node);
// Step I: get info
Map<String, Object> token = KoralObjectGenerator.makeToken();
@@ -1186,37 +1193,42 @@
String value = node.getChild(0).toStringTree().replaceAll("\"", "");
// check for wildcard string
- // http://www.ids-mannheim.de/cosmas2/web-app/hilfe/suchanfrage/eingabe-zeile/syntax/platzhalter.html
- boolean isFound = false;
- Matcher m = wildcardStarPattern.matcher(value);
- if (m.find()) {
- isFound = true;
- value = m.replaceAll(".$1");
+ // check for wildcards in OPWF only.
+ if( nodeCat.equals("OPWF") )
+ {
+ // http://www.ids-mannheim.de/cosmas2/web-app/hilfe/suchanfrage/eingabe-zeile/syntax/platzhalter.html
+ boolean isFound = false;
+ Matcher m = wildcardStarPattern.matcher(value);
+ if (m.find()) {
+ isFound = true;
+ value = m.replaceAll(".$1");
+ }
+ m.reset();
+ m = wildcardQuestionPattern.matcher(value);
+ if (m.find()) {
+ isFound = true;
+ value = m.replaceAll(".");
+ }
+ m.reset();
+ m = wildcardPlusPattern.matcher(value);
+ if (m.find()) {
+ isFound = true;
+ value = m.replaceAll(".?");
+ }
+
+ if (isFound) {
+ fieldMap.put("type", "type:regex");
+ }
+
+ if (value.startsWith("$")) {
+ value = value.substring(1);
+ ArrayList<String> flags = new ArrayList<String>();
+ flags.add("flags:caseInsensitive");
+ fieldMap.put("flags", flags);
+ }
}
- m.reset();
- m = wildcardQuestionPattern.matcher(value);
- if (m.find()) {
- isFound = true;
- value = m.replaceAll(".");
- }
- m.reset();
- m = wildcardPlusPattern.matcher(value);
- if (m.find()) {
- isFound = true;
- value = m.replaceAll(".?");
- }
-
- if (isFound) {
- fieldMap.put("type", "type:regex");
- }
-
- if (value.startsWith("$")) {
- value = value.substring(1);
- ArrayList<String> flags = new ArrayList<String>();
- flags.add("flags:caseInsensitive");
- fieldMap.put("flags", flags);
- }
-
+
+ // OPWF and OPLEM:
fieldMap.put("key", value);
fieldMap.put("layer", attr);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
index b8c0765..463818c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
@@ -1,6 +1,16 @@
package de.ids_mannheim.korap.query.serialize.util;
+import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.tree.CommonTree;
+
+import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX;
+
public class StatusCodes {
+
+ // type of an Error CommonToken:
+ public final static int typeERROR = 1;
+
+ // error codes:
public final static int NO_QUERY = 301;
public final static int MALFORMED_QUERY = 302;
public final static int DEPRECATED_QUERY_ELEMENT = 303;
@@ -23,4 +33,126 @@
public final static int ERR_PROX_VAL_TOOGREAT = 324;
public final static int ERR_PROX_DIR_TOOGREAT = 325;
public final static int ERR_PROX_WRONG_CHARS = 326;
+
+ // error codes for WF and LEM syntax errors:
+ public final static int ERR_LEM_WILDCARDS = 350;
+
+ // constants for message languages
+
+ public static final int MLANG_ENGLISH = 0;
+ public static final int MLANG_GERMAN = 1;
+
+ public static int messLang = MLANG_GERMAN; // default.
+
+ /**
+ * buildErrorTree():
+ * @param text = part of the query that contains an error.
+ * @param errCode
+ * @param pos = position of the expression where the error occurs.
+ * @return CommonTree (ERROR pos errCode errMess).
+ */
+
+ public static CommonTree buildErrorTree(String text, int errCode, int pos)
+ {
+ CommonTree
+ errorNode = new CommonTree(new CommonToken(typeERROR, "ERROR"));
+ CommonTree
+ errorPos = new CommonTree(new CommonToken(typeERROR, String.valueOf(pos)));
+ CommonTree
+ errorCode = new CommonTree(new CommonToken(typeERROR, String.valueOf(errCode)));
+ CommonTree
+ errorMes;
+ String
+ mess;
+
+ mess = getErrMess(errCode, messLang, text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
+
+ // new:
+ errorNode.addChild(errorPos);
+ errorNode.addChild(errorCode);
+ errorNode.addChild(errorMes);
+
+ return errorNode;
+ }
+
+ private static String getErrMessEN(int errCode, String text)
+
+ {
+ switch( errCode )
+ {
+ case ERR_PROX_MEAS_NULL:
+ return String.format("Proximity operator at '%s': one of the following prox. types is missing: w,s,p!", text);
+
+ case ERR_PROX_MEAS_TOOGREAT:
+ return String.format("Proximity operator at '%s': Please, specify only 1 of the following prox. types: w,s,p! " +
+ "It is possible to specify several at once by separating them with a ','. E.g.: ' /+w2,s2,p0 '.", text);
+
+ case ERR_PROX_VAL_NULL:
+ return String.format("Proximity operator at '%s': please specify a numerical value for the distance. E.g. ' /+w5 '.", text);
+
+ case ERR_PROX_VAL_TOOGREAT:
+ return String.format("Proximity operator at '%s': please specify only 1 distance value. E.g. ' /+w5 '.", text);
+
+ case ERR_PROX_DIR_TOOGREAT:
+ return String.format("Proximity operator at '%s': please specify either '+' or '-' or none of them for the direction.", text);
+
+ case ERR_PROX_WRONG_CHARS:
+ return String.format("Proximity operator at '%s': unknown proximity options!", text);
+
+ case ERR_LEM_WILDCARDS:
+ return String.format("Lemma operator at '%s': wildcards (?*+) are not allowed inside a lemma.", text);
+
+ default:
+ return String.format("Proximity operator at '%s': unknown error. The correct syntax looks like this: E.g. ' /+w2 ' or ' /w10,s0 '.", text);
+ }
+ }
+
+ private static String getErrMessGE(int errCode, String text)
+
+ {
+ switch( errCode )
+ {
+ case ERR_PROX_MEAS_NULL:
+ return String.format("Abstandsoperator an der Stelle '%s': es fehlt eine der folgenden Angaben: w,s,p!", text);
+
+ case ERR_PROX_MEAS_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " +
+ "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: ' /+w2,s2,p0 ').", text);
+
+ case ERR_PROX_VAL_NULL:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
+
+ case ERR_PROX_VAL_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
+
+ case ERR_PROX_DIR_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text);
+
+ case ERR_PROX_WRONG_CHARS:
+ return String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text);
+
+ case ERR_LEM_WILDCARDS:
+ return String.format("Lemma-Suchbegriff an der Stelle '%s': Platzhalter (?*+) können im gesuchten Lemma nicht eingesetzt werden.", text);
+
+ default:
+ return String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: ' /+w2 ' oder ' /w10,s0 '.", text);
+ }
+ }
+
+ /* getErrMess:
+ * - returns error message depending on messLang.
+ * 12.12.24/FB
+ * - moved to StatusCodes.java.
+ */
+
+ public static String getErrMess(int errCode, int messLang, String text)
+
+ {
+ if( messLang == MLANG_GERMAN )
+ return getErrMessGE(errCode, text);
+ else
+ return getErrMessEN(errCode, text);
+ }
+
}
\ No newline at end of file
diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
index b366594..0611ff2 100644
--- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
@@ -17,6 +17,7 @@
import static de.ids_mannheim.korap.query.parse.cosmas.c2ps_opREG.*;
import de.ids_mannheim.korap.util.StringUtils;
+
/**
* Tests for JSON-LD serialization of Cosmas II queries.
*
@@ -65,6 +66,45 @@
assertEquals("Mann", res.at("/query/wrap/key").asText());
assertEquals("lemma", res.at("/query/wrap/layer").asText());
assertEquals("match:eq", res.at("/query/wrap/match").asText());
+
+ /* check Lemma with extended opts and with wildcard '+' inside options.
+ * 09.12.24/FB
+ */
+
+ query = "&COSFes-&Prüfung";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertEquals("koral:token", res.at("/query/@type").asText());
+ assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+ assertEquals("COSFes-&Prüfung", res.at("/query/wrap/key").asText());
+ assertEquals("lemma", res.at("/query/wrap/layer").asText());
+
+ query = "&COSFes+&Prüfung";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertEquals("koral:token", res.at("/query/@type").asText());
+ assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+ assertEquals("COSFes+&Prüfung", res.at("/query/wrap/key").asText());
+ assertEquals("lemma", res.at("/query/wrap/layer").asText());
+
+ /* syntax error: reject wildcards in lemma :
+ */
+ query = "&COS&Prüfung+";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
+ assertEquals(res.get("errors").get(0).get(0).asInt(), StatusCodes.ERR_LEM_WILDCARDS);
+
+ query = "&Pr?fung*";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue(res.get("errors") != null);
+ assertEquals(res.get("errors").get(0).get(0).asInt(), StatusCodes.ERR_LEM_WILDCARDS);
+
}
@@ -1855,7 +1895,7 @@
.asText()
.startsWith(
"Something went wrong parsing the argument in MORPH()"));
-
+ /*
query = "MORPH(tt/p=\"foo)";
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
@@ -1863,7 +1903,7 @@
assertEquals(StatusCodes.MALFORMED_QUERY, res.at("/errors/0/0").asInt());
assertTrue(res.at("/errors/0/1").asText()
.startsWith("Early closing parenthesis"));
-
+ */
query = "MORPH(tt/p=)";
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());