opProx feature (Cosmas 2)
rebase on origin/master.
Review Comment #181: resolved.
Squashed commit consisting of
- verbosity can be switched on/off on command line.
- Prox: parsing %-w1 and %+w1 correctly.
- opPROX: correcting order of Prox options: WIP.
- beliebige Reihenfolge der Abstands-Optionen: WIP.
- Prox: beliebige Reihenfolge der Optionen: OK.
- Prox: beliebige Reihenfolge der Optionen: OK.
- opPROX: grammar should accept any order of prox. options: WIP.
- PROX: return exact error message about prox options.
- PROX: emit a meaningful error message: wip.
- PROX: emit a meaningful error message: WIP.
- write parsing error to AST.
- trying to write error message into an error node of the AST.
- PROX: Fehlermeldung in KoralQuery schreiben funktioniert.
- Prox...
- Error detection inside Prox done. Returning a precise error message through JSON: done.
- using addError() for error messages in PROX: WIP.
- Prox: reporting exact error messages: works.
- PROX: Tests with RecognitionExceptions removed. All Error Codes in StatusCodes.java.
- Prox: error messages for wrong prox. options.
- Prox: debug output deactivated.
- Prox: deleted debug output.
- Prox: Test added: WiP.
- Prox: 1 working tests added.
- Prox: 3 more tests added.
changes for Review on opPROX.
changes for the review.
changes for the review.
opPROX: PROX_REMAIN defined as a complementary class.
additional test for %.
Change-Id: I8802becaf840660a1512281b3477762a422f8b4f
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/8015
Reviewed-by: Nils Diewald <nils@diewald-online.de>
diff --git a/Changes b/Changes
index c770400..d1ccfc2 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.43 2024-06-11
+ - [feature] Support opProx in C2 (bodmer)
+
0.42 2024-01-11
- [feature] Support #REG in C2 (bodmer)
- [bugfix] Fix comma in #BED in C2 (bodmer)
diff --git a/pom.xml b/pom.xml
index b409fe5..049b395 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
<groupId>de.ids-mannheim.korap.koral</groupId>
<artifactId>Koral</artifactId>
- <version>0.42.0</version>
+ <version>0.43.0</version>
<packaging>jar</packaging>
<name>Koral</name>
<url>https://korap.ids-mannheim.de</url>
diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g
index 1569d1a..58e00f4 100644
--- a/src/main/antlr/cosmas/c2ps_opPROX.g
+++ b/src/main/antlr/cosmas/c2ps_opPROX.g
@@ -38,8 +38,8 @@
// e.g. /w5umin -> remain = 'umin'.
PROX_REMAIN
- : (',')? ('b'..'h'|'j'..'l'|'n'|'o'|'q'|'r'|'u'|'v'|'y'|'z'|'B'..'H'|'J'..'L'|'N'|'O'|'Q'|'R'|'U'|'V'|'Y'|'Z') (~ ' ')* ;
-
+ : ~(','|'a'|'i'|'m'|'n'|'p'|'s'|'t'|'w'|'x'|'A'|'I'|'M'|'N'|'P'|'S'|'T'|'W'|'X'|'0'..'9'|'+'|'-'|':'|'/'|'%') (~ ' ')* ;
+
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
//
// PROX-Parser
@@ -102,4 +102,4 @@
| ('max'|'MAX') -> ^(GRP MAX);
-
\ No newline at end of file
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
index 6229719..737c7ad 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java
@@ -15,12 +15,121 @@
public class c2ps_opPROX
{
- final static boolean bDebug = false;
+ final static boolean
+ bDebug = false;
+
+ public static final int MLANG_ENGLISH = 0;
+ public static final int MLANG_GERMAN = 1;
+
+ public static int
+ messLang = MLANG_ENGLISH; // default.
// type of an Error CommonToken:
- final static int typeERROR = 1;
+ final static int
+ typeERROR = 1;
+
// Prox error codes defined in StatusCodes.java.
+ private static String getErrMessEN(int errCode, String text)
+
+ {
+ switch( errCode )
+ {
+ case StatusCodes.ERR_PROX_MEAS_NULL:
+ return String.format("Proximity operator at '%s': one of the following prox. types is missing: w,s,p!", text);
+
+ case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
+ return String.format("Proximity operator at '%s': Please, specify only 1 of the following prox. types: w,s,p! " +
+ "It is possible to specify several at once by separating them with a ','. E.g.: ' /+w2,s2,p0 '.", text);
+
+ case StatusCodes.ERR_PROX_VAL_NULL:
+ return String.format("Proximity operator at '%s': please specify a numerical value for the distance. E.g. ' /+w5 '.", text);
+
+ case StatusCodes.ERR_PROX_VAL_TOOGREAT:
+ return String.format("Proximity operator at '%s': please specify only 1 distance value. E.g. ' /+w5 '.", text);
+
+ case StatusCodes.ERR_PROX_DIR_TOOGREAT:
+ return String.format("Proximity operator at '%s': please specify either '+' or '-' or none of them for the direction.", text);
+
+ case StatusCodes.ERR_PROX_WRONG_CHARS:
+ return String.format("Proximity operator at '%s': unknown proximity options!", text);
+
+ case StatusCodes.UNKNOWN_QUERY_ERROR:
+ return String.format("Unknown error!");
+
+ default:
+ return String.format("Proximity operator at '%s': unknown error. The correct syntax looks like this: E.g. ' /+w2 ' or ' /w10,s0 '.", text);
+ }
+ }
+
+ private static String getErrMessGE(int errCode, String text)
+
+ {
+ switch( errCode )
+ {
+ case StatusCodes.ERR_PROX_MEAS_NULL:
+ return String.format("Abstandsoperator an der Stelle '%s': es fehlt eine der folgenden Angaben: w,s,p!", text);
+
+ case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " +
+ "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: ' /+w2,s2,p0 ').", text);
+
+ case StatusCodes.ERR_PROX_VAL_NULL:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
+
+ case StatusCodes.ERR_PROX_VAL_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text);
+
+ case StatusCodes.ERR_PROX_DIR_TOOGREAT:
+ return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text);
+
+ case StatusCodes.ERR_PROX_WRONG_CHARS:
+ return String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text);
+
+ case StatusCodes.UNKNOWN_QUERY_ERROR:
+ return String.format("Unbekannter Fehler!");
+
+ default:
+ return String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: ' /+w2 ' oder ' /w10,s0 '.", text);
+ }
+ }
+
+ private static String getErrMess(int errCode, int messLang, String text)
+
+ {
+ if( messLang == c2ps_opPROX.MLANG_GERMAN )
+ return getErrMessGE(errCode, text);
+ else
+ return getErrMessEN(errCode, text);
+ }
+
+
+ /**
+ * in this version, the pre-stored message language is used.
+ * @param errCode
+ * @param text
+ * @return
+ * 10.06.24/FB
+ */
+
+ public static String getErrMess(int errCode, String text)
+
+ {
+ if( messLang == c2ps_opPROX.MLANG_GERMAN )
+ return getErrMessGE(errCode, text);
+ else
+ return getErrMessEN(errCode, text);
+ }
+
+ /**
+ * buildErrorTree():
+ * @param text = part of the query that contains an error.
+ * @param errCode
+ * @param typeDIST
+ * @param pos
+ * @return
+ */
+
private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, int pos)
{
@@ -37,38 +146,8 @@
String
mess;
- switch( errCode )
- {
- case StatusCodes.ERR_PROX_MEAS_NULL:
- mess = String.format("Abstandsoperator an der Stelle '%s' es fehlt eine der folgenden Angaben: w,s,p!", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- case StatusCodes.ERR_PROX_MEAS_TOOGREAT:
- mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " +
- "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: /+w2,s0).", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- case StatusCodes.ERR_PROX_VAL_NULL:
- mess = String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. /+w5)! ", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- case StatusCodes.ERR_PROX_VAL_TOOGREAT:
- mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. /+w5)! ", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- case StatusCodes.ERR_PROX_DIR_TOOGREAT:
- mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- case StatusCodes.ERR_PROX_WRONG_CHARS:
- mess = String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text);
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- break;
- default:
- mess = String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: /+w2 oder /w10,s0.", text);
-
- errorMes = new CommonTree(new CommonToken(typeERROR, mess));
- }
+ mess = getErrMess(errCode, messLang, text);
+ errorMes = new CommonTree(new CommonToken(typeERROR, mess));
errorTree.addChild(errorNode);
errorNode.addChild(errorPos);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index a6c7bd9..16af2a3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -9,6 +9,7 @@
import de.ids_mannheim.korap.query.object.KoralOperation;
import de.ids_mannheim.korap.query.object.KoralTermGroupRelation;
import de.ids_mannheim.korap.query.object.KoralType;
+import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX;
import de.ids_mannheim.korap.query.parse.cosmas.c2psLexer;
import de.ids_mannheim.korap.query.parse.cosmas.c2psParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener;
@@ -150,7 +151,7 @@
private boolean reportErrorsinTree(Tree node)
{
- final String func = "reportErrorsinTree";
+ // not used when not debugging: final String func = "reportErrorsinTree";
//System.err.printf("Debug: %s: '%s' has %d children.\n",
// func, node.getText(), node.getChildCount());
@@ -178,8 +179,8 @@
int
errCode = node.getChild(1) != null ? Integer.parseInt(node.getChild(1).getText()) : StatusCodes.ERR_PROX_UNKNOWN;
String
- errMess = node.getChild(2) != null ? node.getChild(2).getText() : "Genaue Fehlermeldung nicht auffindbar.";
-
+ errMess = node.getChild(2) != null ? node.getChild(2).getText() : c2ps_opPROX.getErrMess(StatusCodes.UNKNOWN_QUERY_ERROR, "");
+
ArrayList<Object>
errorSpecs = new ArrayList<Object>();
@@ -1844,9 +1845,6 @@
org.antlr.runtime.CommonTokenStream tokens =
new org.antlr.runtime.CommonTokenStream(lex); // v3
- // System.out.printf("parseCosmasQuery: tokens = %d\n", tokens.size());
- // System.out.printf("parseCosmasQuery: tokens = %s\n", tokens.toString());
-
parser = new c2psParser(tokens);
// Use custom error reporters
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index deaa58e..a51bae1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -18,8 +18,6 @@
import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import com.fasterxml.jackson.core.JsonPointer;
-
/**
* Main class for Koral, serializes queries from concrete QLs to KoralQuery
*
@@ -116,11 +114,9 @@
queries = new String[] { args[0] };
ql = args[1];
}
- if( args.length >= 3 )
- {
- if( args[2].compareToIgnoreCase("-show") == 0 )
- bDebug = true;
- }
+
+ if( args.length >= 3 && args[2].compareToIgnoreCase("-show") == 0 )
+ bDebug = true;
for (String q : queries) {
i++;
@@ -153,7 +149,7 @@
* 'poliqarpplus', 'cqp', 'cosmas2', 'annis' or 'cql'.
* @throws IOException
*/
-
+
public void run (String query, String queryLanguage, boolean bDebug) throws IOException {
ast.verbose = bDebug; // debugging: 01.09.23/FB
diff --git a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java
index 92ba9ef..358dd0c 100644
--- a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java
+++ b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java
@@ -9,7 +9,6 @@
public final class C2RecognitionException extends RecognitionException {
- private static final boolean DEBUG = false;
public String mismatchedToken;
public C2RecognitionException(String mismatchedToken)
diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
index bb4319c..b366594 100644
--- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
@@ -513,6 +513,7 @@
@Test
public void testOPPROX () throws JsonProcessingException, IOException {
+
query = "Sonne /+w1:4 Mond";
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
@@ -630,31 +631,95 @@
.asText());
assertFalse(res.at("/query/inOrder").asBoolean());
- // 15.01.24/FB: checking syntax error detectiong:
+ // -- check exclude operator -- //
+
+ query = "Sonne %-w1:2 Sterne";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ /*
+ System.out.printf("Query '%s': returns: '%s'.\n", query, res.toPrettyString()) ;
+ System.out.printf("[0]: '%s'.\n", res.at("/query/distances").get(0).get("boundary").toPrettyString());
+ System.out.printf("@type: '%s'.\n", res.at("/query/distances").get(0).get("@type").asText());
+ System.out.printf("exclude: '%s'.\n", res.at("/query/distances").get(0).get("exclude").asText());
+ System.out.printf("key: '%s'.\n", res.at("/query/distances").get(0).get("key").asText());
+ */
+
+ assertEquals("cosmas:distance", res.at("/query/distances").get(0).get("@type").asText());
+ assertTrue( res.at("/query/distances").get(0).get("exclude").asBoolean());
+ assertEquals("w", res.at("/query/distances").get(0).get("key").asText());
+
+ // 15.01.24/FB: checking syntax error:
query = "Sonne /+w Mond"; // distance value missing.
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertTrue(res.get("errors") != null);
+ //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ;
+ assertEquals(StatusCodes.ERR_PROX_VAL_NULL, res.get("errors").get(0).get(0).asInt());
query = "Sonne /+2sw Mond"; // 2 distance types instead of 1.
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
-
+
assertTrue(res.get("errors") != null);
+ //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ;
+ assertEquals(StatusCodes.ERR_PROX_MEAS_TOOGREAT, res.get("errors").get(0).get(0).asInt());
query = "Sonne /+2s- Mond"; // 2 distance directions instead of 1.
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertTrue(res.get("errors") != null);
-
+ //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ;
+ assertEquals(StatusCodes.ERR_PROX_DIR_TOOGREAT, res.get("errors").get(0).get(0).asInt());
+
query = "Sonne /+2s7 Mond"; // 2 distance values instead of 1.
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertTrue(res.get("errors") != null);
+ //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ;
+ assertEquals(StatusCodes.ERR_PROX_VAL_TOOGREAT, res.get("errors").get(0).get(0).asInt());
+
+ // tests for error messages for unknown proximity options:
+ // 29.05.24/FB
+
+ query = "ab /+w1:2u,p cd";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue("Error code expected!",!res.get("errors").isNull());
+ assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt());
+
+ query = "ab %-w1:2,2su cd";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue("Error code expected!",!res.get("errors").isNull());
+ assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt());
+
+ query = "ab /w1:2s cd";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ //System.out.printf("Query '%s': context: '%s'.\n", query, res.get("@context").toPrettyString()) ;
+ //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ;
+ //System.out.printf("Query '%s': errorCode: '%s'.\n", query, res.get("errors").get(0).get(0).toPrettyString()) ;
+ //System.out.printf("Query '%s': errorText : '%s'.\n", query, res.get("errors").get(0).get(1).toPrettyString()) ;
+ //System.out.printf("Query '%s': errorPos : '%s'.\n", query, res.get("errors").get(0).get(2).toPrettyString()) ;
+
+ assertTrue("Error code expected!", res.get("errors") != null);
+ assertEquals(StatusCodes.ERR_PROX_MEAS_TOOGREAT, res.get("errors").get(0).get(0).asInt());
+
+ query = "Sonne %-w1:2,+2su Galaxien";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertTrue("Error code expected!", res.get("errors") != null);
+ assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt());
+
}