Issue 66: missing C2-operator #REG: resolved.
Change-Id: If2e74a8d1898b171a2e2c156b4be1b0f90bbd633
diff --git a/pom.xml b/pom.xml
index 6bdd557..03b9f9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -178,6 +178,8 @@
<exclude>**/c2ps_opOV.java</exclude>
<exclude>**/c2ps_opPROX.java</exclude>
<exclude>**/c2ps_opWF.java</exclude>
+ <exclude>**/c2ps_opREG.java</exclude>
+ <exclude>**/c2ps_optCase.java</exclude>
<exclude>**/c2ps_optCase.java</exclude>
<exclude>**/.gitignore</exclude>
<exclude>fcsql/*</exclude>
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
index c264ea6..568679a 100644
--- a/src/main/antlr/cosmas/c2ps.g
+++ b/src/main/antlr/cosmas/c2ps.g
@@ -1,16 +1,20 @@
- // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-// //
-// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) //
-// globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf). //
-// 17.12.12/FB //
-// v-0.6 //
-// TODO: //
-// - se1: Einsetzen des Default-Operators in den kumulierten AST. //
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//
+// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)
+// globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf).
+// 17.12.12/FB
+// v-0.6
+// TODO:
+// - se1: Einsetzen des Default-Operators in den kumulierten AST.
+//
+// v0.7 - 25.07.23/FB
+// - added: #REG(x)
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
grammar c2ps;
options { output=AST; backtrack=true; k=5;}
+// tokens that will appear as node names in the resulting AST:
tokens {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX;
ARG1; ARG2;
OPWF; OPLEM; OPANNOT;
@@ -21,6 +25,7 @@
OPNOT;
OPEXPR1;
OPMORPH; OPELEM;
+ OPREG;
}
@header {package de.ids_mannheim.korap.query.parse.cosmas;}
@@ -76,6 +81,14 @@
OP_OV : '#OV' | '#OV(' OP_OV_OPTS? ')' ;
+// #REG(abc['"]) or #REG('abc\'s') or #REG("abc\"s"):
+
+OP_REG : '#REG(' ' '* '\'' ('\\\''|~'\'')+ '\'' (' ')* ')'
+ |
+ '#REG(' ' '* '"' ('\\"'|~'"')+ '"' (' ')* ')'
+ |
+ '#REG(' ' '* ~('\''|'"'|' ') (~(')'))* ')';
+
// EAVEXP wird hier eingesetzt für eine beliebige Sequenz von Zeichen bis zu ')'.
fragment OP_IN_OPTS
: EAVEXPR ;
@@ -241,7 +254,7 @@
// OP1: Suchoperatoren mit 1 Argument:
// -----------------------------------
-op1 : opBEG | opEND | opNHIT | opALL | opBED;
+op1 : opBEG | opEND | opNHIT | opALL | opBED | opREG;
// #BED(serchExpr, B).
// B muss nachträglich in einer lokalen Grammatik überprüft werden.
@@ -259,3 +272,6 @@
opNHIT : ( '#NHIT(' | '#INKLUSIVE(' ) searchExpr ')' -> ^(OPNHIT searchExpr) ;
opALL : ( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')' -> ^(OPALL searchExpr) ;
+
+opREG : OP_REG -> ^(OPREG {c2ps_opREG.encode($OP_REG.text, OPREG)}) ; //^(OPREG OP_REG);
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
index fb9df4e..b95a0f3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
@@ -16,10 +16,11 @@
c2ps_opBEDParser g = new c2ps_opBEDParser(tokens);
c2ps_opBEDParser.opBEDOpts_return c2PQReturn = null;
- /*
- System.out.println("check opBED: " + index + ": " + input);
+ /**/
+ System.out.format("opBED: check: input='%s', index=%d.\n", input, index);
+ System.out.format("opBED: tokens ='%s'.\n", tokens.toString());
System.out.flush();
- */
+ /**/
try {
c2PQReturn = g.opBEDOpts();
@@ -30,6 +31,7 @@
// AST Tree anzeigen:
Tree tree = (Tree) c2PQReturn.getTree();
+ /**/
//System.out.println("#BED Opts: " + tree.toStringTree() );
return tree;
@@ -68,7 +70,7 @@
public static void main (String args[]) throws Exception {
- String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)" };
+ String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)"};
Tree tree;
for (int i = 0; i < input.length; i++) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java
new file mode 100644
index 0000000..24cef38
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java
@@ -0,0 +1,324 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import org.antlr.runtime.*;
+import org.antlr.runtime.tree.*;
+
+import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener;
+
+/*
+ * 1. transforms and encodes a regular COSMAS II like expression #REG(regexpr)
+ * into a AST tree -> encode().
+ * 2. transforms tree into the corresponding Koral:token/Koral:term, like:
+ * e.g. #REG(abc[']?s) ->
+ * {
+ * "@type": "koral:term",
+ * "match": "match:eq",
+ * "type" : "type:regex",
+ * "key" : "abc[']?s",
+ * "layer": "orth"
+ * }...
+ *
+ * - see doc: http://korap.github.io/Koral/
+ * - generation of koral:term -> processOPREG().
+ * 06.09.23/FB
+ */
+
+public class c2ps_opREG
+
+{
+
+ private static boolean DEBUG = false;
+
+ /* encode():
+ *
+ * input = e.g. "#REG('abc(d|e)*')" -> return AST = (OPREG "abc(d|e)*"):
+ * The regular expression is returned enclosed by "...".
+ * i.e. input #REG(abc) -> ^(OPREG "abc"),
+ * i.e. input #REG("abc") -> ^(OPREG "abc"),
+ * i.e. input #REG('abc') -> ^(OPREG "abc").
+ * - Escaping the >>"<<:
+ * i.e. input #REG(ab"c) -> ^(OPREG "ab\"c").
+ * i.e. input #REG(ab\"c) -> ^(OPREG "ab\"c").
+ * i.e. input #REG(ab\\"c) -> ^(OPREG "ab\\"c").
+ * i.e. input #REG("ab"c") : not possible.
+ * i.e. input #REG("ab\"c") -> ^(OPREG "ab\"c") : already escaped.
+ * i.e. input #REG("ab\\\"c")-> ^(OPREG "ab\\"c") : already escaped.
+ * i.e. input #REG('ab"c') -> ^(OPREG "ab\"c").
+ * i.e. input #REG('ab\"c') -> ^(OPREG "ab\"c") already escaped.
+ * i.e. input #REG('ab\\"c') -> ^(OPREG "ab\\"c"): 1 >>\<< + 1 >>\"<<.
+ *
+ * 06.09.23/FB
+ */
+    public static Tree encode (String input, int tokenType)
+
+    {
+        // Processing steps:
+        //  1. cut off the leading "#REG(" and the trailing ")",
+        //  2. strip blanks around the argument (the lexer accepts #REG( 'a' )),
+        //  3. turn the delimiters into "...", adding them if there are none,
+        //  4. strip blanks directly inside the delimiters,
+        //  5. de-escape >>\'<< and escape every >>"<< that is still unescaped.
+        // Input that does not start with "#REG(" is returned unchanged; regionMatches()
+        // is used because it cannot fail on input shorter than the prefix.
+        if( !input.regionMatches(true, 0, "#REG(", 0, 5) )
+        {
+            if( DEBUG ) System.out.printf("opREG.encode: nothing encoded.\n");
+            return new CommonTree(new CommonToken(tokenType, input));
+        }
+
+        StringBuilder sb = new StringBuilder(input.substring(5));
+
+        if( sb.length() > 0 && sb.charAt(sb.length()-1) == ')' )
+            sb.deleteCharAt(sb.length()-1);
+
+        // blanks outside the delimiters, e.g. #REG( 'abc' ): they must be removed
+        // before the delimiters are inspected, or the quotes end up in the key:
+        while( sb.length() > 0 && sb.charAt(0) == ' ' )
+            sb.deleteCharAt(0);
+        while( sb.length() > 0 && sb.charAt(sb.length()-1) == ' ' )
+            sb.deleteCharAt(sb.length()-1);
+
+        // #REG(a), #REG('a') -> #REG("a") both:
+        if( sb.length() > 0 && sb.charAt(0) == '\'' )
+        {
+            sb.setCharAt(0, '"');
+            sb.setCharAt(sb.length()-1, '"');
+        }
+        else if( sb.length() == 0 || sb.charAt(0) != '"' )
+        {
+            sb.insert(0, '"');
+            sb.append('"');
+        }
+
+        // remove leading blanks: " abc " -> "abc ":
+        while( sb.length() > 2 && sb.charAt(1) == ' ' )
+            sb.deleteCharAt(1);
+
+        // remove trailing blanks: "abc " -> "abc":
+        int len;
+        while( (len=sb.length()) > 3 && sb.charAt(len-2) == ' ' )
+            sb.deleteCharAt(len-2);
+
+        /* de-escape >>\'<< -> >>'<<,
+         * there is no need any more to keep >>'<< escaped.
+         * e.g. #REG('that\'s') -> "that\'s" -> >>that's<<.
+         * The >>"<< must still be kept escaped as "..." are still delimiters in the AST.
+         */
+        for(int i=1; i<sb.length()-1; i++)
+        {
+            if( sb.charAt(i) == '\'' && sb.charAt(i-1) == '\\' )
+            {
+                sb.deleteCharAt(i-1);
+                i--;
+            }
+        }
+
+        /* escaping >>"<<:
+         *   "     -> \"
+         *   \"    -> \"
+         *   \\"   -> \\\"
+         *   \\\"  -> \\\"
+         * i.e. a >>"<< is escaped iff it is preceded by an even number of >>\<<
+         * (including none). The leading and trailing >>"<< are skipped.
+         */
+        int n = 0; // length of the >>\<< sequence directly before position i.
+        for(int i=1; i<sb.length()-1; i++)
+        {
+            char c = sb.charAt(i);
+
+            if( c == '\\' )
+            {
+                n++;
+                continue;
+            }
+
+            if( c == '"' && (n % 2) == 0 )
+            {
+                sb.insert(i, '\\');
+                // jump over the >>"<< that just moved one position to the right;
+                // without this it would be seen again and escaped a second time.
+                i++;
+            }
+
+            n = 0; // any other character ends the >>\<< sequence.
+        }
+
+        if( DEBUG ) System.out.printf("opREG.encode: encoded='%s'.\n", sb.toString());
+
+        return new CommonTree(new CommonToken(tokenType, sb.toString()));
+    } // encode
+
+ /*
+ * printTokens:
+ * - debugging aid: lexes query with c2psLexer and prints the number of
+ *   tokens and the text of each token to stdout.
+ * - builds its own lexer and CommonTokenStream, because
+ *   tokens.fill() will consume all tokens.
+ * - lexer errors are reported to errorListener.
+ * - the output lines are prefixed with "opREG.check:".
+ * 14.09.23/FB
+ */
+
+ private static void printTokens(String query, Antlr3DescriptiveErrorListener errorListener)
+
+ {
+ ANTLRStringStream
+ ss = new ANTLRStringStream(query);
+ c2psLexer
+ lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream
+ tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3
+
+ lex.setErrorReporter(errorListener);
+
+ // get all tokens from lexer:
+ tokens.fill();
+
+ System.out.printf("opREG.check: no. of tokens = %d.\n", tokens.size());
+ for(int i=0; i<tokens.size(); i++)
+ System.out.printf("opREG.check: token[%2d] = %s.\n", i, tokens.get(i).getText());
+
+ } // printTokens
+
+ /* check:
+ * - parses query with c2psLexer/c2psParser (rule c2ps_query) and returns the AST,
+ *   or null if the parser threw a RecognitionException.
+ * - index is not used by the active code.
+ */
+
+ public static Tree check (String query, int index)
+
+ {
+ ANTLRStringStream
+ ss = new ANTLRStringStream(query);
+ c2psLexer
+ lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream
+ tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3
+ c2psParser
+ g = new c2psParser(tokens);
+ Tree
+ tree = null;
+ Antlr3DescriptiveErrorListener errorListener =
+ new Antlr3DescriptiveErrorListener(query);
+
+ // Use custom error reporters for lexer and parser (the listener is also passed to
+ // printTokens()), or the program will break on broken input, e.g. >>#REG(\" a"s\")<<.
+ lex.setErrorReporter(errorListener);
+ ((c2psParser) g).setErrorReporter(errorListener);
+
+ if( DEBUG )
+ {
+ //System.out.format("opREG.check: input='%s', index=%d.\n", query, index);
+ printTokens(query, errorListener);
+ System.out.flush();
+ }
+
+
+ try {
+ c2psParser.c2ps_query_return
+ c2Return = ((c2psParser) g).c2ps_query(); // instead of t().
+
+ // fetch the AST tree from the parser result:
+ tree = (Tree) c2Return.getTree();
+ //if (DEBUG)
+ // System.out.printf("opREG.check: tree = '%s'.\n", tree.toStringTree());
+ }
+ catch (RecognitionException e) {
+ System.err.printf("c2po_opREG.check: Recognition Exception!\n");
+ }
+
+ return tree;
+ } // check
+
+    /*
+     * replaceIfNotEscaped:
+     * - kind of adhoc alternative to String.replaceAll().
+     * - removes every occurrence of >>"<< in buf that isn't escaped by >>\<<.
+     * Returns the resulting string.
+     * 25.09.23/FB
+     */
+
+    private static String replaceIfNotEscaped(String buf)
+
+    {
+        StringBuilder sb = new StringBuilder(buf);
+        int pos = 0;
+
+        // Start at index 0: a >>"<< in the very first position has no predecessor,
+        // so it cannot be escaped and has to be removed as well.
+        while( pos < sb.length() )
+        {
+            boolean escaped = pos > 0 && sb.charAt(pos-1) == '\\';
+
+            if( sb.charAt(pos) == '"' && !escaped )
+                sb.deleteCharAt(pos); // next character moves to pos: do not advance.
+            else
+                pos++;
+        }
+
+        return sb.toString();
+
+    } // replaceIfNotEscaped
+
+ /**
+ * main
+ */
+
+    public static void main (String args[]) throws Exception
+
+    {
+        // Manual test driver: prints a replaceAll()/replaceIfNotEscaped() comparison
+        // and then the AST returned by check() for each of the sample inputs below.
+
+        String input[] = { "#REG(abc)",
+            "#REG(def's )",
+            "#REG( def's)",
+            "#REG( def's )",
+            "#REG(abc[\"]ef)",
+            "#REG('abc)", // ' is missing: generates Syntax Error .
+            "#REG('abc\')", // User input = #REG('abc\') : OK, nothing escaped.
+            "#REG('abc\\')", // User input = #REG('abc\') : OK, same behavior: \\ == \.
+            "#REG((a|b))", // broken input, should use ".." or '..'.
+            "#REG('(a|b)')", // OK.
+            "#REG(\"(a|b)\")", // OK.
+            "#REG(^[A-Z]+abc[\']*ung$)",
+            "#REG('ab(cd|ef)*')",
+            "#REG('abc(def|g)*[)(]')",
+            "#REG(\"abc(def|g)*[)(]\")",
+            "#REG('abc[\"]')", // User input = #REG('abc["]') : OK, needs escape => #REG("...\"...")
+            "#REG(\"abc[\"]\")", // User input = #REG("abc["]") : broken because of 2nd " -> syntax error.
+            "#REG(\"abc[\\\"]\")", // User input = #REG("abc[\"]"): OK, already escaped by user => #REG("...\"...")
+            "#REG(\"abc[\\\\\"]\")" // User input = #REG("abc[\\"]") : broken. with escaped " => #REG("...\"...")
+            };
+
+        Tree tree;
+
+        // Comparison of String.replaceAll() and replaceIfNotEscaped() on a string
+        // containing both unescaped and escaped >>"<<:
+        String s  = "#REG('\"Prefix\"a\\\"b')";
+        String s2 = s.replaceAll("\"", "");
+        String s3 = s.replaceAll("[^\\\\]\"", "");
+        String s4 = replaceIfNotEscaped(s);
+
+        System.out.printf("Test: s before replaceAll : >>%s<<.\n", s);
+        System.out.printf("Test: s2 after replaceAll : >>%s<<.\n", s2);
+        System.out.printf("Test: s3 after replaceAll : >>%s<<.\n", s3);
+        System.out.printf("Test: s4 after replaceIfNotEscaped: >>%s<<.\n", s4);
+
+        // Parse every sample input and print the resulting AST.
+        // There must be no System.exit() in front of this loop, or the samples
+        // above are never parsed.
+        for (int i = 0; i < input.length; i++)
+        {
+            System.out.printf("c2ps_opREG: Parsing input %02d: >>%s<<: ...\n", i, input[i]);
+            tree = check(input[i], 0);
+
+            // check() returns null when the parser threw a RecognitionException:
+            System.out.printf("c2ps_opREG: tree %02d: >>%s<<.\n\n", i,
+                tree == null ? "null" : tree.toStringTree());
+        }
+
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index 69a6293..1f8198a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -127,7 +127,79 @@
public static Pattern wildcardPlusPattern = Pattern.compile("([+])");
public static Pattern wildcardQuestionPattern = Pattern.compile("([?])");
-
+ /**
+ * replaceDoubleQuotes:
+ * - kind of adhoc enhanced replacement function for >>"<< for #REG(expr)
+ * instead of String.replaceAll().
+ * - removes every occurrence of >>"<< in buf that is not escaped by >>\<<.
+ * - If the >>"<< is escaped, the escape char is removed: >>\"<< -> >>"<<.
+ * Notes:
+ * - the converted string is intended to be grepped.
+ * E.g.:
+ * - >>"\"Abend\"-Ticket"<< -> >>"Abend"-Ticket<<.
+ * Returns the replaced string.
+ * 26.09.23/FB
+ */
+
+    private static String replaceDoubleQuotes(String buf)
+
+    {
+        // Works in place on a copy of buf. Each >>"<< triggers exactly one deletion:
+        //  - preceded by >>\<<: the >>\<< is deleted and the >>"<< is kept,
+        //  - otherwise        : the >>"<< itself is deleted.
+        StringBuilder text = new StringBuilder(buf);
+        int pos = 0;
+
+        while( pos < text.length() )
+        {
+            if( text.charAt(pos) != '"' )
+            {
+                pos++;
+                continue;
+            }
+
+            boolean escaped = pos > 0 && text.charAt(pos-1) == '\\';
+
+            // After either deletion the character following the >>"<< sits at pos,
+            // so pos must not be advanced here.
+            text.deleteCharAt(escaped ? pos-1 : pos);
+        }
+
+        return text.toString();
+
+    } // replaceDoubleQuotes
+
+ /**
+ * replaceIfNotEscaped:
+ * - kind of adhoc alternative to String.replaceAll().
+ * - removes every occurrence of >>"<< in buf IF it isn't escaped by >>\<<.
+ * Notes:
+ * - first intention: replace String.replaceALL() in processOPREG() because
+ * replaceALL() cannot be used in that special case.
+ * Returns the replaced string.
+ * 25.09.23/FB
+ */
+
+    private static String replaceIfNotEscaped(String buf)
+
+    {
+        StringBuilder text = new StringBuilder(buf);
+        int pos = 0;
+
+        while( pos < text.length() )
+        {
+            // a >>"<< counts as escaped only if a >>\<< stands directly before it:
+            boolean escaped = pos > 0 && text.charAt(pos-1) == '\\';
+
+            if( text.charAt(pos) == '"' && !escaped )
+                text.deleteCharAt(pos); // the next character moves to pos.
+            else
+                pos++;
+        }
+
+        return text.toString();
+    } // replaceIfNotEscaped
+
/**
* @param tree
* The syntax tree as returned by ANTLR
@@ -142,6 +214,7 @@
process(query);
if (DEBUG) {
log.debug(">>> " + requestMap.get("query") + " <<<");
+ System.out.printf("Cosmas2QueryProcessor: >>%s<<.\n", requestMap.get("query"));
}
}
@@ -151,14 +224,18 @@
Tree tree = null;
tree = parseCosmasQuery(query);
if (DEBUG) {
+ System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query);
log.debug("Processing CosmasII query: " + query);
}
- if (tree != null) {
- if (DEBUG) {
- log.debug("ANTLR parse tree: " + tree.toStringTree());
- }
+ if (tree != null)
+ {
+ System.out.printf("\nANTLR parse tree: %s.\n\n", tree.toStringTree());
+
+ if (DEBUG)
+ log.debug("ANTLR parse tree: " + tree.toStringTree());
+
processNode(tree);
- }
+ }
}
@@ -278,6 +355,11 @@
if (nodeCat.equals("OPBED")) {
processOPBED(node);
}
+
+ if (nodeCat.equals("OPREG")) {
+ processOPREG(node);
+ }
+
objectsToPop.push(stackedObjects);
toWrapsToPop.push(stackedToWrap);
@@ -444,6 +526,83 @@
}
}
+ /* processOPREG:
+ *
+ * - input Node structure is: (OPREG "regexpr").
+ * - transforms tree into the corresponding Koral:token/Koral:term, like:
+ * e.g. #REG(abc[']?s) ->
+ * {
+ * "@type": "koral:term",
+ * "match": "match:eq", // optional
+ * "type" : "type:regex",
+ * "key" : "abc[']?s",
+ * "layer": "orth"
+ * }.
+ *
+ * - see doc: http://korap.github.io/Koral/
+ *
+ * 06.09.23/FB
+ */
+
+    private void processOPREG (Tree node)
+
+    {
+
+        // Input node structure: (OPREG "regexpr"). The result is a token wrapping
+        // a regex term:
+        //   token.wrap = { "key": regexpr, "layer": "orth",
+        //                  "type": "type:regex", "match": "match:eq" }
+        // Nothing is generated for a node without child.
+
+        // empty case (is that possible?): checked first, because everything
+        // below dereferences the child.
+        if( node.getChildCount() < 1 )
+            return;
+
+        Tree
+            nodeChild = node.getChild(0);
+
+        if( DEBUG )
+        {
+            System.out.printf("Debug: processOPREG: child: '%s' cat=%s type=%d.\n",nodeChild.getText(), getNodeCat(node),
+                nodeChild.getType());
+        }
+
+        // see processOPWF_OPWF_OPLEM
+        // for how to insert regexpr into Koral JSON-LD
+
+        Map<String, Object>
+            token = KoralObjectGenerator.makeToken();
+
+        // remember the new token on the object stack:
+        objectStack.push(token);
+        stackedObjects++;
+
+        Map<String, Object>
+            fieldMap = KoralObjectGenerator.makeTerm();
+
+        token.put("wrap", fieldMap);
+
+        /* Build the key from the child's text with replaceDoubleQuotes():
+         *  - every >>"<< that is not escaped by >>\<< is deleted,
+         *  - every >>\"<< is reduced to >>"<<.
+         * String.replaceAll("\"", "") cannot be used for this, because it would
+         * delete the escaped >>"<< of the regular expression as well.
+         */
+        String
+            value = replaceDoubleQuotes(nodeChild.toStringTree());
+
+        // make category-specific fieldMap entries:
+        fieldMap.put("key", value);
+        fieldMap.put("layer", "orth");
+        fieldMap.put("type", "type:regex");
+        fieldMap.put("match", "match:eq");
+
+        // decide where to put (objPos=1, not clear why, but it works only like that - 20.09.23/FB):
+        putIntoSuperObject(token,1);
+
+    } // processOPREG
+
private void processOPNHIT (Tree node) {
Integer[] classRef = new Integer[] { classCounter + 128 + 1,
@@ -1511,19 +1670,40 @@
@SuppressWarnings("unchecked")
- private void putIntoSuperObject (Map<String, Object> object,
- int objStackPosition) {
- if (objectStack.size() > objStackPosition) {
+ private void putIntoSuperObject (Map<String, Object> object, int objStackPosition)
+
+ {
+ if( DEBUG )
+ {
+ System.out.printf("Debug: putIntosuperObject(<>,int): objectStack.size=%d objStackPos=%d object=%s.\n",
+ objectStack.size(), objStackPosition, object == null ? "null" : "not null");
+
+ if( objectStack != null && objectStack.size() > 0 )
+ System.out.printf("Debug: putIntosuperObject: objectStack = %s.\n", objectStack.toString());
+
+ if( invertedOperandsLists != null )
+ System.out.printf("Debug: putIntosuperObject: invertedOperandsLists: [%s].\n", invertedOperandsLists.toString());
+ }
+
+
+ if (objectStack.size() > objStackPosition)
+ {
ArrayList<Object> topObjectOperands =
- (ArrayList<Object>) objectStack.get(objStackPosition)
- .get("operands");
- if (!invertedOperandsLists.contains(topObjectOperands)) {
+ (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
+
+ if( DEBUG )
+ System.out.printf("Debug: putIntosuperObject: topObjectOperands = [%s].\n", topObjectOperands == null ? "null" : "not null");
+
+ objectStack.get(objStackPosition);
+
+ if (!invertedOperandsLists.contains(topObjectOperands))
+ {
topObjectOperands.add(object);
- }
+ }
else {
topObjectOperands.add(0, object);
- }
- }
+ }
+ }
else {
requestMap.put("query", object);
}
@@ -1618,7 +1798,8 @@
private Tree parseCosmasQuery (String query) {
- query = rewritePositionQuery(query);
+
+ query = rewritePositionQuery(query);
Tree tree = null;
Antlr3DescriptiveErrorListener errorListener =
new Antlr3DescriptiveErrorListener(query);
@@ -1627,17 +1808,25 @@
c2psLexer lex = new c2psLexer(ss);
org.antlr.runtime.CommonTokenStream tokens =
new org.antlr.runtime.CommonTokenStream(lex); // v3
+
+ // System.out.printf("parseCosmasQuery: tokens = %d\n", tokens.size());
+ // System.out.printf("parseCosmasQuery: tokens = %s\n", tokens.toString());
+
parser = new c2psParser(tokens);
+
// Use custom error reporters
lex.setErrorReporter(errorListener);
((c2psParser) parser).setErrorReporter(errorListener);
+
c2psParser.c2ps_query_return c2Return =
((c2psParser) parser).c2ps_query(); // statt t().
+
// AST Tree anzeigen:
tree = (Tree) c2Return.getTree();
if (DEBUG) log.debug(tree.toStringTree());
- }
+ }
catch (RecognitionException e) {
+ System.err.printf("parseCosmasQuery: Recognition Exception!\n");
log.error(
"Could not parse query. Please make sure it is well-formed.");
addError(StatusCodes.MALFORMED_QUERY,
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index 8294dca..5ed3814 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -55,8 +55,6 @@
new HashMap<String, Class<? extends AbstractQueryProcessor>>();
qlProcessorAssignment.put("poliqarpplus",
PoliqarpPlusQueryProcessor.class);
- qlProcessorAssignment.put("cqp",
- CQPQueryProcessor.class);
qlProcessorAssignment.put("cosmas2", Cosmas2QueryProcessor.class);
qlProcessorAssignment.put("annis", AnnisQueryProcessor.class);
qlProcessorAssignment.put("cql", CqlQueryProcessor.class);
@@ -74,6 +72,8 @@
private List<Object> warnings;
private List<Object> messages;
+ private boolean DEBUG = false;
+
public QuerySerializer () {
this.errors = new ArrayList<>();
this.warnings = new ArrayList<>();
@@ -136,15 +136,19 @@
* The query string
* @param queryLanguage
* The query language. As of 17 Dec 2014, this must be one of
- * 'poliqarpplus', 'cqp', 'cosmas2', 'annis' or 'cql'.
+ * 'poliqarpplus', 'cosmas2', 'annis' or 'cql'.
* @throws IOException
*/
public void run (String query, String queryLanguage) throws IOException {
+
+ ast.verbose = DEBUG ? true : false; // debugging: 01.09.23/FB
+
if (queryLanguage.equalsIgnoreCase("poliqarp")) {
ast = new PoliqarpPlusQueryProcessor(query);
}
else if (queryLanguage.equalsIgnoreCase("cosmas2")) {
ast = new Cosmas2QueryProcessor(query);
+ //System.out.printf("\ncosmas2 AST='%s'.\n\n", ast.query);
}
else if (queryLanguage.equalsIgnoreCase("poliqarpplus")) {
ast = new PoliqarpPlusQueryProcessor(query);
@@ -152,9 +156,6 @@
else if (queryLanguage.equalsIgnoreCase("cql")) {
ast = new CqlQueryProcessor(query);
}
- else if (queryLanguage.equalsIgnoreCase("cqp")) {
- ast = new CQPQueryProcessor(query);
- }
else if (queryLanguage.equalsIgnoreCase("fcsql")) {
ast = new FCSQLQueryProcessor(query);
}
@@ -165,7 +166,8 @@
throw new IllegalArgumentException(
queryLanguage + " is not a supported query language!");
}
- System.out.println(this.toJSON());
+
+ /*if( DEBUG )*/ System.out.println(this.toJSON());
}
public QuerySerializer setQuery (String query, String ql, String version) {
@@ -186,9 +188,6 @@
else if (ql.equalsIgnoreCase("poliqarpplus")) {
ast = new PoliqarpPlusQueryProcessor(query);
}
- else if (ql.equalsIgnoreCase("cqp")) {
- ast = new CQPQueryProcessor(query);
- }
else if (ql.equalsIgnoreCase("cql")) {
if (version == null) {
ast = new CqlQueryProcessor(query);
diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
index 0722c9b..8c98f62 100644
--- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
@@ -19,7 +19,8 @@
*
* @author Joachim Bingel (bingel@ids-mannheim.de)
* @author Nils Diewald
- * @version 1.1
+ * @author Franck Bodmer
+ * @version 1.2 - 21.09.23
*/
public class Cosmas2QueryProcessorTest {
@@ -1702,4 +1703,135 @@
assertEquals("s", res.at("/query/distances/0/key").asText());
assertEquals("operation:sequence", res.at("/query/operation").asText());
}
+
+ /* Testing #REG(expr), #REG('expr') and #REG("expr").
+ * 21.09.23/FB
+ */
+
+ @Test
+ public void testREG () throws JsonProcessingException, IOException {
+
+ boolean debug = true;
+
+ query = "#REG(^aber$)";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("koral:token", res.at("/query/@type").asText());
+ assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+ assertEquals("^aber$", res.at("/query/wrap/key").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("match:eq", res.at("/query/wrap/match").asText());
+
+ query = "#REG(l'été)";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("l'été", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG(l\\'été)";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("l'été", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG(\"l'été\")";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("l'été", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG(\"l\\'été\")";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("l'été", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG('l\\'été.*')";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("l'été.*", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG('\\\"été\\\"$')"; // means user input is #REG('\"été\"').
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("\"été\"$", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ // checks the >>"<<:
+ query = "#REG(\\\"Abend\\\"-Ticket)"; // means user input = #REG(\"Abend\"-Ticket).
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG('\\\"Abend\\\"-Ticket')"; // means user input = #REG(\"Abend\"-Ticket).
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG('\"Abend\"-Ticket')"; // means user input = #REG('"Abend"-Ticket').
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); // key must be escaped, because converted to in "...".
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG(\"\\\"Abend\\\"-Ticket\")"; // means user input = #REG("\"Abend\"-Ticket") -> key: >>"Abend"-Ticket<<.
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText());
+ assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+ //
+
+ query = "#REG('^(a|b)?+*$')";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertEquals("^(a|b)?+*$", res.at("/query/wrap/key").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+
+ query = "#REG(\"[A-Z()]\")";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+
+ assertEquals("[A-Z()]", res.at("/query/wrap/key").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+
+ }
}