Solved the issue of any string without quotes being parsed as a span; see test Regex_without_quoutes in CqpGrammarTest
Change-Id: I704c47704e8a744b427b0082dccb8df0aa1d2c74
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/7384
Reviewed-by: Nils Diewald <nils@diewald-online.de>
diff --git a/src/main/antlr/cqp/CQP.g4 b/src/main/antlr/cqp/CQP.g4
index 29475c5..1507abb 100644
--- a/src/main/antlr/cqp/CQP.g4
+++ b/src/main/antlr/cqp/CQP.g4
@@ -84,9 +84,9 @@
/* Regular expressions and Regex queries */
fragment RE_symbol : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '\\' | '"' | ':' | '\'');
-fragment RE_esc : ('\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
- | '(' | ')' | '|' | '\\' | ':' | '"' | '\''))| '\'' '\'' | '"' '"';
-fragment RE_char : (RE_symbol | RE_esc );
+fragment RE_esc : '\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '\\' | ':' | '"' | '\'')| '\'' '\'' | '"' '"';
+fragment RE_char : RE_symbol | RE_esc ;
fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;
fragment RE_chgroup : '[' RE_char+ ']';
@@ -94,8 +94,8 @@
fragment RE_group : '(' RE_expr ')';
fragment RE_expr : ('.' | RE_char | RE_alter | RE_chgroup | RE_quant | RE_group)+;
/* you can search for DQUOTE inside SQUOUTE, and viceversa: '"' or "'"; */
-fragment RE_dquote : DQUOTE (RE_expr | '\'' | ':' )* DQUOTE; // DQOUTE is not good, modify like verbatim in PQ+!
-fragment RE_squote : SQUOTE (RE_expr | '"' | ':')* SQUOTE;
+fragment RE_dquote : DQUOTE (RE_expr | '\'' | ':' )+ DQUOTE; // empty regex are no longer valid
+fragment RE_squote : SQUOTE (RE_expr | '"' | ':')+ SQUOTE;
@@ -219,10 +219,9 @@
)
;
-
-span
-: skey // for lbound/sbound; check how it works for meet!
- | LT ((foundry SLASH)? layer termOp)? skey (( NEG* (LRPAREN term RRPAREN| LRPAREN termGroup RRPAREN | NEG* term | NEG* termGroup))? GT)
+spankey: skey; // simple span to be used only with operators (region, lbound,rbound, within, meet)
+span:
+ LT ((foundry SLASH)? layer termOp)? skey (( NEG* (LRPAREN term RRPAREN| LRPAREN termGroup RRPAREN | NEG* term | NEG* termGroup))? GT)
;
closingspan
@@ -232,7 +231,7 @@
position
//: POSITION_OP LRPAREN (segment|sequence) COMMA (segment|sequence) RRPAREN
-: POSITION_OP LRPAREN span RRPAREN
+: POSITION_OP LRPAREN (span|spankey) RRPAREN
;
@@ -288,7 +287,7 @@
startswith: span (sequence|segment);
endswith: (sequence|segment) closingspan;
-region: SLASH REGION_OP LPAREN span RPAREN;
+region: SLASH REGION_OP LPAREN (span|spankey) RPAREN;
@@ -324,7 +323,7 @@
;
within
-: WITHIN span //WORD
+: WITHIN (span|spankey) //WORD
;
/**
@@ -363,7 +362,7 @@
meetunion
:
-(((LRPAREN meetunion RRPAREN) | segment) ((LRPAREN meetunion RRPAREN) | segment) ((NUMBER NUMBER) | span))
+(((LRPAREN meetunion RRPAREN) | segment) ((LRPAREN meetunion RRPAREN) | segment) ((NUMBER NUMBER) | span | spankey))
;
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CQPQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CQPQueryProcessor.java
index 69c6f25..66acf08 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CQPQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CQPQueryProcessor.java
@@ -180,12 +180,12 @@
processAlignment(node);
}
- else if ((nodeCat.equals("span") || nodeCat.equals("closingspan")) && getNodeCat(node.getChild(0))!="skey") {
- String nCat0 = getNodeCat(node.getChild(0));
- if (nCat0.equals("skey")) {
- // for unlayered spans: \region[np], lbound(np), etc!
+ else if (nodeCat.equals("spankey")) {
processSpan(node);
- } else {
+ }
+
+ else if ((nodeCat.equals("span") || nodeCat.equals("closingspan")) && getNodeCat(node.getChild(0))!="skey") {
+
// for struct like <s> ... </s>; we don't want to serialize span s two times;
String spankey ="";
int ccount = node.getChildCount();
@@ -200,7 +200,7 @@
spanNodeCats.push(spankey);
processSpan(node);
}
- }
+ // }
}
else if (nodeCat.equals("disjunction")) {
@@ -850,67 +850,62 @@
List<ParseTree> negations = getChildrenWithCat(node, "!");
int termOrTermGroupChildId = 1;
boolean negated = false;
+ ArrayList<String> flags = new ArrayList<String>();
if (negations.size() % 2 == 1) {
negated = true;
termOrTermGroupChildId += negations.size();
}
-
+
if (getNodeCat(node.getChild(0)).equals("key")) {
// no 'term' child, but direct key specification: process here
+ String type ="type:regex"; // type depends on flag;
Map<String, Object> term = KoralObjectGenerator
.makeTerm();
-
- String key = node.getChild(0).getText();
-
- if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) {
- // isRegex = true;
- term.put("type", "type:regex");
-
- // fixme: use stream with offset to get text!
- // TokenStream stream = parser.getTokenStream();
- // key = stream.getText(node.getChild(0).getSourceInterval());
+ TokenStream stream = parser.getTokenStream();
+ String key = stream.getText(node.getChild(0).getSourceInterval());
+ ParseTree flagNode = getFirstChildWithCat(node, "flag");
+ String flag ="";
+ if (flagNode != null) {
+ // substring removes leading slash '/'
+ flag = getNodeCat(flagNode.getChild(0)).substring(1);
+ if (flag.contains("c") || flag.contains("C")){
+ flags.add("flags:caseInsensitive");
+ }
+ if (flag.contains("d") || flag.contains("D")){
+ flags.add("flags:diacriticsInsensitive");
+ }
+ if (flag.contains("l")|| flag.contains("L")) {
+ // overwrite type
+ type = "type:string";
+ }
+
+ }
String first = key.substring(0, 1);
String last = key.substring(key.length()-1, key.length());
- key = key.substring(1, key.length() - 1);
- // treat the doubleqoutes and singlequoutes inside regex!
+ key = key.substring(1, key.length()-1).replaceAll("\\\\\\\\","\\\\"); //.replaceAll("\\\\'", "'");
+ // treat escaping doubleqoutes and singlequoutes inside regex!
if (first.equals("\"") && last.equals("\"")) {
- key = key.replaceAll("\"\"", "\"");
+ //remove escape by doubling ""
+ key = key.replaceAll("\"\"", "\"");
+ //remove the "\" escape for "
+ key = key.replaceAll("\\\\\"", "\"");
}
if (first.equals("'") && last.equals("'")) {
- key = key.replaceAll("''", "'");
+ //remove escape by doubling ''
+ key = key.replaceAll("''", "'");
+ //remove the "\" escape for '
+ key = key.replace("\\\'", "'");
}
- }
+
+ term.put("type", type);
term.put("layer", "orth");
term.put("key", key);
KoralMatchOperator matches = negated ? KoralMatchOperator.NOT_EQUALS
: KoralMatchOperator.EQUALS;
term.put("match", matches.toString());
- ParseTree flagNode = getFirstChildWithCat(node, "flag");
- if (flagNode != null) {
- ArrayList<String> flags = new ArrayList<String>();
- // substring removes leading slash '/'
- String flag = getNodeCat(flagNode.getChild(0)).substring(1);
- if (flag.contains("c") || flag.contains("C"))
- flags.add("flags:caseInsensitive");
- if (flag.contains("d") || flag.contains("D"))
- flags.add("flags:diacriticsInsensitive");
-
- if (flag.contains("l")|| flag.contains("L")) {
- ParseTree keyNode = node.getChild(0);
-
- // Get stream from hidden channel
- TokenStream stream = parser.getTokenStream();
- key = stream.getText(keyNode.getChild(0).getSourceInterval());
- key = key.substring(1, key.length()-1).replaceAll("\\\\\\\\","\\\\").replaceAll("\\\\'", "'");
- //override key and type:string
- term.put("key", key);
- term.put("type", "type:string");
- }
-
- if (!flags.isEmpty())
+ if (!flags.isEmpty())
term.put("flags", flags);
- }
token.put("wrap", term);
}
@@ -930,80 +925,7 @@
visited.addAll(getChildren(node));
}
- /*
- !! not used!
- private void processTokenStruct (ParseTree node) {
- // differs from processToken because it doesn't require/have [] around the token
- Map<String, Object> token = KoralObjectGenerator.makeToken();
- // handle negation
- List<ParseTree> negations = getChildrenWithCat(node, "!");
- int termOrTermGroupChildId = 0;
- boolean negated = false;
- // boolean isRegex = false;
- if (negations.size() % 2 == 1) {
- negated = true;
- termOrTermGroupChildId += negations.size();
- }
-
- if (getNodeCat(node.getChild(0)).equals("key")) {
- // no 'term' child, but direct key specification: process here
- Map<String, Object> term = KoralObjectGenerator
- .makeTerm();
-
- String key = node.getChild(0).getText();
-
- if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) {
- // isRegex = true;
- term.put("type", "type:regex");
-
- // fixme: use stream with offset to get text!
- // TokenStream stream = parser.getTokenStream();
- // key = stream.getText(node.getChild(0).getSourceInterval());
- key = key.substring(1, key.length() - 1);
- }
- term.put("layer", "orth");
- term.put("key", key);
- KoralMatchOperator matches = negated ? KoralMatchOperator.NOT_EQUALS
- : KoralMatchOperator.EQUALS;
- term.put("match", matches.toString());
- ParseTree flagNode = getFirstChildWithCat(node, "flag");
- if (flagNode != null) {
- ArrayList<String> flags = new ArrayList<String>();
- // substring removes leading slash '/'
- String flag = getNodeCat(flagNode.getChild(0)).substring(1);
- if (flag.contains("c") || flag.contains("C"))
- flags.add("flags:caseInsensitive");
- if (flag.contains("d") || flag.contains("D"))
- flags.add("flags:diacriticsInsensitive");
- if (flag.contains("l")|| flag.contains("L"))
- {
- ParseTree keyNode = node.getChild(0);
-
- // Get stream from hidden channel
- TokenStream stream = parser.getTokenStream();
- key = stream.getText(keyNode.getChild(0).getSourceInterval());
- key = key.substring(1, key.length()-1).replaceAll("\\\\\\\\","\\\\").replaceAll("\\\\'", "'");
- //override key and type:string
- term.put("key", key);
- term.put("type", "type:string");
- }
- if (!flags.isEmpty()) {
- term.put("flags", flags);
- }
- }
- token.put("wrap", term);
- }
- else {
- // child is 'term' or 'termGroup' -> process in extra method
- Map<String, Object> termOrTermGroup = parseTermOrTermGroup(
- node.getChild(termOrTermGroupChildId), negated);
- token.put("wrap", termOrTermGroup);
- }
- putIntoSuperObject(token);
- visited.addAll(getChildren(node));
- }
-
- */
+
/**
* Processes an 'alignment' node. These nodes represent alignment
@@ -1139,13 +1061,19 @@
stackedObjects++;
// for lboud and rbound, when span is child of position;
- if (hasChild(node, "span")) {
+ if (hasChild(node, "span")) {
ParseTree spanchildnode = getFirstChildWithCat (node, "span");
processSpan(spanchildnode);
-
objectStack.pop();
stackedObjects=stackedObjects-2;
}
+ if (hasChild(node, "spankey")) {
+ ParseTree spanchildnode = getFirstChildWithCat (node, "spankey");
+ processSpan(spanchildnode);
+ objectStack.pop();
+ stackedObjects=stackedObjects-2;
+ }
+
}
@@ -1486,11 +1414,11 @@
String key = null;
String value = null;
+ String type = "type:regex";
Map<String, Object> term = KoralObjectGenerator
.makeTerm();
// handle negation
boolean negated = negatedGlobal;
- boolean isRegex = false;
List<ParseTree> negations = getChildrenWithCat(node, "!");
if (negations.size() % 2 == 1)
negated = !negated;
@@ -1504,36 +1432,38 @@
// process foundry
if (foundryNode != null)
term.put("foundry", foundryNode.getText());
-
- // process regex
-
- if (getNodeCat(keyNode.getChild(0)).equals("regex")) {
- isRegex = true;
- term.put("type", "type:regex");
- // remove leading and trailing quotes
- //process verbatim flag %l
- if (flagNode!=null) {
- if (getNodeCat(flagNode.getChild(0)).contains("l") ||
- getNodeCat(flagNode.getChild(0)).contains("L")) {
-
- // Get stream from hidden channel
- TokenStream stream = parser.getTokenStream();
- key = stream.getText(keyNode.getChild(0).getSourceInterval());
- key = key.substring(1, key.length()-1).replaceAll("\\\\\\\\","\\\\").replaceAll("\\\\'", "'");
- //override with type:string
- term.put("type", "type:string");
- } else {
- key = keyNode.getText();
- key = key.substring(1, key.length() - 1);
- }
+
+ TokenStream stream = parser.getTokenStream();
+ key = stream.getText(keyNode.getSourceInterval());
+ String flag ="";
+ if (flagNode != null) {
+
+ flag = getNodeCat(flagNode.getChild(0)).substring(1);
+ if (flag.contains("l")|| flag.contains("L")) {
+ //overwrite type
+ type = "type:string";
+ }
}
+
+ String first = key.substring(0, 1);
+ String last = key.substring(key.length()-1, key.length());
+ key = key.substring(1, key.length()-1).replaceAll("\\\\\\\\","\\\\"); //.replaceAll("\\\\'", "'");
+ // treat escaping doubleqoutes and singlequoutes inside regex!
+ if (first.equals("\"") && last.equals("\"")) {
+ //remove escape by doubling ""
+ key = key.replaceAll("\"\"", "\"");
+ //remove the "\" escape for "
+ key = key.replaceAll("\\\\\"", "\"");
+ }
- else {
- key = keyNode.getText();
- key = key.substring(1, key.length() - 1);
- };
- }
-
+ if (first.equals("'") && last.equals("'")) {
+ //remove escape by doubling ''
+ key = key.replaceAll("''", "'");
+ //remove the "\" escape for '
+ key = key.replace("\\\'", "'");
+ }
+
+ term.put("type", type);
if (mode.equals("span"))
term.put("value", key);
else
@@ -1560,7 +1490,6 @@
// process value
if (valueNode != null && getNodeCat(valueNode.getChild(0)).equals("regex")) {
- isRegex = true;
term.put("type", "type:regex");
// remove leading and trailing quotes
value = valueNode.getText();
@@ -1581,7 +1510,7 @@
// process possible flags
if (flagNode != null) {
// substring removes leading %
- String flag = getNodeCat(flagNode.getChild(0)).substring(1);
+ flag = getNodeCat(flagNode.getChild(0)).substring(1);
// EM: handling flagnode as layer
if (node.getChild(1).equals(flagNode)){
@@ -1598,11 +1527,11 @@
for (int i=1; i < list.size(); i++) {
ParseTree n = list.get(i);
flag = getNodeCat(n.getChild(0)).substring(1);
- parseFlag(flag, isRegex, key, term);
+ parseFlag(flag, key, term);
}
}
else {
- term = parseFlag(flag, isRegex, key, term);
+ term = parseFlag(flag, key, term);
}
}
return term;
@@ -1676,7 +1605,7 @@
}
- private Map<String, Object> parseFlag (String flag, boolean isRegex,
+ private Map<String, Object> parseFlag (String flag,
String key, Map<String, Object> term) {
ArrayList<String> flags = new ArrayList<String>();
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPFlagTest.java b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPFlagTest.java
index d4688a5..412cf1c 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPFlagTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPFlagTest.java
@@ -22,22 +22,47 @@
super("CQP");
}
-
+
@Test
- public void testLiteralx () throws JsonProcessingException {
- query = "[mate/b=\"Der + Mann\"]";
+ public void testLiteral () throws JsonProcessingException {
+ query = "'22\\'-inch'%l";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
assertEquals("koral:term", result.at("/query/wrap/@type").asText());
- assertEquals("Der + Mann", result.at("/query/wrap/key").asText());
- assertEquals("b", result.at("/query/wrap/layer").asText());
- assertEquals("mate", result.at("/query/wrap/foundry").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+ assertEquals("orth", result.at("/query/wrap/layer").asText());
assertEquals("match:eq", result.at("/query/wrap/match").asText());
- assertEquals("type:regex", result.at("/query/wrap/type").asText());
- }
- @Test
- public void testLiteral () throws JsonProcessingException {
+ assertEquals("22\'-inch", result.at("/query/wrap/key").asText());
+
+ query = "\"22\\\"-inch\"%l;"; // query = "22\"-inch";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+ assertEquals("orth", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("22\"-inch", result.at("/query/wrap/key").asText());
+
+ query = "[mate/b='22\\'-inch'%l]";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+ assertEquals("b", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("22\'-inch", result.at("/query/wrap/key").asText());
+
+ query = "[mate/b=\"22\\\"-inch\"%l];";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+ assertEquals("b", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("22\"-inch", result.at("/query/wrap/key").asText()); // (no regex escape)
+
+
query = "[mate/b=\"Der + Mann\"%l]";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
@@ -51,8 +76,6 @@
@Test
public void testLiteralWithEscape () throws JsonProcessingException {
- // why do i need an escape for ' ?
- // EM: because you use ' for the key
query = "[mate/b='D\\'Ma \\\\nn'%l]";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
@@ -61,6 +84,15 @@
assertEquals("type:string", result.at("/query/wrap/type").asText());
assertEquals("D'Ma \\nn", result.at("/query/wrap/key").asText());
assertEquals("b", result.at("/query/wrap/layer").asText());
+ // escape by doubling + verbatim --> the doubled '' is still collapsed to a single '
+ query = "[mate/b='D''Ma \\\\nn'%l]";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+ assertEquals("D'Ma \\nn", result.at("/query/wrap/key").asText());
+ assertEquals("b", result.at("/query/wrap/layer").asText());
}
@@ -85,7 +117,7 @@
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
assertEquals("koral:term", result.at("/query/wrap/@type").asText());
- assertEquals("D'Ma\\\\nn", result.at("/query/wrap/key").asText());
+ assertEquals("D'Ma\\nn", result.at("/query/wrap/key").asText());
assertEquals("b", result.at("/query/wrap/layer").asText());
assertEquals("mate", result.at("/query/wrap/foundry").asText());
assertEquals("match:eq", result.at("/query/wrap/match").asText());
@@ -107,17 +139,38 @@
@Test
public void testDoubleQuoteWithinSingleQuote ()
throws JsonProcessingException {
+ query = "'D\"Ma\\\\nn'";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("D\"Ma\\nn", result.at("/query/wrap/key").asText());
+ assertEquals("orth", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("type:regex", result.at("/query/wrap/type").asText());
+
+
query = "[mate/b='D\"Ma\\\\nn']";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
assertEquals("koral:term", result.at("/query/wrap/@type").asText());
- assertEquals("D\"Ma\\\\nn", result.at("/query/wrap/key").asText());
+ assertEquals("D\"Ma\\nn", result.at("/query/wrap/key").asText());
assertEquals("b", result.at("/query/wrap/layer").asText());
assertEquals("mate", result.at("/query/wrap/foundry").asText());
assertEquals("match:eq", result.at("/query/wrap/match").asText());
assertEquals("type:regex", result.at("/query/wrap/type").asText());
- // with literal
+ // with literal/verbatim
+ query = "'D\"Ma\\\\nn'%l";
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("D\"Ma\\nn", result.at("/query/wrap/key").asText());
+ assertEquals("orth", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
+
+
+ // with literal/verbatim
query = "[mate/b='D\"Ma\\\\nn'%l]";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
@@ -126,13 +179,9 @@
assertEquals("b", result.at("/query/wrap/layer").asText());
assertEquals("mate", result.at("/query/wrap/foundry").asText());
assertEquals("match:eq", result.at("/query/wrap/match").asText());
- assertEquals("type:string", result.at("/query/wrap/type").asText());
-
-
-
+ assertEquals("type:string", result.at("/query/wrap/type").asText());
}
-
@Test
public void testURL () throws JsonProcessingException {
query = "'http://racai.ro'%l";
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPPositionTest.java b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPPositionTest.java
index f20ba7a..fd22903 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPPositionTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPPositionTest.java
@@ -436,14 +436,26 @@
// EM: rbound: last token in the region
// match token at the end of a sentence
@Test
+
+
+
public void testRbound () throws JsonProcessingException {
- query ="[\"copil\" & rbound(<s>)];";
- result = runQuery(query);
- assertEquals("koral:group", result.at("/query/@type").asText());
query ="[\"copil\" & rbound(s)];";
+
result = runQuery(query);
+ String res1 = result.toString();
assertEquals("koral:group", result.at("/query/@type").asText());
+ assertEquals("koral:span", result.at("/query/operands/0/@type").asText());
+
+ query ="[\"copil\" & rbound(<s>)];";
+ result = runQuery(query);
+ String res2 = result.toString();
+ assertEquals("koral:group", result.at("/query/@type").asText());
+ assertEquals("koral:span", result.at("/query/operands/0/@type").asText());
+ assertEquals(res1, res2);
+
+
query ="[\"copil\" & rbound(<base/s=s>)];";
result = runQuery(query);
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPQueryProcessorTest.java
index e11233b..ad487d5 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPQueryProcessorTest.java
@@ -70,9 +70,7 @@
assertEquals("flags:caseInsensitive", result.at("/query/wrap/flags/0").asText());
result = runQuery("[mate/x=\"\"%c]");
- assertEquals("mate", result.at("/query/wrap/foundry").asText());
- assertEquals("x", result.at("/query/wrap/layer").asText());
- assertEquals("flags:caseInsensitive", result.at("/query/wrap/flags/0").asText());
+ assertEquals(302, result.at("/errors/0/0").asInt());
}
@@ -1007,7 +1005,7 @@
assertEquals("VVFIN", result.at("/query/operands/1/wrap/key").asText());
- }
+ }
@Test
public void testWithinElement () throws JsonProcessingException, IOException {
@@ -1019,8 +1017,7 @@
assertEquals("frames:isAround", result.at("/query/frames/0").asText());
assertEquals("s", result.at("/query/operands/0/wrap/key").asText());
assertEquals("VVFIN", result.at("/query/operands/1/wrap/key").asText());
- }
-
+ }
@Test
public void testSpanSerialization () throws JsonProcessingException,
@@ -1060,6 +1057,7 @@
assertEquals(result1, result2);
}
+
@Ignore
@Test
public void testQueryReferences () throws JsonProcessingException, IOException {
@@ -1119,4 +1117,4 @@
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPRegexTest.java b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPRegexTest.java
index fe84399..394d724 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPRegexTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CQPRegexTest.java
@@ -17,7 +17,30 @@
public CQPRegexTest () {
super("CQP");
}
+
+ @Test
+ public void testRegexError () throws JsonProcessingException {
+
+ query = "\"?\"";
+ result = runQuery(query);
+ assertEquals(302, result.at("/errors/0/0").asInt());
+ assertEquals("Failing to parse at symbol: '\"'", result.at("/errors/0/1").asText());
+
+ query = "\"\\\"";
+ result = runQuery(query);
+ assertEquals(302, result.at("/errors/0/0").asInt());
+ assertEquals("Failing to parse at symbol: '\"'", result.at("/errors/0/1").asText());
+
+ query = "\"\"\"";
+ result = runQuery(query);
+ assertEquals(302, result.at("/errors/0/0").asInt());
+ assertEquals("Failing to parse at symbol: '\"'", result.at("/errors/0/1").asText());
+ query = "''';";
+ result = runQuery(query);
+ assertEquals(302, result.at("/errors/0/0").asInt());
+ assertEquals("Failing to parse at symbol: '\''", result.at("/errors/0/1").asText());
+ }
@Test
public void testRegex () throws JsonProcessingException {
@@ -68,14 +91,8 @@
assertEquals("match:eq", result.at("/query/wrap/match").asText());
- query = "\"?\"";
- result = runQuery(query);
- assertEquals(302, result.at("/errors/0/0").asInt());
+
- query = "\"\"\"";
- result = runQuery(query);
- assertEquals("", result.at("/query/wrap/key").asText());
- assertNotEquals(302, result.at("/errors/0/0").asInt());
}
@@ -83,6 +100,18 @@
public void testRegexEscape () throws JsonProcessingException {
// Escape regex symbols
+
+
+
+ query = "\"a\\.\\+\\?\\\\\""; //query = "a\.\+\?\\"
+ result = runQuery(query);
+ assertEquals("koral:token", result.at("/query/@type").asText());
+ assertEquals("koral:term", result.at("/query/wrap/@type").asText());
+ assertEquals("type:regex", result.at("/query/wrap/type").asText());
+ assertEquals("orth", result.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", result.at("/query/wrap/match").asText());
+ assertEquals("a\\.\\+\\?\\", result.at("/query/wrap/key").asText());
+
query = "\"a\\.\"";
result = runQuery(query);
assertEquals("koral:token", result.at("/query/@type").asText());
@@ -151,7 +180,7 @@
assertEquals("type:regex", result.at("/query/wrap/type").asText());
assertEquals("orth", result.at("/query/wrap/layer").asText());
assertEquals("match:eq", result.at("/query/wrap/match").asText());
- assertEquals("22\\\"-inch", result.at("/query/wrap/key").asText());
+ assertEquals("22\"-inch", result.at("/query/wrap/key").asText());
query = "'a''.+?';"; //query = 'a''.+?'
@@ -197,15 +226,7 @@
assertEquals("match:eq", result.at("/query/wrap/match").asText());
assertEquals("a\\.", result.at("/query/wrap/key").asText());
- query = "\"a\\.\\+\\?\\\\\""; //query = "a\.\+\?\\"
- result = runQuery(query);
-
- assertEquals("koral:token", result.at("/query/@type").asText());
- assertEquals("koral:term", result.at("/query/wrap/@type").asText());
- assertEquals("type:regex", result.at("/query/wrap/type").asText());
- assertEquals("orth", result.at("/query/wrap/layer").asText());
- assertEquals("match:eq", result.at("/query/wrap/match").asText());
- assertEquals("a\\.\\+\\?\\\\", result.at("/query/wrap/key").asText());
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CqpGrammarTest.java b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CqpGrammarTest.java
index 1908a0c..27827bf 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/cqp/CqpGrammarTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/cqp/CqpGrammarTest.java
@@ -21,18 +21,51 @@
String query;
Lexer lexer = new CQPLexer((CharStream) null);
ParserRuleContext tree = null;
+ @Test
+ public void Regex_without_quoutes () {
+
+
+ assertEquals(
+ "(request (query kid))",
+ treeString("kid")
+ );
+ }
@Test
public void squoutes_verbatim () {
+ assertEquals(
+ "(request (query ' ' ') ;)",
+ treeString("''';") // now the query isn't parsed, as expected
+ );
- assertEquals(
- "(request (query \" ? \"))", // it should not parse the query
- treeString("\"?\"")
+ // the weird behaviour moved to the following query:
+ assertEquals(
+ "(request (query ' (segment (token (key (regex 'kid'))))))",
+ treeString("''kid'")
);
- assertNotEquals(
- "(request (query \" \" \"))", // it should not parse the query
+
+ assertEquals(
+ "(request (query \" \" \"))", // now the query isn't parsed, as expected
treeString("\"\"\"")
);
+
+ // the weird behaviour moved to the following query:
+ assertEquals(
+ "(request (query \" (segment (token (key (regex \"kid\"))))))", // correct, it doesn't parse the query!
+ treeString("\"\"kid\"")
+ );
+
+
+ assertEquals(
+ "(request (query \" ? \"))", // correct, it doesn't parse the query!
+ treeString("\"?\"")
+ );
+
+ assertEquals(
+ "(request (query (segment (token (key (regex \"\\?\"))))))", // correct, it is "\?" and matches ?
+ treeString("\"\\?\"")
+ );
+
assertEquals(
"(request (query (segment (token (key (regex 'copil'))))))",
treeString("'copil'")
@@ -46,15 +79,16 @@
"(request (query (segment (token (key (regex '\\''))))))",
treeString("'\\''")
);
+ assertEquals(
+ "(request (query (segment (token (key (regex ''''))))))",
+ treeString("''''")
+ );
assertEquals(
"(request (query (segment (token (key (regex '\\'')) (flag %l)))))",
treeString("'\\''%l")
);
- assertEquals(
- "(request (query (segment (token (key (regex ''))))))", // it should not parse the query
- treeString("'''")
- );
+
};
@@ -173,10 +207,7 @@
public void testRegexEscapedSquoutes () {
// escape by doubling the single quote
- assertEquals(
- "(request (query (segment (token (key (regex ''))))))",
- treeString("''';") // this should not parse!! should signal an error! the regex is constructed when the parser finds the second ' !
- ); // how are these situations treated in PQ+?
+
assertEquals(
"(request (query (segment (token (key (regex 'anna''s house'))))) ;)",
treeString("'anna''s house';")
@@ -185,7 +216,12 @@
assertNotEquals(
"(request (query (segment (token (key (regex 'anna's house'))))) ;)",
treeString("'anna's house';")
- );
+ );
+ // everything after 'anna' is parsed as span;
+ assertEquals(
+ "(request (query 'anna' s house ') ;)",
+ treeString("'anna's house';")
+ );
//escape by using double quotes to encapsulate expressions that contain single quotes
assertEquals(
"(request (query (segment (token (key (regex \"anna's house\"))))) ;)",
@@ -372,7 +408,7 @@
public void testEmptyTokenWithin () {
assertEquals(
- "(request (query (sequence (segment (token (key (regex \"no\")))) (segment (token (key (regex \"sooner\")))) (segment (emptyTokenSequence (emptyToken [ ]) (repetition (kleene *)))) (segment (token (key (regex \"than\")))))) (within within (span (skey s))) ;)",
+ "(request (query (sequence (segment (token (key (regex \"no\")))) (segment (token (key (regex \"sooner\")))) (segment (emptyTokenSequence (emptyToken [ ]) (repetition (kleene *)))) (segment (token (key (regex \"than\")))))) (within within (spankey (skey s))) ;)",
treeString("\"no\" \"sooner\" []* \"than\" within s;")
);
};
@@ -518,7 +554,7 @@
"(request (query (sequence (segment (token [ (term (layer base) (termOp =) (key (regex \"Mann\"))) ])) (segment (region / region [ (span < (foundry cnx) / (layer c) (termOp =) (skey vp) >) ])))))",
treeString("[base=\"Mann\"] /region[<cnx/c=vp>]"));
assertEquals(
- "(request (query (sequence (segment (token [ (term (layer base) (termOp =) (key (regex \"Mann\"))) ])) (segment (region / region [ (span (skey vp)) ])))))",
+ "(request (query (sequence (segment (token [ (term (layer base) (termOp =) (key (regex \"Mann\"))) ])) (segment (region / region [ (spankey (skey vp)) ])))))",
treeString("[base=\"Mann\"] /region[vp]"));
assertEquals(
"(request (query (segment (region / region [ (span < (foundry cnx) / (layer c) (termOp ! =) (skey vp) ( (termGroup (term (layer class) (termOp ! =) (key (regex \"header\"))) (boolOp &) (term (layer id) (termOp =) (key (regex \"7\")))) ) >) ]))))",
@@ -775,6 +811,10 @@
"(request (query (segment (token [ (term (key (regex \"copil\")) & (position rbound ( (span < (foundry base) / (layer s) (termOp =) (skey s) >) ))) ]))) ;)",
treeString("[\"copil\" & rbound(<base/s=s>)];")
);
+ assertEquals(
+ "(request (query (segment (token [ (term (key (regex \"copil\")) & (position rbound ( (spankey (skey s)) ))) ]))) ;)",
+ treeString("[\"copil\" & rbound(s)];")
+ );
};
@Test
@@ -793,7 +833,7 @@
public void testWithinNp () {
assertEquals(
- "(request (query (sequence (segment (token [ (term (layer pos) (termOp =) (key (regex \"NN\"))) ])) (segment (emptyTokenSequence (emptyToken [ ]) (repetition (kleene *)))) (segment (token [ (term (layer pos) (termOp =) (key (regex \"NN\"))) ])))) (within within (span (skey np))) ;)",
+ "(request (query (sequence (segment (token [ (term (layer pos) (termOp =) (key (regex \"NN\"))) ])) (segment (emptyTokenSequence (emptyToken [ ]) (repetition (kleene *)))) (segment (token [ (term (layer pos) (termOp =) (key (regex \"NN\"))) ])))) (within within (spankey (skey np))) ;)",
treeString(" [pos=\"NN\"] []* [pos=\"NN\"] within np;")
);
@@ -826,7 +866,7 @@
public void testMU01rec () {
assertEquals(
- "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"in\")))))) (segment (token (key (regex \"due\")))) -1 1) ))) (segment (token (key (regex \"time\")))) (span (skey s))) )))) ;)",
+ "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"in\")))))) (segment (token (key (regex \"due\")))) -1 1) ))) (segment (token (key (regex \"time\")))) (spankey (skey s))) )))) ;)",
treeString("MU(meet (meet \"in\" \"due\" -1 1) \"time\" s);")
);
};
@@ -882,7 +922,7 @@
public void testMU5 () {
assertEquals(
- "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"one\")))))) (segment (token (key (regex \"hand\")))) 1 1) ))) ( (meetunion (segment (spanclass meet (segment (token (key (regex \"other\")))))) (segment (token (key (regex \"hand\")))) 1 1) ) (span (skey s))) )))) ;)",
+ "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"one\")))))) (segment (token (key (regex \"hand\")))) 1 1) ))) ( (meetunion (segment (spanclass meet (segment (token (key (regex \"other\")))))) (segment (token (key (regex \"hand\")))) 1 1) ) (spankey (skey s))) )))) ;)",
treeString(" MU(meet (meet \"one\" \"hand\" 1 1) (meet \"other\" \"hand\" 1 1) s);")
);
};
@@ -892,20 +932,20 @@
public void testMUinS () {
assertEquals(
- "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet (segment (token (key (regex \"tea\")) (flag %c))))) (segment (token (key (regex \"cakes\")) (flag %c))) (span (skey s))) )))) ;)",
+ "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet (segment (token (key (regex \"tea\")) (flag %c))))) (segment (token (key (regex \"cakes\")) (flag %c))) (spankey (skey s))) )))) ;)",
treeString(" MU(meet \"tea\"%c \"cakes\"%c s);")
);
};
@Test
public void testMUInSrec () {
- assertEquals( "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"piel\")))))) (segment (token (key (regex \"azul\")))) (span (skey np))) ))) (segment (token (key (regex \"de\")))) (span (skey s))) )))) ;)",
+ assertEquals( "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"piel\")))))) (segment (token (key (regex \"azul\")))) (spankey (skey np))) ))) (segment (token (key (regex \"de\")))) (spankey (skey s))) )))) ;)",
treeString("MU(meet (meet \"piel\" \"azul\" np) \"de\" s);"));
}
@Test
public void testMUInSrec1 () {
- assertEquals( "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"piel\")))))) (segment (token (key (regex \"azul\")))) (span (skey np))) ))) ( (meetunion (segment (spanclass meet (segment (token (key (regex \"de\")))))) (segment (token (key (regex \"color\")))) (span (skey pp))) ) (span (skey s))) )))) ;)",
+ assertEquals( "(request (query (segment (matching MU ( (meetunion (segment (spanclass meet ( (meetunion (segment (spanclass meet (segment (token (key (regex \"piel\")))))) (segment (token (key (regex \"azul\")))) (spankey (skey np))) ))) ( (meetunion (segment (spanclass meet (segment (token (key (regex \"de\")))))) (segment (token (key (regex \"color\")))) (spankey (skey pp))) ) (spankey (skey s))) )))) ;)",
treeString("MU(meet (meet \"piel\" \"azul\" np) (meet \"de\" \"color\" pp) s);"));
}
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PQGrammarTest.java b/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PQGrammarTest.java
index 4bb2f75..536ae0c 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PQGrammarTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PQGrammarTest.java
@@ -57,12 +57,16 @@
public void dquoutes () {
-
+ // current behaviour: the query is parsed and an empty regex is generated
assertEquals(
- "(request (query (segment (token (key (regex \"\"))))) <EOF>)", // see different behaviour of " and '; for ", the query is parsed and an empty regex is generated
+ "(request (query (segment (token (key (regex \"\"))))) <EOF>)",
treeString("\"\"\"")
);
-
+ // how it should behave: the query should not be parsed; this is not the case yet, hence assertNotEquals
+ assertNotEquals(
+ "(request (query \"\"\") <EOF>)", // not parsing the query
+ treeString("\"\"\"")
+ );
assertEquals(
"(request (query (sequence (segment (token (key (regex \"\")))) (segment (token (key -key))))) <EOF>)", // see different behaviour of " and '; for ", the query is parsed and an empty regex is generated
treeString("\"\"-key\"")