leading and trailing empty segments
bugfix: distances, sequences
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
index 120fc53..0f320a5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -85,6 +85,13 @@
return group;
}
+    /**
+     * Builds a group for the "repetition" operation and stores the given bounds on it.
+     *
+     * @param min value stored under the "min" key
+     * @param max value stored under the "max" key
+     * @return the map returned by {@code makeGroup("repetition")}, with "min" and "max" added
+     */
+    protected LinkedHashMap<String, Object> makeRepetition(int min, int max) {
+        LinkedHashMap<String, Object> repetition = makeGroup("repetition");
+        repetition.put("min", min);
+        repetition.put("max", max);
+        return repetition;
+    }
+
protected LinkedHashMap<String, Object> makePosition(String frame) {
LinkedHashMap<String, Object> group = new LinkedHashMap<String, Object>();
group.put("@type", "korap:group");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index fc9cead..b989749 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -61,6 +61,7 @@
* @param node The currently processed node. The process(String query) method calls this method with the root.
* @throws QueryException
*/
+ @SuppressWarnings("unchecked")
private void processNode(ParseTree node) throws QueryException {
// Top-down processing
if (visited.contains(node)) return;
@@ -102,19 +103,36 @@
if (nodeCat.equals("sequence")) {
LinkedHashMap<String,Object> sequence = makeGroup("sequence");
- ParseTree emptyTokens = getFirstChildWithCat(node, "emptyTokenSequence");
- if (emptyTokens!=null) {
- int[] minmax = parseEmptySegments(emptyTokens);
+ ParseTree distanceNode = getFirstChildWithCat(node, "distance");
+ if (distanceNode!=null) {
+ int[] minmax = parseDistance(distanceNode);
LinkedHashMap<String,Object> distance = makeDistance("w", minmax[0], minmax[1]);
sequence.put("inOrder", true);
ArrayList<Object> distances = new ArrayList<Object>();
distances.add(distance);
sequence.put("distances", distances);
+ visited.add(distanceNode.getChild(0));
}
putIntoSuperObject(sequence);
objectStack.push(sequence);
stackedObjects++;
}
+
+ if (nodeCat.equals("emptyTokenSequence")) {
+ int[] minmax = parseEmptySegments(node);
+ LinkedHashMap<String,Object> object;
+ LinkedHashMap<String,Object> emptyToken = makeToken();
+ if (minmax[0] != 1 || minmax[1] != 1) {
+ object = makeRepetition(minmax[0], minmax[1]);
+ ((ArrayList<Object>) object.get("operands")).add(emptyToken);
+ } else {
+ object = emptyToken;
+ }
+ putIntoSuperObject(object);
+ objectStack.push(object);
+ stackedObjects++;
+ }
+
if (nodeCat.equals("token")) {
LinkedHashMap<String,Object> token = makeToken();
@@ -204,25 +222,6 @@
putIntoSuperObject(position);
objectStack.push(position);
stackedObjects++;
- // offsets
- if (hasChild(node, "emptyTokenSequence")) {
- LinkedHashMap<String,Object> sequence = makeGroup("sequence");
- ParseTree leftOffset = getNthChildWithCat(node, "emptyTokenSequence", 1);
- if (leftOffset!=null) {
- int[] minmax = parseEmptySegments(leftOffset);
- sequence.put("leftoffset-min", minmax[0]-1);
- sequence.put("leftoffset-max", minmax[1]-1);
- }
- ParseTree rightOffset = getNthChildWithCat(node, "emptyTokenSequence", 2);
- if (rightOffset!=null) {
- int[] minmax = parseEmptySegments(rightOffset);
- sequence.put("rightoffset-min", minmax[0]-1);
- sequence.put("rightoffset-max", minmax[1]-1);
- }
- putIntoSuperObject(sequence);
- objectStack.push(sequence);
- stackedObjects++;
- }
}
if (nodeCat.equals("spanclass")) {
@@ -426,7 +425,6 @@
// process possible flags
if (flagNode != null) {
String flag = getNodeCat(flagNode.getChild(0)).substring(1); //substring removes leading slash '/'
- System.err.println(flag);
if (flag.contains("i")) term.put("caseInsensitive", true);
else if (flag.contains("I")) term.put("caseInsensitive", false);
if (flag.contains("x")) {
@@ -489,9 +487,23 @@
}
}
+    /**
+     * Derives the {min, max} bounds of a distance from a distance node.
+     * <p>
+     * The node's first child is handed to {@link #parseEmptySegments(ParseTree)};
+     * each of the two counts it returns is then increased by one (as required by
+     * Poliqarp) and capped by {@link #cropToMaxValue(int)}.
+     *
+     * @param distanceNode the distance node; its child at index 0 is parsed
+     * @return a two-element array {@code {min, max}} holding the incremented, capped bounds
+     */
+    private int[] parseDistance(ParseTree distanceNode) {
+        int[] emptyTokenBounds = parseEmptySegments(distanceNode.getChild(0));
+        // Primitive ints: there is no reason to box the bounds just to unbox them again.
+        int min = cropToMaxValue(emptyTokenBounds[0] + 1);
+        int max = cropToMaxValue(emptyTokenBounds[1] + 1);
+        return new int[]{min, max};
+    }
+
private int[] parseEmptySegments(ParseTree emptySegments) {
- Integer min = 1;
- Integer max = 1;
+ Integer min = 0;
+ Integer max = 0;
ParseTree child;
for (int i = 0; i < emptySegments.getChildCount(); i++) {
child = emptySegments.getChild(i);
@@ -507,10 +519,27 @@
}
}
}
- if (max > MAXIMUM_DISTANCE) max = MAXIMUM_DISTANCE;
+ min = cropToMaxValue(min);
+ max = cropToMaxValue(max);
return new int[]{min, max};
}
+    /**
+     * Caps a distance or quantification value at {@code MAXIMUM_DISTANCE}.
+     * <p>
+     * When a value is capped, a warning is logged and added to {@code warningMsgs}.
+     * The warning text is identical for every capped value, so it is reported only
+     * if {@code warningMsgs} does not already contain it. Without that check a single
+     * oversized distance is reported twice, because parseDistance() re-caps bounds
+     * that parseEmptySegments() has already capped.
+     *
+     * @param number the value to check
+     * @return {@code number} if it does not exceed {@code MAXIMUM_DISTANCE},
+     *         otherwise {@code MAXIMUM_DISTANCE}
+     */
+    private int cropToMaxValue(int number) {
+        if (number <= MAXIMUM_DISTANCE) {
+            return number;
+        }
+        String warning = String.format("You specified a distance between two segments that is greater than " +
+                "the allowed max value of %d. Your query will be re-interpreted using a distance of %d.", MAXIMUM_DISTANCE, MAXIMUM_DISTANCE);
+        // NOTE(review): assumes warningMsgs is a java.util.Collection (it is only seen via add() here) - confirm.
+        if (!warningMsgs.contains(warning)) {
+            warningMsgs.add(warning);
+            log.warn(warning);
+        }
+        return MAXIMUM_DISTANCE;
+    }
+
private ParserRuleContext parsePoliqarpQuery(String p) throws QueryException {
checkUnbalancedPars(p);
Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream) null);
@@ -562,7 +591,11 @@
"z.B./x",
"\".*?Mann.\"",
"\".*?Mann.*?\"",
- "[orth=\".*?l(au|ie)fen.*?*\"]"
+ "[orth=\".*?l(au|ie)fen.*?*\"]",
+ "[orth=Mann][][orth=Mann]",
+ "startswith(<s>, [][base=Mann])",
+ "[base=der][]{1,102}[base=Mann]",
+ "[base=geht][base=der][]*[base=Mann]"
};
// PoliqarpPlusTree.verbose=true;
for (String q : queries) {
diff --git a/src/test/java/PoliqarpPlusTreeTest.java b/src/test/java/PoliqarpPlusTreeTest.java
index 72edd37..18f2612 100644
--- a/src/test/java/PoliqarpPlusTreeTest.java
+++ b/src/test/java/PoliqarpPlusTreeTest.java
@@ -9,7 +9,8 @@
PoliqarpPlusTree ppt;
String map;
-
+ String expected;
+ String query;
private boolean equalsQueryContent(String res, String query) throws QueryException {
res = res.replaceAll(" ", "");
@@ -99,7 +100,6 @@
ppt = new PoliqarpPlusTree(query);
map = ppt.getRequestMap().get("query").toString();
assertEquals(re5.replaceAll(" ", ""), map.replaceAll(" ", ""));
-
}
@Test
@@ -176,7 +176,7 @@
}
@Test
- public void testEmptyTokens() throws QueryException {
+ public void testDistances() throws QueryException {
// [base=der][][base=Mann]
String et1 =
"{@type=korap:group, operation=operation:sequence, " +
@@ -217,17 +217,7 @@
assertEquals(et3.replaceAll(" ", ""), map.replaceAll(" ", ""));
- // startswith(<s>, [][base=Mann]
- String et4 =
- "{@type=korap:group, operation=operation:position, frame=frame:startswith, operands=[" +
- "{@type=korap:span, key=s}," +
- "{@type=korap:group, operation=operation:sequence, operands=[" +
- "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
- "], leftOffset={@type=korap:boundary, min=1, max=1}}" +
- "]}";
- ppt = new PoliqarpPlusTree("startswith(<s>, [][base=Mann])");
- map = ppt.getRequestMap().get("query").toString();
- assertEquals(et4.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
// [base=der][]{2,5}[base=Mann][]?[][base=Frau] nested distances=
String et5 =
@@ -273,9 +263,159 @@
ppt = new PoliqarpPlusTree("[base=der][]+[base=Mann]");
map = ppt.getRequestMap().get("query").toString();
assertEquals(et7.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ // [base=der][]{1,102}[base=Mann]
+ String et8 =
+ "{@type=korap:group, operation=operation:sequence, " +
+ "operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=der, match=match:eq}}," +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+ "], inOrder=true, distances=[" +
+ "{@type=korap:distance, key=w, min=2, max=100}" +
+ "]}";
+ ppt = new PoliqarpPlusTree("[base=der][]{1,102}[base=Mann]");
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(et8.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ // [base=geht][base=der][]*[base=Mann]
+ String et9 =
+ "{@type=korap:group, operation=operation:sequence, operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=geht, match=match:eq}}," +
+ "{@type=korap:group, operation=operation:sequence, " +
+ "operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=der, match=match:eq}}," +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+ "], inOrder=true, distances=[" +
+ "{@type=korap:distance, key=w, min=1, max=100}" +
+ "]}" +
+ "]}";
+ ppt = new PoliqarpPlusTree("[base=geht][base=der][]*[base=Mann]");
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(et9.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query = "[base=geht][base=der][]*[base=Mann][base=da]";
+ expected =
+ "{@type=korap:group, operation=operation:sequence, operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=geht, match=match:eq}}," +
+ "{@type=korap:group, operation=operation:sequence, " +
+ "operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=der, match=match:eq}}," +
+ "{@type=korap:group, operation=operation:sequence, operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}," +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=da, match=match:eq}}" +
+ "]}" +
+ "], inOrder=true, distances=[" +
+ "{@type=korap:distance, key=w, min=1, max=100}" +
+ "]}" +
+ "]}";
+ ppt = new PoliqarpPlusTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query = "[base=geht][base=der][]*contains(<s>,<np>)";
+ expected =
+ "{@type=korap:group, operation=operation:sequence, operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=geht, match=match:eq}}," +
+ "{@type=korap:group, operation=operation:sequence, " +
+ "operands=[" +
+ "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=der, match=match:eq}}," +
+ "{@type=korap:group, operation=operation:position, frame=frame:contains, operands=[" +
+ "{@type=korap:span, key=s}," +
+ "{@type=korap:span, key=np}" +
+ "]}" +
+ "], inOrder=true, distances=[" +
+ "{@type=korap:distance, key=w, min=1, max=100}" +
+ "]}" +
+ "]}";
+ ppt = new PoliqarpPlusTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
}
@Test
+    /**
+     * Checks the serialization of empty tokens at the start or end of a sequence:
+     * a single [] is expected as a bare korap:token, any other count as a
+     * repetition group wrapping one korap:token.
+     */
+    public void testLeadingTrailingEmptyTokens() throws QueryException {
+        // Each row is { query, expected serialization }; rows are checked in order.
+        String[][] cases = {
+            // startswith(<s>, [][base=Mann])
+            {
+                "startswith(<s>, [][base=Mann])",
+                "{@type=korap:group, operation=operation:position, frame=frame:startswith, operands=[" +
+                    "{@type=korap:span, key=s}," +
+                    "{@type=korap:group, operation=operation:sequence, operands=[" +
+                        "{@type=korap:token}," +
+                        "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+                    "]}" +
+                "]}"
+            },
+            {
+                "[][base=Mann]",
+                "{@type=korap:group, operation=operation:sequence, operands=[" +
+                    "{@type=korap:token}," +
+                    "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+                "]}"
+            },
+            {
+                "[][][base=Mann]",
+                "{@type=korap:group, operation=operation:sequence, operands=[" +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=2, max=2}," +
+                    "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+                "]}"
+            },
+            {
+                "[][]*[base=Mann]",
+                "{@type=korap:group, operation=operation:sequence, operands=[" +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=1, max=100}," +
+                    "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}" +
+                "]}"
+            },
+            {
+                "[][]*[base=Mann][][]",
+                "{@type=korap:group, operation=operation:sequence, operands=[" +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=1, max=100}," +
+                    "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=Mann, match=match:eq}}," +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=2, max=2}" +
+                "]}"
+            },
+            {
+                "[][]*contains(<s>, <np>)[][]",
+                "{@type=korap:group, operation=operation:sequence, operands=[" +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=1, max=100}," +
+                    "{@type=korap:group, operation=operation:position, frame=frame:contains, operands=[" +
+                        "{@type=korap:span, key=s}," +
+                        "{@type=korap:span, key=np}" +
+                    "]}," +
+                    "{@type=korap:group, operation=operation:repetition, operands=[" +
+                        "{@type=korap:token}" +
+                    "], min=2, max=2}" +
+                "]}"
+            }
+        };
+
+        for (String[] testCase : cases) {
+            query = testCase[0];
+            expected = testCase[1];
+            ppt = new PoliqarpPlusTree(query);
+            map = ppt.getRequestMap().get("query").toString();
+            // Spaces are stripped on both sides so only the content is compared.
+            assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+        }
+    }
+
+ @Test
public void testCoordinatedFields() throws QueryException {
// [base=Mann&(cas=N|cas=A)]
String cof1 =