more cosmas features
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
index 89de56f..835e82a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.serialize;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
@@ -38,19 +39,6 @@
*/
String query;
LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> queryMap = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> tokenGroup = new LinkedHashMap<String,Object>();
- ArrayList<Object> fieldGroup = new ArrayList<Object>();
- LinkedHashMap<String,Object> fieldMap;
- ArrayList<List<Object>> distantTokens;
- /**
- * Keeps track of active tokens.
- */
- LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
- /**
- * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
- */
- LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
/**
* Keeps track of active object.
*/
@@ -92,8 +80,16 @@
Integer stackedObjects = 0;
private static boolean debug = false;
-
-
+ /**
+ * A list of node categories that can be sequenced (i.e. which can be in a sequence with any number of other nodes in this list)
+ */
+ private final List<String> sequentiableCats = Arrays.asList(new String[] {"OPWF", "OPLEM", "OPMORPH", "OPBEG", "OPEND", "OPIN"});
+ /**
+ * Keeps track of sequenced nodes, i.e. nodes that implicitly govern a sequence, as in (C2PQ (OPWF der) (OPWF Mann)).
+ * This is necessary in order to know when to take the sequence off the object stack, as the sequence is introduced by the
+ * first child but cannot be closed after this first child in order not to lose its siblings
+ */
+ private LinkedList<Tree> sequencedNodes = new LinkedList<Tree>();
/**
*
* @param tree The syntax tree as returned by ANTLR
@@ -168,19 +164,50 @@
/* ***************************************
* Processing individual node categories *
*****************************************/
+
+
+ // Check for potential implicit sequences as in (C2PQ (OPWF der) (OPWF Mann)). The sequence is introduced
+ // by the first child if it (and its siblings) is sequentiable.
+ if (sequentiableCats.contains(nodeCat)) {
+ // for each node, check if parent has more than one child (-> could be implicit sequence)
+ if (node.getParent().getChildCount()>1) {
+ // if node is first child of parent...
+ if (node == node.getParent().getChild(0)) {
+ // Step I: create sequence
+ LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
+ sequence.put("@type", "korap:sequence");
+ sequence.put("operands", new ArrayList<Object>());
+ // push sequence on object stack but don't increment stackedObjects counter since
+ // we've got to wait until the parent node is processed - therefore, add the parent
+ // to the sequencedNodes list and remove the sequence from the stack when the parent
+ // has been processed
+ objectStack.push(sequence);
+ sequencedNodes.push(node.getParent());
+ // Step II: decide where to put sequence
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(sequence);
+ } else {
+ requestMap.put("query", sequence);
+ }
+
+ }
+ }
+ }
+
+
// C2QP is tree root
if (nodeCat.equals("C2PQ")) {
- if (node.getChildCount()>1) {
- // Step I: create sequence
- LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
- sequence.put("@type", "korap:sequence");
- sequence.put("operands", new ArrayList<Object>());
- objectStack.push(sequence);
- stackedObjects++;
- // Step II: decide where to put sequence
- requestMap.put("query", sequence);
-
- }
+// if (node.getChildCount()>1) {
+// // Step I: create sequence
+// LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
+// sequence.put("@type", "korap:sequence");
+// sequence.put("operands", new ArrayList<Object>());
+// objectStack.push(sequence);
+// stackedObjects++;
+// // Step II: decide where to put sequence
+// requestMap.put("query", sequence);
+// }
}
// Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
@@ -189,27 +216,26 @@
//Step I: get info
LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
token.put("@type", "korap:token");
- tokenStack.push(token);
objectStack.push(token);
stackedObjects++;
// check if this token comes after a distant operator (like "/+w3:4") and if yes,
// insert the empty tokenGroups before the current token
- if (openNodeCats.get(1).equals("ARG2")) {
- if (openNodeCats.get(2).equals("OPPROX") && !distantTokensStack.isEmpty()) {
- for (List<Object> distantTokenGroup : distantTokensStack.pop()) {
-// if (tokenGroupsStack.isEmpty()) {
-// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
-// } else {
- tokenStack.getFirst().put("token", distantTokenGroup);
-// }
-// tokenGroupCount++;
- }
- }
+// if (openNodeCats.get(1).equals("ARG2")) {
+// if (openNodeCats.get(2).equals("OPPROX") && !distantTokensStack.isEmpty()) {
+// for (List<Object> distantTokenGroup : distantTokensStack.pop()) {
+//// if (tokenGroupsStack.isEmpty()) {
+//// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
+//// } else {
+// tokenStack.getFirst().put("token", distantTokenGroup);
+//// }
+//// tokenGroupCount++;
+// }
+// }
// check negation of token by preceding OPNOT
// else if (openNodeCats.get(2).equals("OPNOT")) {
// negate = true;
// }
- }
+// }
LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
token.put("@value", fieldMap);
@@ -233,7 +259,6 @@
} else {
fieldMap.put("relation", "=");
}
-
//Step II: decide where to put
if (objectStack.size()>1) {
ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
@@ -262,23 +287,23 @@
// negate = true;
// }
- if (nodeCat.equals("ARG1") || nodeCat.equals("ARG2")) {
- if (node.getChildCount()>1) {
- // Step I: create sequence
- LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
- sequence.put("@type", "korap:sequence");
- sequence.put("operands", new ArrayList<Object>());
- objectStack.push(sequence);
- stackedObjects++;
- // Step II: decide where to put sequence
- if (objectStack.size()>1) {
- ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
- topObjectOperands.add(sequence);
- } else {
- requestMap.put("query", sequence);
- }
- }
- }
+// if (nodeCat.equals("ARG1") || nodeCat.equals("ARG2")) {
+// if (node.getChildCount()>1) {
+// // Step I: create sequence
+// LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
+// sequence.put("@type", "korap:sequence");
+// sequence.put("operands", new ArrayList<Object>());
+// objectStack.push(sequence);
+// stackedObjects++;
+// // Step II: decide where to put sequence
+// if (objectStack.size()>1) {
+// ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+// topObjectOperands.add(sequence);
+// } else {
+// requestMap.put("query", sequence);
+// }
+// }
+// }
if (nodeCat.equals("OPOR") || nodeCat.equals("OPAND") || nodeCat.equals("OPNOT")) {
// Step I: create group
@@ -291,13 +316,6 @@
disjunction.put("operands", new ArrayList<Object>());
objectStack.push(disjunction);
stackedObjects++;
- if (tokenStack.isEmpty()) {
- queryMap.put("tokenGroup", tokenGroup);
- } else {
- tokenStack.getFirst().put("tokenGroup", tokenGroup);
- }
- tokenGroup.put("type", "disj");
- tokenStack.push(tokenGroup);
// Step II: decide where to put
if (objectStack.size()>1) {
@@ -327,7 +345,9 @@
min=max;
}
- proxGroup.put("@subtype", meas);
+ String subtype = typ.getChild(0).toStringTree().equals("PROX") ? "incl" : "excl";
+ proxGroup.put("@subtype", subtype);
+ proxGroup.put("measure", meas);
proxGroup.put("min", min);
proxGroup.put("max", max);
proxGroup.put("operands", new ArrayList<Object>());
@@ -343,12 +363,37 @@
}
}
- if (nodeCat.equals("OPIN")) {
+ // inclusion or overlap
+ if (nodeCat.equals("OPIN") || nodeCat.equals("OPOV")) {
// Step I: create group
LinkedHashMap<String, Object> ingroup = new LinkedHashMap<String, Object>();
ingroup.put("@type", "korap:group");
- ingroup.put("relation", "in");
- ingroup.put("position", node.getChild(0).getChild(0).toStringTree());
+ String combination = nodeCat.equals("OPIN") ? "include" : "overlap";
+ ingroup.put("relation", combination);
+ // add optional position info, if present
+ if (QueryUtils.getNodeCat(node.getChild(0)).equals("POS")) {
+ ingroup.put("position", node.getChild(0).getChild(0).toStringTree());
+ }
+ ingroup.put("operands", new ArrayList<Object>());
+ objectStack.push(ingroup);
+ stackedObjects++;
+
+ // Step II: decide where to put
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(ingroup);
+ } else {
+ requestMap.put("query", ingroup);
+ }
+ }
+
+ if (nodeCat.equals("OPEND") || nodeCat.equals("OPBEG")) {
+ // Step I: create group
+ LinkedHashMap<String, Object> ingroup = new LinkedHashMap<String, Object>();
+ ingroup.put("@type", "korap:group");
+ ingroup.put("relation", "reduction");
+ String reduction = nodeCat.equals("OPEND") ? "end" : "begin";
+ ingroup.put("reduction", reduction);
ingroup.put("operands", new ArrayList<Object>());
objectStack.push(ingroup);
stackedObjects++;
@@ -375,16 +420,18 @@
}
objectsToPop.pop();
+ // remove sequence from object stack if node is implicitly sequenced
+ if (sequencedNodes.size()>0) {
+ if (node == sequencedNodes.getFirst()) {
+ objectStack.pop();
+ sequencedNodes.pop();
+ }
+ }
+
if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
negate = false;
}
- if (nodeCat.equals("OPAND") || nodeCat.equals("OPOR")) {
- tokenStack.pop();
-// tokenGroupCount--;
-// tokenCount=0;
- }
-
openNodeCats.pop();
}
@@ -443,12 +490,24 @@
// "(sonne und (stern oder mond)) /+w2 luna???",
// "(Tag /+w2 $offenen) /+w1 Tür",
// "heißt /+w2 \"und\" ,"
- "der",
- "der Mann",
- "Sonne nicht (Mond Stern)",
- "Sonne /+w1:4 Mond",
-// "wegen #IN(L) <s>"
+// "der",
+// "der Mann",
+// "Sonne nicht (Mond Stern)",
+// "Sonne /+w1:4 Mond",
+//// "wegen #IN(L) <s>"
"#BEG(<s>) /5w,s0 #END(<s>)",
+ "#RECHTS(ELEM(S))",
+ "#END(ELEM(S))",
+// "der Mann",
+// "Mond oder Sterne",
+// "(Sonne scheint) oder Mond"
+// "Sonne oder Mond oder Sterne",
+// "Mann #OV (der Mann)",
+// "Mann #OV(L) der Mann"
+ "*tür",
+ "#BED(tür,sa)",
+ "das %w3 Haus",
+ "das /w3 Haus"
};
CosmasTree.debug=true;
diff --git a/src/test/java/CosmasTreeTest.java b/src/test/java/CosmasTreeTest.java
index 9af11af..f111c56 100644
--- a/src/test/java/CosmasTreeTest.java
+++ b/src/test/java/CosmasTreeTest.java
@@ -69,6 +69,28 @@
map = ppt.getRequestMap().get("query").toString();
assertEquals(seq1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ query="der Mann schläft";
+ String seq2 =
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:der, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mann, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:schläft, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(seq2.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query="der Mann schläft lang";
+ String seq3 =
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:der, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mann, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:schläft, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:lang, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(seq3.replaceAll(" ", ""), map.replaceAll(" ", ""));
}
@Test
@@ -83,13 +105,71 @@
map = ppt.getRequestMap().get("query").toString();
assertEquals(disj1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ query="(Sonne scheint) oder Mond";
+ String disj2 =
+ "{@type=korap:group, relation=or, operands=[" +
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:scheint, relation==}}" +
+ "]}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(disj2.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query="(Sonne scheint) oder (Mond scheint)";
+ String disj3 =
+ "{@type=korap:group, relation=or, operands=[" +
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:scheint, relation==}}" +
+ "]}," +
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:scheint, relation==}}" +
+ "]}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(disj3.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ }
+
+ @Test
+ public void testOPORAND() {
+ query="(Sonne oder Mond) und scheint";
+ String orand1 =
+ "{@type=korap:group, relation=and, operands=[" +
+ "{@type=korap:group, relation=or, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
+ "]}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:scheint, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(orand1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query="scheint und (Sonne oder Mond)";
+ String orand2 =
+ "{@type=korap:group, relation=and, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:scheint, relation==}}," +
+ "{@type=korap:group, relation=or, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
+ "]}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(orand2.replaceAll(" ", ""), map.replaceAll(" ", ""));
}
@Test
public void testOPPROX() {
query="Sonne /+w1:4 Mond";
String prox1 =
- "{@type=korap:group, relation=distance, @subtype=w, min=1, max=4, operands=[" +
+ "{@type=korap:group, relation=distance, @subtype=incl, measure=w, min=1, max=4, operands=[" +
"{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
"{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
"]}";
@@ -102,7 +182,45 @@
public void testOPIN() {
query="wegen #IN(L) <s>";
String opin1 =
- "{@type=korap:group, relation=in, position=L, operands=[" +
+ "{@type=korap:group, relation=include, position=L, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
+ "{@type=korap:element, @value=s}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(opin1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ // position argument is optional
+ query="wegen #IN <s>";
+ String opin2 =
+ "{@type=korap:group, relation=include, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
+ "{@type=korap:element, @value=s}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(opin2.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ // parentheses around 'wegen mir' are optional
+ query="wegen #IN (wegen mir)";
+ String opin3 =
+ "{@type=korap:group, relation=include, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:mir, relation==}}" +
+ "]}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(opin3.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ }
+
+ @Test
+ public void testOPOV() {
+ query="wegen #OV <s>";
+ String opin1 =
+ "{@type=korap:group, relation=overlap, operands=[" +
"{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
"{@type=korap:element, @value=s}" +
"]}";
@@ -131,5 +249,31 @@
// http://www.ids-mannheim.de/cosmas2/web-app/hilfe/suchanfrage/eingabe-zeile/syntax/rechts.html
// http://www.ids-mannheim.de/cosmas2/web-app/hilfe/suchanfrage/eingabe-zeile/thematische-bsp/bsp-satzlaenge.html
}
+
+
+ @Test
+ public void testELEM() {
+ // http://www.ids-mannheim.de/cosmas2/web-app/hilfe/suchanfrage/eingabe-zeile/syntax/elem.html
+ }
+
+ @Test
+ public void testOPALL() {
+
+ }
+
+ @Test
+ public void testOPNHIT() {
+
+ }
+
+ @Test
+ public void testOPBED() {
+
+ }
+
+ // TODO
+ /*
+ *
+ */
}