more Cosmas features: BEG, END, ALL, NHIT
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
index cf19977..5ca3cb8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -2,10 +2,12 @@
import java.util.Map;
+import de.ids_mannheim.korap.util.QueryException;
+
public abstract class AbstractSyntaxTree {
public abstract Map<String, Object> getRequestMap();
- public abstract void process(String query);
+ public abstract void process(String query) throws QueryException; // implementations report a query they cannot process via QueryException
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
index 0a34539..49e948d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -17,6 +17,7 @@
import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
import de.ids_mannheim.korap.query.cosmas2.c2psParser;
import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
+import de.ids_mannheim.korap.util.QueryException;
/**
* Map representation of CosmasII syntax tree as returned by ANTLR
@@ -104,8 +105,9 @@
*
* @param tree The syntax tree as returned by ANTLR
* @param parser The ANTLR parser instance that generated the parse tree
+ * @throws QueryException if the query could not be processed, e.g. because it is not well-formed
*/
- public CosmasTree(String query) {
+ public CosmasTree(String query) throws QueryException {
this.query = query;
process(query);
System.out.println(requestMap.get("query"));
@@ -144,8 +146,16 @@
}
@Override
- public void process(String query) {
- Tree tree = parseCosmasQuery(query);
+ public void process(String query) throws QueryException {
+ Tree tree = null;
+ try {
+ tree = parseCosmasQuery(query);
+ } catch (RecognitionException e) {
+ throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
+ } catch (NullPointerException e) {
+ throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
+ }
+
System.out.println("Processing Cosmas");
prepareContext();
processNode(tree);
@@ -342,6 +352,9 @@
proxGroup.put("relation", "distance");
objectStack.push(proxGroup);
stackedObjects++;
+ if (openNodeCats.get(1).equals("OPALL")) proxGroup.put("match", "all");
+ else if (openNodeCats.get(1).equals("OPNHIT")) proxGroup.put("match", "between");
+ else proxGroup.put("match", "operands");
ArrayList<Object> constraints = new ArrayList<Object>();
String subtype = typ.getChild(0).toStringTree().equals("PROX") ? "incl" : "excl";
proxGroup.put("@subtype", subtype);
@@ -355,7 +368,7 @@
String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree();
String meas = dist_list.getChild(0).getChild(2).getChild(0).toStringTree();
if (min.equals("VAL0")) {
- min=max;
+ min="0";
}
LinkedHashMap<String, Object> distance = new LinkedHashMap<String, Object>();
distance.put("@type", "korap:distance");
@@ -409,16 +422,14 @@
posgroup.put("@type", "korap:group");
String relation = nodeCat.equals("OPIN") ? "position" : "overlap";
posgroup.put("relation", relation);
- String position = "";
- // add optional position info, if present
- if (QueryUtils.getNodeCat(node.getChild(0)).equals("POS")) {
- String posinfo = node.getChild(0).getChild(0).toStringTree();
- position = posinfo.equals("L") ? "startswith" : "endswith";
+ if (nodeCat.equals("OPIN")) {
+ parseOPINOptions(node, posgroup);
} else {
- position = nodeCat.equals("OPIN") ? "contains" : "full";
+ parseOPOVOptions(node, posgroup);
}
- posgroup.put("position", position);
+
+
ArrayList<Object> posoperands = new ArrayList<Object>();
posgroup.put("operands", posoperands);
objectStack.push(posgroup);
@@ -430,7 +441,8 @@
putIntoSuperObject(shrinkgroup, 1);
}
- // wrap the first argument of an #IN operator in a class group
+
+ // Wrap the first argument of an #IN operator in a class group
if (nodeCat.equals("ARG1") && (openNodeCats.get(1).equals("OPIN") || openNodeCats.get(1).equals("OPOV"))) {
// Step I: create group
LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
@@ -445,30 +457,16 @@
if (nodeCat.equals("OPALL") || nodeCat.equals("OPNHIT")) {
- // Step I: create group
- LinkedHashMap<String, Object> allgroup = new LinkedHashMap<String, Object>();
- allgroup.put("@type", "korap:group");
- String scope = nodeCat.equals("OPALL") ? "all" : "nhit";
- allgroup.put("relation", scope);
- // add optional position info, if present
- if (QueryUtils.getNodeCat(node.getChild(0)).equals("POS")) {
- allgroup.put("position", node.getChild(0).getChild(0).toStringTree());
- }
- allgroup.put("operands", new ArrayList<Object>());
- objectStack.push(allgroup);
- stackedObjects++;
-
- // Step II: decide where to put
- putIntoSuperObject(allgroup, 1);
+// proxGroupMatching = nodeCat.equals("OPALL") ? "all" : "exlcude";
}
if (nodeCat.equals("OPEND") || nodeCat.equals("OPBEG")) {
// Step I: create group
LinkedHashMap<String, Object> bedgroup = new LinkedHashMap<String, Object>();
bedgroup.put("@type", "korap:group");
- bedgroup.put("relation", "reduction");
- String reduction = nodeCat.equals("OPEND") ? "end" : "begin";
- bedgroup.put("reduction", reduction);
+ bedgroup.put("relation", "shrink");
+ String reduction = nodeCat.equals("OPBEG") ? "first" : "last";
+ bedgroup.put("shrink", reduction);
bedgroup.put("operands", new ArrayList<Object>());
objectStack.push(bedgroup);
stackedObjects++;
@@ -501,12 +499,24 @@
objectsToPop.push(stackedObjects);
- // recursion until 'query' node (root of tree) is processed
+ /*
+ ****************************************************************
+ ****************************************************************
+ * recursion until 'request' node (root of tree) is processed *
+ ****************************************************************
+ ****************************************************************
+ */
for (int i=0; i<node.getChildCount(); i++) {
Tree child = node.getChild(i);
processNode(child);
}
+ /*
+ **************************************************************
+ * Stuff that happens after processing the children of a node *
+ **************************************************************
+ */
+
// remove sequence from object stack if node is implicitly sequenced
if (sequencedNodes.size()>0) {
if (node == sequencedNodes.getFirst()) {
@@ -527,12 +537,6 @@
}
objectsToPop.pop();
-
-
-
-
-
-
if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
negate = false;
}
@@ -541,11 +545,116 @@
}
+
+
+    /**
+     * Reads the optional arguments of an #IN operator node and stores them in the position group.
+     * <p>
+     * Looks up the first POS, RANGE, EXCL and GROUP child of {@code node} and sets, in this order,
+     * the keys "position" ("contains" when there is no POS child), "range" (lower-cased),
+     * "@subtype" ("excl" when the position is negated) and "grouping" ("true" for MAX, "false" otherwise).
+     * The position value "N" and an EXCL value of "YES" each toggle the negation, so the two
+     * together cancel each other out.
+     *
+     * @param node the OPIN node whose option children are inspected
+     * @param posgroup the group map that receives the option values
+     */
+    private void parseOPINOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
+        Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS");
+        Tree rangenode = QueryUtils.getFirstChildWithCat(node, "RANGE");
+        Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL");
+        Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP");
+        boolean negatePosition = false;
+
+        // "contains" is the default when no position option is given
+        String position = "contains";
+        if (posnode != null) {
+            String value = posnode.getChild(0).toStringTree();
+            position = translateTextAreaArgument(value);
+            // "N" is translated to "leftrightmatch"; the exclusion is expressed through the subtype below
+            if (value.equals("N")) {
+                negatePosition = !negatePosition;
+            }
+        }
+        posgroup.put("position", position);
+
+        if (rangenode != null) {
+            String range = rangenode.getChild(0).toStringTree();
+            posgroup.put("range", range.toLowerCase());
+        }
+
+        if (exclnode != null && exclnode.getChild(0).toStringTree().equals("YES")) {
+            negatePosition = !negatePosition;
+        }
+
+        if (negatePosition) {
+            posgroup.put("@subtype", "excl");
+        }
+
+        if (groupnode != null) {
+            String grouping = groupnode.getChild(0).toStringTree().equals("MAX") ? "true" : "false";
+            posgroup.put("grouping", grouping);
+        }
+    }
+
+    /**
+     * Reads the optional arguments of an #OV operator node and stores them in the position group.
+     * <p>
+     * Looks up the first POS, EXCL and GROUP child of {@code node} and sets, in this order,
+     * the keys "position" ("any" when there is no POS child), "@subtype" ("excl" when the EXCL
+     * value is "YES") and "grouping" ("true" for MAX, "false" otherwise).
+     *
+     * @param node the OPOV node whose option children are inspected
+     * @param posgroup the group map that receives the option values
+     */
+    private void parseOPOVOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
+        Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS");
+        Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL");
+        Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP");
+
+        // "any" is the default when no position option is given
+        String position = "any";
+        if (posnode != null) {
+            position = translateTextAreaArgument(posnode.getChild(0).toStringTree());
+        }
+        posgroup.put("position", position);
+
+        if (exclnode != null && exclnode.getChild(0).toStringTree().equals("YES")) {
+            posgroup.put("@subtype", "excl");
+        }
+
+        if (groupnode != null) {
+            String grouping = groupnode.getChild(0).toStringTree().equals("MAX") ? "true" : "false";
+            posgroup.put("grouping", grouping);
+        }
+    }
+
+    /**
+     * Maps a text area specification (the position option argument of #IN / #OV) onto the
+     * term used in the serialisation.
+     * For the allowed argument types and their values for OPIN and OPOV, see
+     * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_I.html or
+     * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_O.html, respectively.
+     *
+     * @param argument the position option value as found in the syntax tree
+     * @return the serialisation term, or the empty string if the argument is not one of the known values
+     */
+    private String translateTextAreaArgument(String argument) {
+        switch (argument) {
+            case "L":
+                return "startswith";
+            case "R":
+                return "endswith";
+            case "F":
+            case "N": // "N" is for OPIN only - the exclusion constraint is formulated in parseOPINOptions
+                return "leftrightmatch";
+            case "FE":
+                return "ident";
+            case "FI":
+                return "leftrightmatch-noident";
+            case "X": // for OPOV only
+                return "residual";
+            default:
+                return "";
+        }
+    }
+
@SuppressWarnings("unchecked")
private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
if (objectStack.size()>objStackPosition) {
ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
- System.out.println("XXX: "+invertedOperandsLists.contains(topObjectOperands));
if (!invertedOperandsLists.contains(topObjectOperands)) {
topObjectOperands.add(object);
} else {
@@ -562,27 +671,20 @@
}
- private static Tree parseCosmasQuery(String p) {
+ private static Tree parseCosmasQuery(String p) throws RecognitionException { // parses p with the c2ps lexer/parser and returns the resulting tree
Tree tree = null;
- ANTLRStringStream
- ss = new ANTLRStringStream(p);
- c2psLexer
- lex = new c2psLexer(ss);
- org.antlr.runtime.CommonTokenStream tokens = //v3
- new org.antlr.runtime.CommonTokenStream(lex);
+ ANTLRStringStream ss = new ANTLRStringStream(p);
+ c2psLexer lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); //v3
cosmasParser = new c2psParser(tokens);
- c2psParser.c2ps_query_return
- c2Return = null;
- try
- {
- c2Return = cosmasParser.c2ps_query(); // statt t().
- }
- catch (RecognitionException e)
- {
- e.printStackTrace();
- }
+ c2psParser.c2ps_query_return c2Return = cosmasParser.c2ps_query(); // instead of t(). A RecognitionException is no longer caught here but passed on to the caller
// AST Tree anzeigen:
tree = (Tree)c2Return.getTree();
+
+ String treestring = tree.toStringTree(); // textual form of the tree, searched for error markers below
+ if (treestring.contains("<mismatched token") || treestring.contains("<error") || treestring.contains("<unexpected")) { // NOTE(review): presumably how ANTLR renders error nodes in the tree string - confirm
+ throw new RecognitionException(); // a tree containing one of these markers is reported as a failed parse
+ }
return tree;
}
@@ -591,7 +693,7 @@
*/
public static void main(String[] args) {
/*
- * For testing
+ * For debugging
*/
String[] queries = new String[] {
/* COSMAS 2 */
@@ -618,7 +720,7 @@
// "der Mann",
// "Sonne nicht (Mond Stern)",
// "Sonne /+w1:4 Mond",
-//// "wegen #IN(L) <s>"
+// "wegen #IN(L) <s>"
// "#BEG(<s>) /5w,s0 #END(<s>)",
// "der Mann",
// "Mond oder Sterne",
@@ -649,6 +751,14 @@
"wegen #OV #ELEM(S)",
"wegen #IN #ELEM(S)",
// "Sonne oder Mond"
+ "wegen #IN('FE,ALL,%,MIN') <s>",
+ "wegen #IN('FE,ALL,MIN') <s>",
+ "wegen #IN <s>",
+ "wegen #IN(L) <s>",
+ "wegen #IN(%, L) <s>",
+ "wegen #OV('FE,%,MIN') <s>",
+ "#BEG(der /+w1:2 Mann)",
+ "#BEG(der Mann /10w Mann)"
/*
* TODO
* http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_I.html
@@ -660,8 +770,15 @@
for (String q : queries) {
try {
System.out.println(q);
- System.out.println(parseCosmasQuery(q).toStringTree());
- CosmasTree act = new CosmasTree(q);
+ try {
+ System.out.println(parseCosmasQuery(q).toStringTree());
+ @SuppressWarnings("unused")
+ CosmasTree act = new CosmasTree(q);
+ } catch (RecognitionException e) {
+ e.printStackTrace();
+ } catch (QueryException e) {
+ e.printStackTrace();
+ }
System.out.println();
} catch (NullPointerException npe) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index fd0c34a..bd58449 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -362,8 +362,20 @@
// add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
// take into account a possible 'occ' child
if (node.getParent().getChildCount()>1) {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(onTopOfObjectStack).get("operands");
- topSequenceOperands.add(token);
+                    if (node.getText().equals("[]")) {
+                        // An empty token is not added as an operand; it only increases the "offset" of the sequence on the stack.
+                        LinkedHashMap<String, Object> sequence = objectStack.get(onTopOfObjectStack);
+                        Object previousOffset = sequence.get("offset");
+                        int offset = (previousOffset == null) ? 0 : Integer.parseInt(previousOffset.toString());
+                        // Always store the offset as a String: storing an Integer for the second empty token
+                        // made the String cast fail with a ClassCastException on the third one.
+                        sequence.put("offset", String.valueOf(offset + 1));
+                    } else {
+                        ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(onTopOfObjectStack).get("operands");
+                        topSequenceOperands.add(token);
+                    }
}
}
@@ -503,18 +515,23 @@
objectStack.push(token);
stackedObjects++;
// Step II: fill object (token values) and put into containing sequence
- token.put("@type", "korap:token");
- String word = node.getChild(0).toStringTree(poliqarpParser);
- LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
- token.put("@value", tokenValues);
- tokenValues.put("@type", "korap:term");
- tokenValues.put("@value", "orth:"+word);
- tokenValues.put("relation", "=");
- // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
- if (node.getParent().getChildCount()>1) {
- ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
- topSequenceOperands.add(token);
+ if (node.getText().equals("[]")) {
+
+ } else {
+ token.put("@type", "korap:token");
+ String word = node.getChild(0).toStringTree(poliqarpParser);
+ LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
+ token.put("@value", tokenValues);
+ tokenValues.put("@type", "korap:term");
+ tokenValues.put("@value", "orth:"+word);
+ tokenValues.put("relation", "=");
+ // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topSequenceOperands.add(token);
+ }
}
+ System.out.println(stackedObjects+" "+objectStack);
visited.add(node.getChild(0));
}
@@ -847,7 +864,8 @@
// "([base=bar][base=foo])*",
"([base=a]^[base=b])|[base=c]",
"Baum | Stein",
- "Haus/i"
+ "Haus/i",
+ "startswith(<s>,[]+[base=der][base=Mann])",
};
PoliqarpPlusTree.debug=true;
for (String q : queries) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index 78300cf..db60b9e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -65,6 +65,25 @@
return false;
}
+    /**
+     * Collects all direct children of a node whose node category equals the given one.
+     *
+     * @param node the parent node whose direct children are examined
+     * @param nodeCat the category to look for, compared against the result of getNodeCat
+     * @return the matching children in child order; an empty list if none match
+     */
+    public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) {
+        ArrayList<Tree> matches = new ArrayList<Tree>();
+        for (int idx = 0; idx < node.getChildCount(); idx++) {
+            Tree candidate = node.getChild(idx);
+            if (!getNodeCat(candidate).equals(nodeCat)) {
+                continue;
+            }
+            matches.add(candidate);
+        }
+        return matches;
+    }
+
+    /**
+     * Finds the first direct child of a node whose node category equals the given one.
+     *
+     * @param node the parent node whose direct children are examined
+     * @param nodeCat the category to look for, compared against the result of getNodeCat
+     * @return the first matching child in child order, or null if no child matches
+     */
+    public static Tree getFirstChildWithCat(Tree node, String nodeCat) {
+        Tree found = null;
+        int idx = 0;
+        while (found == null && idx < node.getChildCount()) {
+            Tree candidate = node.getChild(idx);
+            if (getNodeCat(candidate).equals(nodeCat)) {
+                found = candidate;
+            }
+            idx++;
+        }
+        return found;
+    }
+
public static void checkUnbalancedPars(String q) throws QueryException {
int openingPars = StringUtils.countMatches(q, "(");
int closingPars = StringUtils.countMatches(q, ")");