Integrated Cosmas2 ANTLR sources into modules, JSON-ified the Cosmas serialization, wrote some initial tests
diff --git a/pom.xml b/pom.xml
index cdd1185..b0df0c0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -41,6 +41,11 @@
<artifactId>KorAP-PoliqarpParser</artifactId>
<version>0.05</version>
</dependency>
+ <dependency>
+ <groupId>KorAP-modules</groupId>
+ <artifactId>KorAP-Cosmas2Parser</artifactId>
+ <version>0.01</version>
+ </dependency>
<dependency>
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
index 4ea14eb..58a5611 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -5,15 +5,17 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
import de.ids_mannheim.korap.query.cosmas2.c2psParser;
+import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
/**
* Map representation of CosmasII syntax tree as returned by ANTLR
@@ -22,6 +24,8 @@
*/
public class CosmasTree extends AbstractSyntaxTree {
+ Logger log = LoggerFactory.getLogger(CosmasTree.class);
+
private static c2psParser cosmasParser;
/*
* Following collections have the following functions:
@@ -40,6 +44,18 @@
LinkedHashMap<String,Object> fieldMap;
ArrayList<List<Object>> distantTokens;
/**
+ * Keeps track of active tokens.
+ */
+ LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
+ */
+ LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of active object.
+ */
+ LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
* Makes it possible to store several distantTokenGroups
*/
LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
@@ -47,8 +63,6 @@
* Field for repetition query (Kleene + or * operations, or min/max queries: {2,4}
*/
String repetition = "";
- int tokenCount=0;
- int tokenGroupCount=0;
/**
* Keeps track of open node categories
*/
@@ -58,9 +72,9 @@
*/
LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
/**
- * Global control structure for tokenGroups, keeps track of open tokenGroups.
+ * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(Tree)}
*/
- LinkedList<LinkedHashMap<String,Object>> tokenGroupsStack = new LinkedList<LinkedHashMap<String,Object>>();
+ LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
/**
* Flag that indicates whether token fields or meta fields are currently being processed
*/
@@ -74,6 +88,10 @@
* Keeps track of all visited nodes in a tree
*/
List<Tree> visited = new ArrayList<Tree>();
+
+ Integer stackedObjects = 0;
+
+ private static boolean debug = false;
/**
@@ -84,7 +102,7 @@
public CosmasTree(String query) {
this.query = query;
process(query);
- System.out.println(requestMap);
+ System.out.println(requestMap.get("query"));
}
@Override
@@ -92,13 +110,42 @@
return this.requestMap;
}
+    /** Builds the fixed context map and stores it in requestMap under "@context". */
+    private void prepareContext() {
+        // definition of the "operands" term
+        LinkedHashMap<String,Object> operandsDef = new LinkedHashMap<String,Object>();
+        operandsDef.put("@id", "korap:operands");
+        operandsDef.put("@container", "@list");
+        // definition of the "relation" term
+        LinkedHashMap<String,Object> relationDef = new LinkedHashMap<String,Object>();
+        relationDef.put("@id", "korap:relation");
+        relationDef.put("@type", "korap:relation#types");
+        // definition of the "class" term
+        LinkedHashMap<String,Object> classDef = new LinkedHashMap<String,Object>();
+        classDef.put("@id", "korap:class");
+        classDef.put("@type", "xsd:integer");
+        // keep this put order: a LinkedHashMap iterates (and prints) in insertion order
+        LinkedHashMap<String,Object> ctx = new LinkedHashMap<String,Object>();
+        ctx.put("korap", "http://korap.ids-mannheim.de/ns/query");
+        ctx.put("@language", "de");
+        ctx.put("operands", operandsDef);
+        ctx.put("relation", relationDef);
+        ctx.put("class", classDef);
+        ctx.put("query", "korap:query");
+        ctx.put("filter", "korap:filter");
+        ctx.put("meta", "korap:meta");
+        requestMap.put("@context", ctx);
+    }
+
@Override
public void process(String query) {
Tree tree = parseCosmasQuery(query);
System.out.println("Processing Cosmas");
+ prepareContext();
processNode(tree);
}
+ @SuppressWarnings("unchecked")
private void processNode(Tree node) {
// Top-down processing
@@ -106,36 +153,45 @@
else visited.add(node);
- String nodeCat = getNodeCat(node);
+ String nodeCat = QueryUtils.getNodeCat(node);
openNodeCats.push(nodeCat);
+ stackedObjects = 0;
+ if (debug) {
+// System.out.println(distantTokensStack);
+ System.err.println(" "+objectStack);
+ System.out.println(openNodeCats);
+ }
- System.out.println(openNodeCats);
- System.out.println(distantTokensStack);
/* ***************************************
* Processing individual node categories *
*****************************************/
// C2QP is tree root
if (nodeCat.equals("C2PQ")) {
- queryMap = new LinkedHashMap<String,Object>();
- requestMap.put("query", queryMap);
+ if (node.getChildCount()>1) {
+ // Step I: create sequence
+ LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
+ sequence.put("@type", "korap:sequence");
+ sequence.put("operands", new ArrayList<Object>());
+ objectStack.push(sequence);
+ stackedObjects++;
+ // Step II: decide where to put sequence
+ requestMap.put("query", sequence);
+
+ }
}
// Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM") || nodeCat.equals("OPMORPH")) {
- if (tokenGroupsStack.isEmpty()) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
- tokenGroupsStack.push(tokenGroup);
- } else {
- tokenGroup = tokenGroupsStack.getFirst();
- }
-
+ //Step I: get info
+ LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
+ token.put("@type", "korap:token");
+ tokenStack.push(token);
+ objectStack.push(token);
+ stackedObjects++;
// check if this token comes after a distant operator (like "/+w3:4") and if yes,
// insert the empty tokenGroups before the current token
if (openNodeCats.get(1).equals("ARG2")) {
@@ -144,8 +200,7 @@
// if (tokenGroupsStack.isEmpty()) {
// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
// } else {
- tokenCount++;
- tokenGroupsStack.getFirst().put("token"+tokenGroupCount+"_"+tokenCount, distantTokenGroup);
+ tokenStack.getFirst().put("token", distantTokenGroup);
// }
// tokenGroupCount++;
}
@@ -155,31 +210,51 @@
// negate = true;
// }
}
+ LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
+ token.put("@value", fieldMap);
- fieldGroup = new ArrayList<Object>();
- tokenCount++;
- tokenGroup.put("token"+tokenGroupCount+"_"+tokenCount, fieldGroup);
-
- fieldMap = new LinkedHashMap<String, Object>();
- fieldGroup.add(fieldMap);
-
+ fieldMap.put("@type", "korap:term");
// make category-specific fieldMap entry
+ String value = "";
if (nodeCat.equals("OPWF")) {
- fieldMap.put("form", node.getChild(0).toStringTree());
+ value = "orth:"+node.getChild(0).toStringTree().replaceAll("\"", "");
}
if (nodeCat.equals("OPLEM")) {
- fieldMap.put("lemma", node.getChild(0).toStringTree());
+ value = "base:"+node.getChild(0).toStringTree().replaceAll("\"", "");
}
if (nodeCat.equals("OPMORPH")) {
- fieldMap.put("morph", node.toStringTree());
+ value = "morph:"+node.toStringTree();
//TODO decompose morphology query
}
+ fieldMap.put("@value", value);
// negate field (see above)
if (negate) {
fieldMap.put("relation", "!=");
+ } else {
+ fieldMap.put("relation", "=");
}
-// tokenGroupsStack.push(tokenGroup);
+ //Step II: decide where to put
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(token);
+ } else {
+ requestMap.put("query", token);
+ }
+ }
+
+ if (nodeCat.equals("OPLABEL")) {
+ // Step I: create element
+ LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>();
+ elem.put("@type", "korap:element");
+ elem.put("@value", node.getChild(0).toStringTree().replaceAll("<|>", ""));
+ //Step II: decide where to put
+ if (objectStack.size()>0) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(0).get("operands");
+ topObjectOperands.add(elem);
+ } else {
+ requestMap.put("query", elem);
+ }
}
// negate every token that's under OPNOT > ARG2
@@ -187,74 +262,105 @@
negate = true;
}
- if (nodeCat.equals("OPOR")) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- if (tokenGroupsStack.isEmpty()) {
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ if (nodeCat.equals("OPOR") || nodeCat.equals("OPAND")) {
+ // Step I: create group
+ LinkedHashMap<String, Object> disjunction = new LinkedHashMap<String, Object>();
+ disjunction.put("@type", "korap:group");
+ String relation = nodeCat.equals("OPOR") ? "or" : "and";
+ disjunction.put("relation", relation);
+ disjunction.put("operands", new ArrayList<Object>());
+ objectStack.push(disjunction);
+ stackedObjects++;
+ if (tokenStack.isEmpty()) {
+ queryMap.put("tokenGroup", tokenGroup);
} else {
- tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ tokenStack.getFirst().put("tokenGroup", tokenGroup);
}
tokenGroup.put("type", "disj");
- tokenGroupsStack.push(tokenGroup);
- }
-
- if (nodeCat.equals("OPAND")) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- if (tokenGroupsStack.isEmpty()) {
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ tokenStack.push(tokenGroup);
+
+ // Step II: decide where to put
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(disjunction);
} else {
- tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ requestMap.put("query", disjunction);
}
- tokenGroup.put("type", "conj");
- tokenGroupsStack.push(tokenGroup);
}
if (nodeCat.equals("OPPROX")) {
- distantTokens = new ArrayList<List<Object>>();
+ // Step I: create group
+ LinkedHashMap<String, Object> proxGroup = new LinkedHashMap<String, Object>();
+ proxGroup.put("@type", "korap:group");
+ proxGroup.put("relation", "distance");
+
+ // collect info
Tree prox_opts = node.getChild(0);
Tree typ = prox_opts.getChild(0);
- System.err.println(typ.getChild(0).toStringTree());
Tree dist_list = prox_opts.getChild(1);
- // get relevant information
String direction = dist_list.getChild(0).getChild(0).getChild(0).toStringTree();
String min = dist_list.getChild(0).getChild(1).getChild(0).toStringTree();
String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree();
+ String meas = dist_list.getChild(0).getChild(2).getChild(0).toStringTree();
+
if (min.equals("VAL0")) {
min=max;
}
- // create empty tokens and put them on the stack to place them between arg1 and arg2
- for (int i=0; i<Integer.parseInt(max)-1; i++) {
- ArrayList<Object> emptyToken = new ArrayList<Object>();
- LinkedHashMap<String,Object> emptyFieldMap = new LinkedHashMap<String,Object>();
- emptyToken.add(emptyFieldMap);
- tokenGroup.put("token"+tokenGroupCount+"_1", emptyToken);
- // mark all tokens between min and max optional
- if (i>=Integer.parseInt(min)) {
- emptyFieldMap.put("optional", "true");
- }
- distantTokens.add(emptyToken);
+
+ proxGroup.put("@subtype", meas);
+ proxGroup.put("min", min);
+ proxGroup.put("max", max);
+ proxGroup.put("operands", new ArrayList<Object>());
+ objectStack.push(proxGroup);
+ stackedObjects++;
+
+ // Step II: decide where to put
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(proxGroup);
+ } else {
+ requestMap.put("query", proxGroup);
}
- distantTokensStack.push(distantTokens);
}
+ if (nodeCat.equals("OPIN")) {
+ // Step I: create group
+ LinkedHashMap<String, Object> ingroup = new LinkedHashMap<String, Object>();
+ ingroup.put("@type", "korap:group");
+ ingroup.put("relation", "in");
+ ingroup.put("position", node.getChild(0).getChild(0).toStringTree());
+ ingroup.put("operands", new ArrayList<Object>());
+ objectStack.push(ingroup);
+ stackedObjects++;
+
+ // Step II: decide where to put
+ if (objectStack.size()>1) {
+ ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+ topObjectOperands.add(ingroup);
+ } else {
+ requestMap.put("query", ingroup);
+ }
+ }
-// System.err.println(tokenGroupsStack.size()+" "+tokenGroupsStack);
+ objectsToPop.push(stackedObjects);
+
// recursion until 'query' node (root of tree) is processed
for (int i=0; i<node.getChildCount(); i++) {
Tree child = node.getChild(i);
processNode(child);
}
+ for (int i=0; i<objectsToPop.get(0); i++) {
+ objectStack.pop();
+ }
+ objectsToPop.pop();
+
if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
negate = false;
}
if (nodeCat.equals("OPAND") || nodeCat.equals("OPOR")) {
- tokenGroupsStack.pop();
+ tokenStack.pop();
// tokenGroupCount--;
// tokenCount=0;
}
@@ -263,21 +369,8 @@
}
- /**
- * Returns the category (or 'label') of the root of a ParseTree.
- * @param node
- * @return
- */
- public String getNodeCat(Tree node) {
- String nodeCat = node.toStringTree();
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree());
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
+
private static Tree parseCosmasQuery(String p) {
Tree tree = null;
ANTLRStringStream
@@ -325,12 +418,19 @@
// "Sonne nicht (Mond Stern)",
// "Sonne nicht (Mond oder Stern)",
// "Sonne /+w1:4 Mond",
- "(sonne und mond) oder sterne",
- "(stern oder (sonne und mond)) und MORPH(V PCP)",
- "(sonne und (stern oder mond)) /+w2 luna???",
- "(Tag /+w2 $offenen) /+w1 Tür",
- "heißt /+w2 \"und\" ,"
+// "(sonne und mond) oder sterne",
+// "(stern oder (sonne und mond)) und MORPH(V PCP)",
+// "(sonne und (stern oder mond)) /+w2 luna???",
+// "(Tag /+w2 $offenen) /+w1 Tür",
+// "heißt /+w2 \"und\" ,"
+ "der",
+ "der Mann",
+ "Sonne nicht (Mond Stern)",
+ "Sonne /+w1:4 Mond",
+// "wegen #IN(L) <s>"
+ "#BEG(<s>) /5w,s0 #END(<s>)"
};
+ CosmasTree.debug=true;
for (String q : queries) {
try {
System.out.println(q);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 4a9f4a2..f291ead 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -7,8 +7,6 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
@@ -18,7 +16,6 @@
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
-import org.apache.commons.lang.StringUtils;
import de.ids_mannheim.korap.query.PoliqarpPlusLexer;
import de.ids_mannheim.korap.query.PoliqarpPlusParser;
@@ -233,7 +230,7 @@
isAligned=true;
}
- String nodeCat = getNodeCat(node);
+ String nodeCat = QueryUtils.getNodeCat(node);
openNodeCats.push(nodeCat);
stackedObjects = 0;
@@ -260,10 +257,10 @@
cqHasOccSibling = false;
cqHasOccChild = false;
// disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
- ignoreCq_segment = (node.getChildCount() == 1 && (node.getChild(0).toStringTree(poliqarpParser).equals(" ") || getNodeCat(node.getChild(0)).equals("spanclass") || getNodeCat(node.getChild(0)).equals("position")));
+ ignoreCq_segment = (node.getChildCount() == 1 && (node.getChild(0).toStringTree(poliqarpParser).equals(" ") || QueryUtils.getNodeCat(node.getChild(0)).equals("spanclass") || QueryUtils.getNodeCat(node.getChild(0)).equals("position")));
// ignore this node if it only serves as an aligned sequence container
if (node.getChildCount()>1) {
- if (getNodeCat(node.getChild(1)).equals("cq_segments") && hasChild(node.getChild(1), "align")) {
+ if (QueryUtils.getNodeCat(node.getChild(1)).equals("cq_segments") && QueryUtils.hasChild(node.getChild(1), "align")) {
ignoreCq_segment = true;
}
}
@@ -271,11 +268,11 @@
LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
// Step 0: cq_segments has 'occ' child -> introduce group as super group to the sequence/token/group
// this requires creating a group and inserting it at a suitable place
- if (node.getParent().getChildCount()>curChildIndex+2 && getNodeCat(node.getParent().getChild(curChildIndex+2)).equals("occ")) {
+ if (node.getParent().getChildCount()>curChildIndex+2 && QueryUtils.getNodeCat(node.getParent().getChild(curChildIndex+2)).equals("occ")) {
cqHasOccSibling = true;
createOccGroup(node);
}
- if (getNodeCat(node.getChild(node.getChildCount()-1)).equals("occ")) {
+ if (QueryUtils.getNodeCat(node.getChild(node.getChildCount()-1)).equals("occ")) {
cqHasOccChild = true;
}
// Step I: decide type of element (one or more elements? -> token or sequence)
@@ -290,9 +287,9 @@
} else {
// if only child, make the sequence a mere korap:token...
// ... but only if it has a real token/element beneath it
- if (getNodeCat(node.getChild(0)).equals("cq_segment")
- || getNodeCat(node.getChild(0)).equals("sq_segment")
- || getNodeCat(node.getChild(0)).equals("element") ) {
+ if (QueryUtils.getNodeCat(node.getChild(0)).equals("cq_segment")
+ || QueryUtils.getNodeCat(node.getChild(0)).equals("sq_segment")
+ || QueryUtils.getNodeCat(node.getChild(0)).equals("element") ) {
sequence.put("@type", "korap:token");
tokenStack.push(sequence);
stackedTokens++;
@@ -431,7 +428,7 @@
}
String value = "";
ParseTree valNode = node.getChild(2);
- String valType = getNodeCat(valNode);
+ String valType = QueryUtils.getNodeCat(valNode);
fieldMap.put("@type", "korap:term");
if (valType.equals("simple_query")) {
value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
@@ -470,7 +467,7 @@
stackedFields++;
// Step I: get operator (& or |)
ParseTree operatorNode = node.getChild(1).getChild(0);
- String operator = getNodeCat(operatorNode);
+ String operator = QueryUtils.getNodeCat(operatorNode);
String relation = operator.equals("&") ? "and" : "or";
if (negField) {
relation = relation.equals("or") ? "and": "or";
@@ -605,7 +602,7 @@
String id = "0";
// Step I: get info
boolean hasId = false;
- if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
+ if (QueryUtils.getNodeCat(node.getChild(1)).equals("spanclass_id")) {
hasId = true;
id = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
id = id.substring(0, id.length()-1); // remove trailing colon ':'
@@ -639,7 +636,7 @@
stackedObjects++;
ArrayList<Object> posOperands = new ArrayList<Object>();
// Step I: get info
- String relation = getNodeCat(node.getChild(0));
+ String relation = QueryUtils.getNodeCat(node.getChild(0));
positionGroup.put("@type", "korap:group");
positionGroup.put("relation", "position");
positionGroup.put("position", relation.toLowerCase());
@@ -664,8 +661,8 @@
ArrayList<Object> shrinkOperands = new ArrayList<Object>();
// Step I: get info
String operandClass = "0";
- String type = getNodeCat(node.getChild(0));
- if (getNodeCat(node.getChild(2)).equals("spanclass_id")) {
+ String type = QueryUtils.getNodeCat(node.getChild(0));
+ if (QueryUtils.getNodeCat(node.getChild(2)).equals("spanclass_id")) {
operandClass = node.getChild(2).getChild(0).toStringTree(poliqarpParser);
operandClass = operandClass.substring(0, operandClass.length()-1); // remove trailing colon ':'
// only allow class id up to 255
@@ -703,7 +700,7 @@
// flags for case sensitivity and whole-word-matching
if (nodeCat.equals("flag")) {
- String flag = getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
+ String flag = QueryUtils.getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
// add to current token's value
((HashMap<String, Object>) curToken.get("@value")).put("flag", flag);
}
@@ -715,9 +712,9 @@
metaFilter.put("@type", "korap:meta");
}
- if (nodeCat.equals("within") && !getNodeCat(node.getParent()).equals("position")) {
+ if (nodeCat.equals("within") && !QueryUtils.getNodeCat(node.getParent()).equals("position")) {
ParseTree domainNode = node.getChild(2);
- String domain = getNodeCat(domainNode);
+ String domain = QueryUtils.getNodeCat(domainNode);
LinkedHashMap<String,Object> curObject = (LinkedHashMap<String, Object>) objectStack.getFirst();
curObject.put("within", domain);
visited.add(node.getChild(0));
@@ -788,54 +785,11 @@
}
}
- /**
- * Returns the category (or 'label') of the root of a (sub-)ParseTree.
- * @param node
- * @return
- */
- public String getNodeCat(ParseTree node) {
- String nodeCat = node.toStringTree(poliqarpParser);
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree(poliqarpParser));
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
- /**
- * Tests whether a certain node has a child by a certain name
- * @param node The parent node.
- * @param childCat The category of the potential child.
- * @return true iff one or more children belong to the specified category
- */
- public boolean hasChild(ParseTree node, String childCat) {
- for (int i=0; i<node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(childCat)) {
- return true;
- }
- }
- return false;
- }
-
- private static void checkUnbalancedPars(String q) throws QueryException {
- int openingPars = StringUtils.countMatches(q, "(");
- int closingPars = StringUtils.countMatches(q, ")");
- int openingBrkts = StringUtils.countMatches(q, "[");
- int closingBrkts = StringUtils.countMatches(q, "]");
- int openingBrcs = StringUtils.countMatches(q, "{");
- int closingBrcs = StringUtils.countMatches(q, "}");
- if (openingPars != closingPars) throw new QueryException(
- "Your query string contains an unbalanced number of parantheses.");
- if (openingBrkts != closingBrkts) throw new QueryException(
- "Your query string contains an unbalanced number of brackets.");
- if (openingBrcs != closingBrcs) throw new QueryException(
- "Your query string contains an unbalanced number of braces.");
-
- }
+
private static ParserRuleContext parsePoliqarpQuery (String p) throws QueryException {
- checkUnbalancedPars(p);
+ QueryUtils.checkUnbalancedPars(p);
Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream)null);
ParserRuleContext tree = null;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index 6ea8db0..78300cf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -4,6 +4,14 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.apache.commons.lang.StringUtils;
+
+import de.ids_mannheim.korap.util.QueryException;
/**
* @author hanl
@@ -11,7 +19,66 @@
*/
public class QueryUtils {
-
+    /**
+     * Extracts the category (or 'label') of the root of an ANTLR 4 (sub-)ParseTree from its string form.
+     * @param node the (sub-)tree to inspect; it is rendered with PoliqarpPlusTree.poliqarpParser
+     * @return the text between the first opening parenthesis and the following whitespace, or the whole string form if there is no such prefix
+     */
+    public static String getNodeCat(ParseTree node) {
+        String treeString = node.toStringTree(PoliqarpPlusTree.poliqarpParser);
+        // the label runs from the opening parenthesis to the 1st whitespace
+        Matcher labelMatcher = Pattern.compile("\\((.*?)\\s").matcher(treeString);
+        if (!labelMatcher.find()) {
+            return treeString;
+        }
+        return labelMatcher.group(1);
+    }
+
+    /**
+     * Extracts the category (or 'label') of the root of an org.antlr.runtime Tree from its string form.
+     * @param node the (sub-)tree to inspect
+     * @return the text between the first opening parenthesis and the following whitespace, or the whole string form if there is no such prefix
+     */
+    public static String getNodeCat(Tree node) {
+        String treeString = node.toStringTree();
+        // the label runs from the opening parenthesis to the 1st whitespace
+        Matcher labelMatcher = Pattern.compile("\\((.*?)\\s").matcher(treeString);
+        if (!labelMatcher.find()) {
+            return treeString;
+        }
+        return labelMatcher.group(1);
+    }
+
+
+    /**
+     * Checks whether any direct child of a node belongs to a given category.
+     * @param node The parent node.
+     * @param childCat The category to look for among the children.
+     * @return true iff at least one direct child has the category childCat
+     */
+    public static boolean hasChild(ParseTree node, String childCat) {
+        boolean found = false;
+        int idx = 0;
+        while (!found && idx < node.getChildCount()) {
+            found = getNodeCat(node.getChild(idx++)).equals(childCat);
+        }
+        return found;
+    }
+
+    /**
+     * Rejects query strings in which the numbers of opening and closing parentheses, brackets or braces differ.
+     * Only the counts are compared; the order and nesting of the characters are not checked.
+     * @param q the query string to check
+     * @throws QueryException if "(" and ")", "[" and "]", or "{" and "}" do not occur equally often
+     */
+    public static void checkUnbalancedPars(String q) throws QueryException {
+        if (StringUtils.countMatches(q, "(") != StringUtils.countMatches(q, ")")) throw new QueryException(
+                "Your query string contains an unbalanced number of parentheses.");
+        if (StringUtils.countMatches(q, "[") != StringUtils.countMatches(q, "]")) throw new QueryException(
+                "Your query string contains an unbalanced number of brackets.");
+        if (StringUtils.countMatches(q, "{") != StringUtils.countMatches(q, "}")) throw new QueryException(
+                "Your query string contains an unbalanced number of braces.");
+    }
public static String buildCypherQuery(String cypher, String ctypel, String ctyper,
diff --git a/src/test/java/CosmasTreeTest.java b/src/test/java/CosmasTreeTest.java
new file mode 100644
index 0000000..e4d8c5a
--- /dev/null
+++ b/src/test/java/CosmasTreeTest.java
@@ -0,0 +1,119 @@
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+import de.ids_mannheim.korap.query.serialize.CosmasTree;
+import de.ids_mannheim.korap.query.serialize.PoliqarpPlusTree;
+import de.ids_mannheim.korap.util.QueryException;
+
+public class CosmasTreeTest {
+ // Scratch fields reused by the tests: the tree under test, the string form of its "query" entry, and the query input.
+ CosmasTree ppt;
+ String map;
+ String query;
+ // Compares str with map.toString(); all space characters are removed from both sides first.
+ private boolean equalsContent(String str, Object map) {
+ str = str.replaceAll(" ", "");
+ String mapStr = map.toString().replaceAll(" ", "");
+ return str.equals(mapStr);
+ }
+ // Builds a CosmasTree for query and compares its "query" entry with res, ignoring spaces; not called by any test in this class.
+ private boolean equalsQueryContent(String res, String query) throws QueryException {
+ res = res.replaceAll(" ", "");
+ ppt = new CosmasTree(query);
+ String queryMap = ppt.getRequestMap().get("query").toString().replaceAll(" ", "");
+ return res.equals(queryMap);
+ }
+ // The "@context" entry must equal the expected context; the comparison uses toString(), so entry order matters.
+ @Test
+ public void testContext() throws QueryException {
+ String contextString = "{korap=http://korap.ids-mannheim.de/ns/query, @language=de, operands={@id=korap:operands, @container=@list}, relation={@id=korap:relation, @type=korap:relation#types}, class={@id=korap:class, @type=xsd:integer}, query=korap:query, filter=korap:filter, meta=korap:meta}";
+ ppt = new CosmasTree("Test");
+ assertTrue(equalsContent(contextString, ppt.getRequestMap().get("@context")));
+ }
+
+ // A bare word is expected to yield an "orth:" term; a word prefixed with "&" is expected to yield a "base:" term.
+ @Test
+ public void testSingleToken() {
+ query="der";
+ String single1 =
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:der, relation==}}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(single1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query="Mann";
+ String single2 =
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mann, relation==}}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(single2.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query="&Mann";
+ String single3 =
+ "{@type=korap:token, @value={@type=korap:term, @value=base:Mann, relation==}}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(single3.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ }
+ // Two adjacent words are expected to serialize as a korap:sequence with both tokens as operands.
+ @Test
+ public void testSequence() {
+ query="der Mann";
+ String seq1 =
+ "{@type=korap:sequence, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:der, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mann, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(seq1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ }
+ // "oder" between two words is expected to serialize as a korap:group with relation=or.
+ @Test
+ public void testOPOR() throws QueryException {
+ query="Sonne oder Mond";
+ String disj1 =
+ "{@type=korap:group, relation=or, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(disj1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ }
+ // "/+w1:4" is expected to serialize as a distance group with @subtype=w, min=1 and max=4.
+ @Test
+ public void testOPPROX() {
+ query="Sonne /+w1:4 Mond";
+ String prox1 =
+ "{@type=korap:group, relation=distance, @subtype=w, min=1, max=4, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Sonne, relation==}}," +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:Mond, relation==}}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(prox1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ }
+ // "#IN(L)" is expected to serialize as an "in" group with position=L over a token and a korap:element.
+ @Test
+ public void testOPIN() {
+ query="wegen #IN(L) <s>";
+ String disj1 =
+ "{@type=korap:group, relation=in, position=L, operands=[" +
+ "{@type=korap:token, @value={@type=korap:term, @value=orth:wegen, relation==}}," +
+ "{@type=korap:element, @value=s}" +
+ "]}";
+ ppt = new CosmasTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(disj1.replaceAll(" ", ""), map.replaceAll(" ", ""));
+ }
+ // Placeholder test for OPNOT.
+ @Test
+ public void testOPNOT() {
+ // TODO: no assertions yet, so this test always passes.
+ }
+}
+