Restructured classes: added specific abstract AST classes for ANTLR 3 and ANTLR 4 that provide helper routines to the classes extending them
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
index 5ca3cb8..562ce66 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -1,13 +1,133 @@
package de.ids_mannheim.korap.query.serialize;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.apache.commons.lang.StringUtils;
import de.ids_mannheim.korap.util.QueryException;
public abstract class AbstractSyntaxTree {
-
+
public abstract Map<String, Object> getRequestMap();
public abstract void process(String query) throws QueryException;
+    /**
+     * Creates a fresh map representing a KorAP span object.
+     *
+     * @return a new map whose only entry is {@code "@type" -> "korap:span"}
+     */
+    protected LinkedHashMap<String, Object> makeSpan() {
+        final LinkedHashMap<String, Object> spanObject = new LinkedHashMap<String, Object>();
+        spanObject.put("@type", "korap:span");
+        return spanObject;
+    }
+
+    /**
+     * Creates a fresh map representing a KorAP term object.
+     *
+     * @return a new map whose only entry is {@code "@type" -> "korap:term"}
+     */
+    protected LinkedHashMap<String, Object> makeTerm() {
+        final LinkedHashMap<String, Object> termObject = new LinkedHashMap<String, Object>();
+        termObject.put("@type", "korap:term");
+        return termObject;
+    }
+
+    /**
+     * Creates a fresh map representing a KorAP token object.
+     *
+     * @return a new map whose only entry is {@code "@type" -> "korap:token"}
+     */
+    protected LinkedHashMap<String, Object> makeToken() {
+        final LinkedHashMap<String, Object> tokenObject = new LinkedHashMap<String, Object>();
+        tokenObject.put("@type", "korap:token");
+        return tokenObject;
+    }
+
+    /**
+     * Creates a fresh map representing a KorAP group object for the given operation.
+     *
+     * @param operation the bare operation name; it is stored prefixed with {@code "operation:"}
+     * @return a new map holding the entries {@code "@type"}, {@code "operation"} and an
+     *         empty, mutable {@code "operands"} list (in that insertion order)
+     */
+    protected LinkedHashMap<String, Object> makeGroup(String operation) {
+        final String qualifiedOperation = "operation:" + operation;
+        final LinkedHashMap<String, Object> groupObject = new LinkedHashMap<String, Object>();
+        groupObject.put("@type", "korap:group");
+        groupObject.put("operation", qualifiedOperation);
+        groupObject.put("operands", new ArrayList<Object>());
+        return groupObject;
+    }
+
+    /**
+     * Stores a new, empty operand list under the {@code "operands"} key of the given group.
+     * Whatever was stored under that key before is replaced.
+     *
+     * @param group the group map to modify
+     */
+    protected void addOperandsToGroup(LinkedHashMap<String, Object> group) {
+        group.put("operands", new ArrayList<Object>());
+    }
+
+    /**
+     * Matches the text between an opening parenthesis and the first whitespace that follows
+     * it. Compiled once because getNodeCat is called for every child in the lookup helpers.
+     */
+    private static final Pattern NODE_CAT_PATTERN = Pattern.compile("\\((.*?)\\s");
+
+    /**
+     * Returns the category (or 'label') of the root of a (sub-) ParseTree (ANTLR 3).
+     * The category is read off the node's string form: it is the text between an opening
+     * parenthesis and the first following whitespace.
+     *
+     * @param node the node whose category is requested
+     * @return the extracted category, or the complete string form of the node if that
+     *         string contains no parenthesised label
+     */
+    public static String getNodeCat(Tree node) {
+        // Stringify the subtree only once; the result doubles as the fallback value.
+        String treeString = node.toStringTree();
+        Matcher m = NODE_CAT_PATTERN.matcher(treeString);
+        return m.find() ? m.group(1) : treeString;
+    }
+
+
+    /**
+     * Tells whether at least one direct child of a node belongs to a given category.
+     *
+     * @param node The parent node.
+     * @param childCat The category to look for among the direct children.
+     * @return true iff one or more children belong to the specified category
+     */
+    public static boolean hasChild(Tree node, String childCat) {
+        int index = 0;
+        while (index < node.getChildCount()) {
+            String category = getNodeCat(node.getChild(index));
+            if (category.equals(childCat)) {
+                return true;
+            }
+            index++;
+        }
+        return false;
+    }
+
+
+
+    /**
+     * Collects all direct children of a node that belong to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the children must have.
+     * @return the matching children in child order; empty if there are none
+     */
+    public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) {
+        ArrayList<Tree> matches = new ArrayList<Tree>();
+        for (int idx = 0; idx < node.getChildCount(); idx++) {
+            Tree candidate = node.getChild(idx);
+            if (!getNodeCat(candidate).equals(nodeCat)) {
+                continue;
+            }
+            matches.add(candidate);
+        }
+        return matches;
+    }
+
+
+    /**
+     * Copies the direct children of an ANTLR 4 parse tree node into a list.
+     *
+     * @param node The parent node.
+     * @return a new list holding the children in child order; empty for a childless node
+     */
+    public static List<ParseTree> getChildren(ParseTree node) {
+        ArrayList<ParseTree> result = new ArrayList<ParseTree>();
+        int index = 0;
+        while (index < node.getChildCount()) {
+            result.add(node.getChild(index));
+            index++;
+        }
+        return result;
+    }
+
+    /**
+     * Finds the first direct child of a node that belongs to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the child must have.
+     * @return the first matching child in child order, or null if no child matches
+     */
+    public static Tree getFirstChildWithCat(Tree node, String nodeCat) {
+        Tree found = null;
+        for (int idx = 0; found == null && idx < node.getChildCount(); idx++) {
+            Tree candidate = node.getChild(idx);
+            if (getNodeCat(candidate).equals(nodeCat)) {
+                found = candidate;
+            }
+        }
+        return found;
+    }
+
+
+
+    /**
+     * Verifies that a query string contains as many opening as closing parentheses,
+     * square brackets and curly braces. Only the numbers of occurrences are compared;
+     * the order or nesting of the characters is not inspected.
+     *
+     * @param q the query string to check
+     * @throws QueryException if the counts differ for one kind of bracket; parentheses are
+     *         reported first, then square brackets, then curly braces
+     */
+    public static void checkUnbalancedPars(String q) throws QueryException {
+        if (StringUtils.countMatches(q, "(") != StringUtils.countMatches(q, ")")) {
+            throw new QueryException(
+                    "Your query string contains an unbalanced number of parantheses.");
+        }
+        if (StringUtils.countMatches(q, "[") != StringUtils.countMatches(q, "]")) {
+            throw new QueryException(
+                    "Your query string contains an unbalanced number of brackets.");
+        }
+        if (StringUtils.countMatches(q, "{") != StringUtils.countMatches(q, "}")) {
+            throw new QueryException(
+                    "Your query string contains an unbalanced number of braces.");
+        }
+    }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr3AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr3AbstractSyntaxTree.java
new file mode 100644
index 0000000..a9d1054
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr3AbstractSyntaxTree.java
@@ -0,0 +1,65 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.runtime.Parser;
+import org.antlr.runtime.tree.Tree;
+
+/**
+ * Base class for query serializers that work on ANTLR 3 trees. It bundles the
+ * helper routines for inspecting {@link Tree} nodes by category.
+ */
+public abstract class Antlr3AbstractSyntaxTree extends AbstractSyntaxTree {
+
+    /**
+     * Matches the text between an opening parenthesis and the first whitespace that follows
+     * it. Compiled once because getNodeCat is called for every child in the lookup helpers.
+     */
+    private static final Pattern NODE_CAT_PATTERN = Pattern.compile("\\((.*?)\\s");
+
+    /**
+     * The ANTLR 3 parser. This class never assigns it; extending classes set it.
+     */
+    public Parser parser;
+
+    /**
+     * Returns the category (or 'label') of the root of a (sub-) ParseTree (ANTLR 3).
+     * The category is read off the node's string form: it is the text between an opening
+     * parenthesis and the first following whitespace.
+     *
+     * @param node the node whose category is requested
+     * @return the extracted category, or the complete string form of the node if that
+     *         string contains no parenthesised label
+     */
+    public static String getNodeCat(Tree node) {
+        // Stringify the subtree only once; the result doubles as the fallback value.
+        String treeString = node.toStringTree();
+        Matcher m = NODE_CAT_PATTERN.matcher(treeString);
+        return m.find() ? m.group(1) : treeString;
+    }
+
+    /**
+     * Tests whether a certain node has a child by a certain name
+     *
+     * @param node The parent node.
+     * @param childCat The category of the potential child.
+     * @return true iff one or more children belong to the specified category
+     */
+    public static boolean hasChild(Tree node, String childCat) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            if (getNodeCat(node.getChild(i)).equals(childCat)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Collects all direct children of a node that belong to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the children must have.
+     * @return the matching children in child order; empty if there are none
+     */
+    public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) {
+        ArrayList<Tree> children = new ArrayList<Tree>();
+        for (int i = 0; i < node.getChildCount(); i++) {
+            Tree child = node.getChild(i);
+            if (getNodeCat(child).equals(nodeCat)) {
+                children.add(child);
+            }
+        }
+        return children;
+    }
+
+    /**
+     * Finds the first direct child of a node that belongs to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the child must have.
+     * @return the first matching child in child order, or null if no child matches
+     */
+    public static Tree getFirstChildWithCat(Tree node, String nodeCat) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            Tree child = node.getChild(i);
+            if (getNodeCat(child).equals(nodeCat)) {
+                return child;
+            }
+        }
+        return null;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr4AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr4AbstractSyntaxTree.java
new file mode 100644
index 0000000..c454f3c
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Antlr4AbstractSyntaxTree.java
@@ -0,0 +1,107 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+/**
+ * Base class for query serializers that work on ANTLR 4 parse trees. It bundles the
+ * helper routines for inspecting {@link ParseTree} nodes by category. The helpers render
+ * nodes with the {@link #parser} field, so that field has to be set by the extending class.
+ */
+public abstract class Antlr4AbstractSyntaxTree extends AbstractSyntaxTree {
+
+    /**
+     * Matches the text between an opening parenthesis and the first whitespace that follows
+     * it. Compiled once because getNodeCat is called for every child in the lookup helpers.
+     */
+    private static final Pattern NODE_CAT_PATTERN = Pattern.compile("\\((.*?)\\s");
+
+    /**
+     * Categories of child nodes that make their parent more than a mere container
+     * (see {@link #isContainerOnly(ParseTree)}).
+     */
+    private static final List<String> CONTENT_NODE_CATS =
+            Arrays.asList("cq_segment", "sq_segment", "element", "empty_segments");
+
+    /**
+     * The ANTLR 4 parser passed to toStringTree by the helpers below. This class never
+     * assigns it; extending classes set it.
+     */
+    public Parser parser;
+
+    /**
+     * Returns the category (or 'label') of the root of a (sub-) ParseTree (ANTLR 4).
+     * The category is read off the node's string form as produced by
+     * {@code node.toStringTree(parser)}: it is the text between an opening parenthesis
+     * and the first following whitespace.
+     *
+     * @param node the node whose category is requested
+     * @return the extracted category, or the complete string form of the node if that
+     *         string contains no parenthesised label
+     */
+    public String getNodeCat(ParseTree node) {
+        // Stringify the subtree only once; the result doubles as the fallback value.
+        String treeString = node.toStringTree(parser);
+        Matcher m = NODE_CAT_PATTERN.matcher(treeString);
+        return m.find() ? m.group(1) : treeString;
+    }
+
+    /**
+     * Tests whether a certain node has a child by a certain name
+     *
+     * @param node The parent node.
+     * @param childCat The category of the potential child.
+     * @return true iff one or more children belong to the specified category
+     */
+    public boolean hasChild(ParseTree node, String childCat) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            if (getNodeCat(node.getChild(i)).equals(childCat)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Tests whether a node has a descendant (child, grandchild, ...) of a certain category.
+     * The node itself is not considered.
+     *
+     * @param node The node below which to search.
+     * @param childCat The category of the potential descendant.
+     * @return true iff one or more descendants belong to the specified category
+     */
+    public boolean hasDescendant(ParseTree node, String childCat) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            ParseTree child = node.getChild(i);
+            // Check the child itself first, then search the subtree below it.
+            if (getNodeCat(child).equals(childCat) || hasDescendant(child, childCat)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Copies the direct children of a node into a list.
+     *
+     * @param node The parent node.
+     * @return a new list holding the children in child order; empty for a childless node
+     */
+    public static List<ParseTree> getChildren(ParseTree node) {
+        ArrayList<ParseTree> children = new ArrayList<ParseTree>();
+        for (int i = 0; i < node.getChildCount(); i++) {
+            children.add(node.getChild(i));
+        }
+        return children;
+    }
+
+    /**
+     * Collects all direct children of a node that belong to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the children must have.
+     * @return the matching children in child order; empty if there are none
+     */
+    public List<ParseTree> getChildrenWithCat(ParseTree node, String nodeCat) {
+        ArrayList<ParseTree> children = new ArrayList<ParseTree>();
+        for (int i = 0; i < node.getChildCount(); i++) {
+            ParseTree child = node.getChild(i);
+            if (getNodeCat(child).equals(nodeCat)) {
+                children.add(child);
+            }
+        }
+        return children;
+    }
+
+    /**
+     * Finds the first direct child of a node that belongs to a given category.
+     *
+     * @param node The parent node.
+     * @param nodeCat The category the child must have.
+     * @return the first matching child in child order, or null if no child matches
+     */
+    public ParseTree getFirstChildWithCat(ParseTree node, String nodeCat) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            ParseTree child = node.getChild(i);
+            if (getNodeCat(child).equals(nodeCat)) {
+                return child;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Checks whether a node only serves as a container for another node (e.g. in
+     * (cq_segment (cq_seg_occ ...)), the cq_segment node does not contain any information
+     * and only contains the cq_seg_occ node). A node counts as a container only if none of
+     * its direct children has one of the categories cq_segment, sq_segment, element or
+     * empty_segments.
+     *
+     * @param node The node to check
+     * @return true iff the node is a container only.
+     */
+    public boolean isContainerOnly(ParseTree node) {
+        for (int i = 0; i < node.getChildCount(); i++) {
+            if (CONTENT_NODE_CATS.contains(getNodeCat(node.getChild(i)))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
index 8979e1c..0c29bfa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AqlTree.java
@@ -2,13 +2,10 @@
import java.lang.reflect.Method;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
@@ -17,21 +14,18 @@
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
-import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.tree.ParseTree;
import de.ids_mannheim.korap.query.annis.AqlLexer;
import de.ids_mannheim.korap.query.annis.AqlParser;
-import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
import de.ids_mannheim.korap.util.QueryException;
/**
- * Map representation of Poliqarp syntax tree as returned by ANTLR
+ * Map representation of ANNIS QL syntax tree as returned by ANTLR
* @author joachim
*
*/
-public class AqlTree extends AbstractSyntaxTree {
-
+public class AqlTree extends Antlr4AbstractSyntaxTree {
/**
* Top-level map representing the whole request.
*/
@@ -47,7 +41,7 @@
-    /**
-     * Parser object deriving the ANTLR parse tree.
-     */
-    static Parser aqlParser;
/**
* Keeps track of all visited nodes in a tree
*/
@@ -57,6 +51,14 @@
*/
LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>();
/**
+ * Keeps track of explicitly (by #-var definition) or implicitly (number as reference) introduced entities (for later reference by #-operator)
+ */
+ Map<String, Object> variableReferences = new LinkedHashMap<String, Object>();
+ /**
+ * Counter for variable definitions.
+ */
+ Integer variableCounter = 1;
+ /**
* Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
*/
LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
@@ -77,6 +79,8 @@
*/
public AqlTree(String query) {
// prepareContext();
+// parseAnnisQuery(query);
+// super.parser = this.parser;
requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
try {
process(query);
@@ -86,6 +90,7 @@
System.out.println(">>> "+requestMap.get("query")+" <<<");
}
+ @SuppressWarnings("unused")
private void prepareContext() {
LinkedHashMap<String,Object> context = new LinkedHashMap<String,Object>();
LinkedHashMap<String,Object> operands = new LinkedHashMap<String,Object>();
@@ -121,7 +126,14 @@
+ /**
+ * Parses the given ANNIS QL query and processes the resulting parse tree, starting at its root.
+ *
+ * @param query the query string handed to parseAnnisQuery
+ * @throws QueryException propagated from parseAnnisQuery (its failure handling is not fully visible here)
+ * @throws NullPointerException if no parser has been set after parsing
+ */
@Override
public void process(String query) throws QueryException {
ParseTree tree = parseAnnisQuery(query);
+ // Make the parser available through the Antlr4AbstractSyntaxTree field: the inherited
+ // helpers (getNodeCat etc.) stringify nodes with that field.
+ if (this.parser != null) {
+ super.parser = this.parser;
+ } else {
+ throw new NullPointerException("Parser has not been instantiated!");
+ }
+
System.out.println("Processing Annis QL");
+ // Dump the full parse tree when verbose output is enabled.
+ if (verbose) System.out.println(tree.toStringTree(parser));
processNode(tree);
}
@@ -148,8 +160,76 @@
****************************************************************
*/
if (nodeCat.equals("start")) {
+ }
+
+ if (nodeCat.equals("exprTop")) {
+ // has several andTopExpr as children delimited by OR (Disj normal form)
+ if (node.getChildCount() > 1) {
+ // TODO or-groups for every and
+ }
+ }
+
+ if (nodeCat.equals("andTopExpr")) {
+ if (node.getChildCount() > 1) {
+ LinkedHashMap<String, Object> andGroup = makeGroup("and");
+ objectStack.push(andGroup);
+ stackedObjects++;
+ putIntoSuperObject(andGroup,1);
+ }
+ }
+
+ if (nodeCat.equals("expr")) {
+ // establish new variables or relations between vars
}
+
+ if (nodeCat.equals("variableExpr")) {
+ // simplex word or complex assignment (like qname = textSpec)?
+ if (node.getChildCount()==1) { // simplex
+ String firstChildNodeCat = getNodeCat(node.getChild(0));
+ if (firstChildNodeCat.equals("node")) {
+ LinkedHashMap<String, Object> span = makeSpan();
+ putIntoSuperObject(span);
+ variableReferences.put(variableCounter.toString(), span);
+ variableCounter++;
+ } else if (firstChildNodeCat.equals("tok")) {
+ // TODO
+ } else if (firstChildNodeCat.equals("qName")) { // only (foundry/)?layer specified
+ // TODO may also be token!
+ LinkedHashMap<String, Object> span = makeSpan();
+ span.putAll(parseQNameNode(node.getChild(0)));
+ putIntoSuperObject(span);
+ variableReferences.put(variableCounter.toString(), span);
+ variableCounter++;
+ }
+ } else if (node.getChildCount() == 3) { // (foundry/)?layer=key specification
+ LinkedHashMap<String, Object> span = makeSpan();
+ // get foundry and layer
+ span.putAll(parseQNameNode(node.getChild(0)));
+ // get key
+ span.putAll(parseVarKey(node.getChild(2)));
+ // get relation (match or no match)
+ span.put("match", parseMatchOperator(node.getChild(1)));
+ putIntoSuperObject(span);
+ variableReferences.put(variableCounter.toString(), span);
+ variableCounter++;
+ }
+ }
+
+ if (nodeCat.equals("regex")) {
+ // mother node can be start or other
+ // if start: make token root of tree
+ // else: integrate into super object
+ if (openNodeCats.get(1).equals("start")) {
+ LinkedHashMap<String, Object> token = makeToken();
+ LinkedHashMap<String, Object> term = makeTerm();
+ token.put("wrap", term);
+ term.put("type", "type:regex");
+ term.put("key", node.getChild(1).toStringTree(parser));
+ }
+ }
+
+
objectsToPop.push(stackedObjects);
@@ -172,32 +252,63 @@
**************************************************************
*/
-
- for (int i=0; i<objectsToPop.pop(); i++) {
- objectStack.pop();
+        if (!objectsToPop.isEmpty()) {
+            // Take this node's counter off the stack exactly once. Calling pop() in the
+            // loop condition would remove one further entry (belonging to an ancestor
+            // node) on every iteration and fail once the stack runs empty.
+            int numToPop = objectsToPop.pop();
+            for (int i=0; i<numToPop; i++) {
+                objectStack.pop();
+            }
         }
+
openNodeCats.pop();
}
+
+
+
+
/**
- * Returns the category (or 'label') of the root of a ParseTree.
+ * Parses the match operator (= or !=)
* @param node
* @return
*/
- public String getNodeCat(ParseTree node) {
- String nodeCat = node.toStringTree(aqlParser);
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree(aqlParser));
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
+    private String parseMatchOperator(ParseTree node) {
+        // Only a literal "=" means equality; every other operator text is treated as negation.
+        String operatorText = node.toStringTree(parser);
+        if (operatorText.equals("=")) {
+            return "match:eq";
+        }
+        return "match:ne";
     }
- @SuppressWarnings("unused")
+
+    /**
+     * Reads the 'key' field (and, where applicable, the regex type) from a textSpec node.
+     *
+     * @param node the textSpec node
+     * @return a map with a "key" entry if the node has exactly three children, plus a
+     *         "type" entry of "type:regex" if the first and last child are both "/";
+     *         an empty map for any other number of children (e.g. two: empty quotes)
+     */
+    private LinkedHashMap<String, Object> parseVarKey(ParseTree node) {
+        LinkedHashMap<String, Object> result = new LinkedHashMap<String, Object>();
+        // Only the three-child form (delimiter, content, delimiter) carries a key.
+        if (node.getChildCount() != 3) {
+            return result;
+        }
+        result.put("key", node.getChild(1).toStringTree(parser));
+        boolean opensWithSlash = node.getChild(0).toStringTree(parser).equals("/");
+        if (opensWithSlash && node.getChild(2).toStringTree(parser).equals("/")) {
+            // enclosed in slashes -> regular expression
+            result.put("type", "type:regex");
+        }
+        return result;
+    }
+
+
+    /**
+     * Reads the layer and, if given, the foundry from a qName node.
+     *
+     * @param node the qName node
+     * @return a map with a "layer" entry for a one-child node, with "foundry" and "layer"
+     *         entries (taken from the first and third child) for a three-child node, and
+     *         an empty map otherwise
+     */
+    private LinkedHashMap<String, Object> parseQNameNode(ParseTree node) {
+        LinkedHashMap<String, Object> result = new LinkedHashMap<String, Object>();
+        switch (node.getChildCount()) {
+        case 1: // only layer specification
+            result.put("layer", node.getChild(0).toStringTree(parser));
+            break;
+        case 3: // foundry / layer specification
+            result.put("foundry", node.getChild(0).toStringTree(parser));
+            result.put("layer", node.getChild(2).toStringTree(parser));
+            break;
+        default:
+            break;
+        }
+        return result;
+    }
+
private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
putIntoSuperObject(object, 0);
}
@@ -218,7 +329,7 @@
}
}
- private static ParserRuleContext parseAnnisQuery (String p) throws QueryException {
+ private ParserRuleContext parseAnnisQuery (String p) throws QueryException {
Lexer poliqarpLexer = new AqlLexer((CharStream)null);
ParserRuleContext tree = null;
// Like p. 111
@@ -228,15 +339,15 @@
ANTLRInputStream input = new ANTLRInputStream(p);
poliqarpLexer.setInputStream(input);
CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
- aqlParser = new AqlParser(tokens);
+ parser = new AqlParser(tokens);
// Don't throw out erroneous stuff
- aqlParser.setErrorHandler(new BailErrorStrategy());
- aqlParser.removeErrorListeners();
+ parser.setErrorHandler(new BailErrorStrategy());
+ parser.removeErrorListeners();
// Get starting rule from parser
Method startRule = AqlParser.class.getMethod("start");
- tree = (ParserRuleContext) startRule.invoke(aqlParser, (Object[])null);
+ tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null);
}
// Some things went wrong ...
@@ -272,16 +383,22 @@
"#1 ->LABEL[lbl=/foo/] #2",
"#1 ->LABEL[foundry/layer=\"foo\"] #2",
"#1 ->LABEL[foundry/layer=\"foo\"] #2",
- "node & node & #2 > #1",
+ "node & pos=\"VVFIN\" & #2 > #1",
+ "node & pos=\"VVFIN\" & #2 > #1",
+ "pos=\"VVFIN\" > cas=\"Nom\" ",
+ "pos=\"VVFIN\" >* cas=\"Nom\" ",
+ "tiger/pos=\"NN\" > node",
+ "ref#node & pos=\"NN\" > #ref",
+ "node & tree/pos=\"NN\"",
+ "/node/"
};
-// AqlTree.verbose=true;
+ AqlTree.verbose=true;
for (String q : queries) {
try {
System.out.println(q);
- System.out.println(AqlTree.parseAnnisQuery(q).toStringTree(AqlTree.aqlParser));
- @SuppressWarnings("unused")
+// System.out.println(AqlTree.parseAnnisQuery(q).toStringTree(AqlTree.parser));
AqlTree at = new AqlTree(q);
-// System.out.println(AqlTree.parseAnnisQuery(q).toStringTree(AqlTree.aqlParser));
+ System.out.println(at.parseAnnisQuery(q).toStringTree(at.parser));
System.out.println();
} catch (NullPointerException | QueryException npe) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
index 777d06a..f66fabf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -18,7 +18,6 @@
import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
import de.ids_mannheim.korap.query.cosmas2.c2psParser;
-import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
import de.ids_mannheim.korap.query.serialize.util.CosmasCondition;
import de.ids_mannheim.korap.util.QueryException;
@@ -27,11 +26,10 @@
* @author joachim
*
*/
-public class CosmasTree extends AbstractSyntaxTree {
+public class CosmasTree extends Antlr3AbstractSyntaxTree {
private static Logger log = LoggerFactory.getLogger(CosmasTree.class);
- private static c2psParser cosmasParser;
/*
* Following collections have the following functions:
* - the request is a map with two keys (meta/query): {meta=[], query=[]}
@@ -113,7 +111,8 @@
public CosmasTree(String query) throws QueryException {
this.query = query;
process(query);
- System.out.println(requestMap.get("query"));
+ System.out.println("\n"+requestMap.get("query"));
+ log.info(">>> " + requestMap.get("query") + " <<<");
}
@Override
@@ -136,7 +135,7 @@
System.out.println("Processing Cosmas");
requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
-// QueryUtils.prepareContext(requestMap);
+// prepareContext(requestMap);
processNode(tree);
}
@@ -147,7 +146,7 @@
else visited.add(node);
- String nodeCat = QueryUtils.getNodeCat(node);
+ String nodeCat = getNodeCat(node);
openNodeCats.push(nodeCat);
stackedObjects = 0;
@@ -166,7 +165,6 @@
// Check for potential implicit sequences as in (C2PQ (OPWF der) (OPWF Mann)). The sequence is introduced
// by the first child if it (and its siblings) is sequentiable.
if (sequentiableCats.contains(nodeCat)) {
- System.err.println(nodeCat);
// for each node, check if parent has more than one child (-> could be implicit sequence)
Tree parent = node.getParent();
if (parent.getChildCount()>1) {
@@ -174,7 +172,7 @@
if (node == parent.getChild(0)) {
hasSequentiableSiblings = false;
for (int i=1; i<parent.getChildCount() ;i++) {
- if (sequentiableCats.contains(QueryUtils.getNodeCat(parent.getChild(i)))) {
+ if (sequentiableCats.contains(getNodeCat(parent.getChild(i)))) {
hasSequentiableSiblings = true;
continue;
}
@@ -227,7 +225,7 @@
fieldMap.put("match", "match:eq");
}
//Step II: decide where to put
- if (! QueryUtils.hasChild(node, "TPOS")) {
+ if (! hasChild(node, "TPOS")) {
putIntoSuperObject(token, 1);
} else {
@@ -256,7 +254,7 @@
} else {
fieldMap.put("match", "match:eq");
}
-// List<String> morphValues = QueryUtils.parseMorph(node.getChild(0).toStringTree());
+// List<String> morphValues = parseMorph(node.getChild(0).toStringTree());
// System.err.println(morphValues);
// if (morphValues.size() == 1) {
// LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
@@ -649,10 +647,10 @@
private void parseOPINOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
- Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS");
- Tree rangenode = QueryUtils.getFirstChildWithCat(node, "RANGE");
- Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL");
- Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP");
+ Tree posnode = getFirstChildWithCat(node, "POS");
+ Tree rangenode = getFirstChildWithCat(node, "RANGE");
+ Tree exclnode = getFirstChildWithCat(node, "EXCL");
+ Tree groupnode = getFirstChildWithCat(node, "GROUP");
boolean negatePosition = false;
String position = "";
@@ -691,9 +689,9 @@
}
private void parseOPOVOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
- Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS");
- Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL");
- Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP");
+ Tree posnode = getFirstChildWithCat(node, "POS");
+ Tree exclnode = getFirstChildWithCat(node, "EXCL");
+ Tree groupnode = getFirstChildWithCat(node, "GROUP");
String position = "";
if (posnode != null) {
@@ -776,7 +774,7 @@
}
- private static Tree parseCosmasQuery(String q) throws RecognitionException {
+ private Tree parseCosmasQuery(String q) throws RecognitionException {
Pattern p = Pattern.compile("(\\w+):((\\+|-)?(sa|se|pa|pe|ta|te),?)+");
Matcher m = p.matcher(q);
@@ -799,8 +797,8 @@
ANTLRStringStream ss = new ANTLRStringStream(q);
c2psLexer lex = new c2psLexer(ss);
org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); //v3
- cosmasParser = new c2psParser(tokens);
- c2psParser.c2ps_query_return c2Return = cosmasParser.c2ps_query(); // statt t().
+ parser = new c2psParser(tokens);
+ c2psParser.c2ps_query_return c2Return = ((c2psParser) parser).c2ps_query(); // statt t().
// AST Tree anzeigen:
tree = (Tree)c2Return.getTree();
@@ -838,11 +836,8 @@
try {
System.out.println(q);
try {
- System.out.println(parseCosmasQuery(q).toStringTree());
@SuppressWarnings("unused")
CosmasTree act = new CosmasTree(q);
- } catch (RecognitionException e) {
- e.printStackTrace();
} catch (QueryException e) {
e.printStackTrace();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 79393e0..403619d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -19,7 +19,6 @@
import de.ids_mannheim.korap.query.PoliqarpPlusLexer;
import de.ids_mannheim.korap.query.PoliqarpPlusParser;
-import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
import de.ids_mannheim.korap.util.QueryException;
import org.slf4j.Logger;
@@ -30,7 +29,7 @@
* @author joachim
*
*/
-public class PoliqarpPlusTree extends AbstractSyntaxTree {
+public class PoliqarpPlusTree extends Antlr4AbstractSyntaxTree {
Logger log = LoggerFactory.getLogger(PoliqarpPlusTree.class);
/**
@@ -77,7 +76,7 @@
-    /**
-     * Parser object deriving the ANTLR parse tree.
-     */
-    static Parser poliqarpParser;
/**
* Keeps track of all visited nodes in a tree
*/
@@ -118,7 +117,7 @@
/**
* If true, print debug statements
*/
- public static boolean debug = false;
+ public static boolean verbose = false;
/**
* Index of the current child of its parent (needed for relating occ elements to their operands).
*/
@@ -173,9 +172,10 @@
System.exit(1);
}
}
+ super.parser = this.parser;
System.out.println("Processing PoliqarpPlus");
requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
-// QueryUtils.prepareContext(requestMap);
+// prepareContext(requestMap);
processNode(tree);
}
@@ -200,14 +200,14 @@
isAligned=true;
}
- String nodeCat = QueryUtils.getNodeCat(node);
+ String nodeCat = getNodeCat(node);
openNodeCats.push(nodeCat);
stackedObjects = 0;
stackedTokens= 0;
stackedFields = 0;
- if (debug) {
+ if (verbose) {
System.err.println(" "+objectStack);
// System.err.println(" "+tokenStack);
System.out.println(openNodeCats);
@@ -227,11 +227,11 @@
cqHasOccSibling = false;
cqHasOccChild = false;
// disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
- ignoreCq_segment = (node.getChildCount() == 1 && (node.getChild(0).toStringTree(poliqarpParser).equals(" ") || QueryUtils.getNodeCat(node.getChild(0)).equals("spanclass") || QueryUtils.getNodeCat(node.getChild(0)).equals("position")));
+ ignoreCq_segment = (node.getChildCount() == 1 && (node.getChild(0).toStringTree(parser).equals(" ") || getNodeCat(node.getChild(0)).equals("spanclass") || getNodeCat(node.getChild(0)).equals("position")));
// ignore this node if it only serves as an aligned sequence container
if (node.getChildCount()>1) {
- if (QueryUtils.getNodeCat(node.getChild(1)).equals("cq_segments") && QueryUtils.hasChild(node.getChild(1), "alignment")) {
-// if (QueryUtils.getNodeCat(node.getChild(0)).equals("align")) {
+ if (getNodeCat(node.getChild(1)).equals("cq_segments") && hasChild(node.getChild(1), "alignment")) {
+// if (getNodeCat(node.getChild(0)).equals("align")) {
ignoreCq_segment = true;
}
}
@@ -239,18 +239,18 @@
LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
// Step 0: cq_segments has 'occ' child -> introduce group as super group to the sequence/token/group
// this requires creating a group and inserting it at a suitable place
- if (node.getParent().getChildCount()>curChildIndex+2 && QueryUtils.getNodeCat(node.getParent().getChild(curChildIndex+2)).equals("occ")) {
+ if (node.getParent().getChildCount()>curChildIndex+2 && getNodeCat(node.getParent().getChild(curChildIndex+2)).equals("occ")) {
cqHasOccSibling = true;
createOccGroup(node);
}
- if (QueryUtils.getNodeCat(node.getChild(node.getChildCount()-1)).equals("occ")) {
+ if (getNodeCat(node.getChild(node.getChildCount()-1)).equals("occ")) {
cqHasOccChild = true;
}
// Step I: decide type of element (one or more elements? -> token or sequence)
// take into account a possible 'occ' child with accompanying parantheses, therefore 3 extra children
int occExtraChildren = cqHasOccChild ? 3:0;
if (node.getChildCount()>1 + occExtraChildren) {
- ParseTree emptySegments = QueryUtils.getFirstChildWithCat(node, "empty_segments");
+ ParseTree emptySegments = getFirstChildWithCat(node, "empty_segments");
if (emptySegments != null && emptySegments != node.getChild(0)) {
String[] minmax = parseEmptySegments(emptySegments);
Integer min = Integer.parseInt(minmax[0]);
@@ -288,7 +288,7 @@
} else {
// if only child, make the sequence a mere token...
// ... but only if it has a real token/element beneath it
- if (! QueryUtils.isContainerOnly(node)) {
+ if (! isContainerOnly(node)) {
sequence.put("@type", "korap:token");
tokenStack.push(sequence);
stackedTokens++;
@@ -343,9 +343,8 @@
}
} else if (!objectStack.isEmpty()){
// embed in super sequence
- System.out.println(objectStack);
ArrayList<Object> topSequenceOperands;
- if (! QueryUtils.isContainerOnly(node)) {
+ if (! isContainerOnly(node)) {
try {
topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
topSequenceOperands.add(sequence);
@@ -410,7 +409,6 @@
stackedObjects++;
// add group to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
// take into account a possible 'occ' child
- System.out.println(objectStack);
// if (node.getParent().getChildCount()>1) {
if (objectStack.size()>1) {
ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
@@ -448,23 +446,23 @@
String key = null;
ParseTree fieldNameNode = node.getChild(0);
if (fieldNameNode.getChildCount() == 1) {
- layer = fieldNameNode.getChild(0).toStringTree(poliqarpParser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
+ layer = fieldNameNode.getChild(0).toStringTree(parser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
} else if (fieldNameNode.getChildCount() == 3) {
// layer is indicated, merge layer and field name (0th and 2nd children, 1st is "/")
- foundry = fieldNameNode.getChild(0).toStringTree(poliqarpParser);
- layer = fieldNameNode.getChild(2).toStringTree(poliqarpParser);
+ foundry = fieldNameNode.getChild(0).toStringTree(parser);
+ layer = fieldNameNode.getChild(2).toStringTree(parser);
// } else if (fieldNameNode.getChildCount() == 5) {
// // layer and value are indicated
// foundry = fieldNameNode.getChild(0).toStringTree(poliqarpParser);
// layer = fieldNameNode.getChild(2).toStringTree(poliqarpParser);
// value = fieldNameNode.getChild(4).toStringTree(poliqarpParser);
}
- if (QueryUtils.hasChild(node, "key")) {
- ParseTree keyNode = QueryUtils.getFirstChildWithCat(node, "key");
- key = keyNode.getChild(0).toStringTree(poliqarpParser);
+ if (hasChild(node, "key")) {
+ ParseTree keyNode = getFirstChildWithCat(node, "key");
+ key = keyNode.getChild(0).toStringTree(parser);
}
- String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+ String relation = node.getChild(1).getChild(0).toStringTree(parser);
if (negField) {
if (relation.startsWith("!")) {
relation = relation.substring(1);
@@ -479,14 +477,14 @@
}
ParseTree valNode;
- if (QueryUtils.hasChild(node, "key")) valNode = node.getChild(3);
+ if (hasChild(node, "key")) valNode = node.getChild(3);
else valNode = node.getChild(2);
- String valType = QueryUtils.getNodeCat(valNode);
+ String valType = getNodeCat(valNode);
fieldMap.put("@type", "korap:term");
if (valType.equals("simple_query")) {
- value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
+ value = valNode.getChild(0).getChild(0).toStringTree(parser); //e.g. (simple_query (sq_segment foo))
} else if (valType.equals("re_query")) {
- value = valNode.getChild(0).toStringTree(poliqarpParser); //e.g. (re_query "bar*")
+ value = valNode.getChild(0).toStringTree(parser); //e.g. (re_query "bar*")
fieldMap.put("type", "type:regex");
value = value.substring(1,value.length()-1); //remove trailing quotes
}
@@ -530,7 +528,7 @@
// Step I: get operator (& or |)
ParseTree operatorNode = node.getChild(1).getChild(0);
- String operator = QueryUtils.getNodeCat(operatorNode);
+ String operator = getNodeCat(operatorNode);
String relation = operator.equals("&") ? "and" : "or";
if (negField) {
relation = relation.equals("or") ? "and": "or";
@@ -574,7 +572,7 @@
} else {
token.put("@type", "korap:token");
- String word = node.getChild(0).toStringTree(poliqarpParser);
+ String word = node.getChild(0).toStringTree(parser);
LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
token.put("wrap", tokenValues);
tokenValues.put("@type", "korap:term");
@@ -593,7 +591,7 @@
if (nodeCat.equals("re_query")) {
LinkedHashMap<String,Object> reQuery = new LinkedHashMap<String,Object>();
reQuery.put("type", "type:regex");
- String regex = node.getChild(0).toStringTree(poliqarpParser);
+ String regex = node.getChild(0).toStringTree(parser);
reQuery.put("key", regex);
reQuery.put("match", "match:"+"eq");
@@ -656,14 +654,14 @@
// Step II: fill object (token values) and put into containing sequence
elem.put("@type", "korap:span");
int valChildIdx = node.getChildCount()-2; // closing '>' is last child
- String value = node.getChild(valChildIdx).toStringTree(poliqarpParser);
- ParseTree foundryNode = QueryUtils.getFirstChildWithCat(node, "foundry");
- ParseTree layerNode = QueryUtils.getFirstChildWithCat(node, "layer");
+ String value = node.getChild(valChildIdx).toStringTree(parser);
+ ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
+ ParseTree layerNode = getFirstChildWithCat(node, "layer");
if (foundryNode != null) {
- elem.put("foundry", foundryNode.getChild(0).toStringTree(poliqarpParser));
+ elem.put("foundry", foundryNode.getChild(0).toStringTree(parser));
}
if (layerNode != null) {
- elem.put("layer", layerNode.getChild(0).toStringTree(poliqarpParser));
+ elem.put("layer", layerNode.getChild(0).toStringTree(parser));
}
elem.put("key", value);
// add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
@@ -685,8 +683,8 @@
ArrayList<Object> spanOperands = new ArrayList<Object>();
// Step I: get info
int classId = 0;
- if (QueryUtils.getNodeCat(node.getChild(1)).equals("spanclass_id")) {
- String ref = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+ if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
+ String ref = node.getChild(1).getChild(0).toStringTree(parser);
try {
classId = Integer.parseInt(ref);
} catch (NumberFormatException e) {
@@ -710,7 +708,7 @@
// ignore leading and trailing braces
visited.add(node.getChild(0));
visited.add(node.getChild(node.getChildCount()-1));
- if (QueryUtils.getNodeCat(node.getChild(1)).equals("spanclass_id")) {
+ if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
visited.add(node.getChild(1));
}
}
@@ -721,7 +719,7 @@
stackedObjects++;
ArrayList<Object> posOperands = new ArrayList<Object>();
// Step I: get info
- String relation = QueryUtils.getNodeCat(node.getChild(0));
+ String relation = getNodeCat(node.getChild(0));
positionGroup.put("@type", "korap:group");
positionGroup.put("operation", "operation:"+"position");
positionGroup.put("frame", "frame:"+relation.toLowerCase());
@@ -748,7 +746,7 @@
// Step I: get info
ArrayList<Integer> classRefs = new ArrayList<Integer>();
String classRefOp = null;
- if (QueryUtils.getNodeCat(node.getChild(2)).equals("spanclass_id")) {
+ if (getNodeCat(node.getChild(2)).equals("spanclass_id")) {
ParseTree spanNode = node.getChild(2);
for (int i=0; i<spanNode.getChildCount()-1; i++) {
String ref = spanNode.getChild(i).getText();
@@ -771,7 +769,7 @@
classRefs.add(0);
}
shrinkGroup.put("@type", "korap:group");
- String type = node.getChild(0).toStringTree(poliqarpParser);
+ String type = node.getChild(0).toStringTree(parser);
String operation = type.equals("shrink") ? "submatch" : "split";
shrinkGroup.put("operation", "operation:"+operation);
shrinkGroup.put("classRef", classRefs);
@@ -797,7 +795,7 @@
// repetition of token group
if (nodeCat.equals("occ")) {
ParseTree occChild = node.getChild(0);
- String repetition = occChild.toStringTree(poliqarpParser);
+ String repetition = occChild.toStringTree(parser);
int[] minmax = parseRepetition(repetition);
curOccGroup.put("operation", "operation:"+"repetition");
curOccGroup.put("min", minmax[0]);
@@ -807,7 +805,7 @@
// flags for case sensitivity and whole-word-matching
if (nodeCat.equals("flag")) {
- String flag = QueryUtils.getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
+ String flag = getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
// add to current token's value
if (flag.contains("i")) ((HashMap<String, Object>) curToken.get("wrap")).put("caseInsensitive", true);
else if (flag.contains("I")) ((HashMap<String, Object>) curToken.get("wrap")).put("caseInsensitive", false);
@@ -821,9 +819,9 @@
metaFilter.put("@type", "korap:meta");
}
- if (nodeCat.equals("within") && !QueryUtils.getNodeCat(node.getParent()).equals("position")) {
+ if (nodeCat.equals("within") && !getNodeCat(node.getParent()).equals("position")) {
ParseTree domainNode = node.getChild(2);
- String domain = QueryUtils.getNodeCat(domainNode);
+ String domain = getNodeCat(domainNode);
LinkedHashMap<String,Object> curObject = (LinkedHashMap<String, Object>) objectStack.getFirst();
curObject.put("within", domain);
visited.add(node.getChild(0));
@@ -875,12 +873,6 @@
openNodeCats.pop();
}
- private void markAllChildrenVisited(ParseTree node) {
- for (int i=0; i<node.getChildCount(); i++) {
- visited.add(node.getChild(i));
- }
- }
-
private int[] parseRepetition(String repetition) {
if (repetition.equals("*")) {
return new int[] {0, 100};
@@ -961,8 +953,8 @@
- private static ParserRuleContext parsePoliqarpQuery (String p) throws QueryException {
- QueryUtils.checkUnbalancedPars(p);
+ private ParserRuleContext parsePoliqarpQuery (String p) throws QueryException {
+ checkUnbalancedPars(p);
Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream)null);
ParserRuleContext tree = null;
@@ -973,15 +965,15 @@
ANTLRInputStream input = new ANTLRInputStream(p);
poliqarpLexer.setInputStream(input);
CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
- poliqarpParser = new PoliqarpPlusParser(tokens);
+ parser = new PoliqarpPlusParser(tokens);
// Don't throw out erroneous stuff
- poliqarpParser.setErrorHandler(new BailErrorStrategy());
- poliqarpParser.removeErrorListeners();
+ parser.setErrorHandler(new BailErrorStrategy());
+ parser.removeErrorListeners();
// Get starting rule from parser
Method startRule = PoliqarpPlusParser.class.getMethod("request");
- tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
+ tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null);
}
// Some things went wrong ...
@@ -1010,12 +1002,13 @@
"contains(<cnx/c=np>, [mate/pos=NE])",
"matches(<A>,[pos=N]*)",
"[base=Auto]matches(<A>,[][pos=N]{4})",
+ "[base=der][][base=Mann]"
};
- PoliqarpPlusTree.debug=true;
+// PoliqarpPlusTree.verbose=true;
for (String q : queries) {
try {
System.out.println(q);
- System.out.println(PoliqarpPlusTree.parsePoliqarpQuery(q).toStringTree(PoliqarpPlusTree.poliqarpParser));
+// System.out.println(PoliqarpPlusTree.parsePoliqarpQuery(q).toStringTree(PoliqarpPlusTree.parser));
@SuppressWarnings("unused")
PoliqarpPlusTree pt = new PoliqarpPlusTree(q);
System.out.println(q);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
index 2e71ec3..1d23f9e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
@@ -30,7 +30,7 @@
* @author joachim
*
*/
-public class PoliqarpTree extends AbstractSyntaxTree {
+public class PoliqarpTree extends Antlr4AbstractSyntaxTree {
/**
* Top-level map representing the whole request.
@@ -383,20 +383,20 @@
}
- /**
- * Returns the category (or 'label') of the root of a ParseTree.
- * @param node
- * @return
- */
- public String getNodeCat(ParseTree node) {
- String nodeCat = node.toStringTree(poliqarpParser);
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree(poliqarpParser));
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
+// /**
+// * Returns the category (or 'label') of the root of a ParseTree.
+// * @param node
+// * @return
+// */
+// public String getNodeCat(ParseTree node) {
+// String nodeCat = node.toStringTree(poliqarpParser);
+// Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+// Matcher m = p.matcher(node.toStringTree(poliqarpParser));
+// if (m.find()) {
+// nodeCat = m.group(1);
+// }
+// return nodeCat;
+// }
private static ParserRuleContext parsePoliqarpQuery (String p) {
Lexer poliqarpLexer = new PoliqarpLexer((CharStream)null);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index 0ef4e95..e7a5a9b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -1,165 +1,160 @@
package de.ids_mannheim.korap.query.serialize;
import de.ids_mannheim.korap.util.QueryException;
-import org.antlr.runtime.tree.Tree;
-import org.antlr.v4.runtime.tree.ParseTree;
import org.apache.commons.lang.StringUtils;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/**
* @author hanl
* @date 10/12/2013
*/
public class QueryUtils {
-
- /**
- * Returns the category (or 'label') of the root of a ParseTree (ANTLR 4).
- *
- * @param node
- * @return
- */
- public static String getNodeCat(ParseTree node) {
- String nodeCat = node.toStringTree(PoliqarpPlusTree.poliqarpParser);
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree(PoliqarpPlusTree.poliqarpParser));
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
-
- /**
- * Returns the category (or 'label') of the root of a ParseTree (ANTLR 3).
- *
- * @param node
- * @return
- */
- public static String getNodeCat(Tree node) {
- String nodeCat = node.toStringTree();
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree());
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
-
-
- /**
- * Tests whether a certain node has a child by a certain name
- *
- * @param node The parent node.
- * @param childCat The category of the potential child.
- * @return true iff one or more children belong to the specified category
- */
- public static boolean hasChild(Tree node, String childCat) {
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(childCat)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Tests whether a certain node has a child by a certain name
- *
- * @param node The parent node.
- * @param childCat The category of the potential child.
- * @return true iff one or more children belong to the specified category
- */
- public static boolean hasChild(ParseTree node, String childCat) {
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(childCat)) {
- return true;
- }
- }
- return false;
- }
-
- public static boolean hasDescendant(ParseTree node, String childCat) {
- for (int i = 0; i < node.getChildCount(); i++) {
- ParseTree child = node.getChild(i);
- if (getNodeCat(child).equals(childCat)) {
- return true;
- }
- if (hasDescendant(child, childCat)) {
- return true;
- }
- }
- return false;
- }
-
- public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) {
- ArrayList<Tree> children = new ArrayList<Tree>();
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
- children.add(node.getChild(i));
- }
- }
- return children;
- }
-
- public static List<ParseTree> getChildrenWithCat(ParseTree node, String nodeCat) {
- ArrayList<ParseTree> children = new ArrayList<ParseTree>();
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
- children.add(node.getChild(i));
- }
- }
- return children;
- }
-
- public static List<ParseTree> getChildren(ParseTree node) {
- ArrayList<ParseTree> children = new ArrayList<ParseTree>();
- for (int i = 0; i < node.getChildCount(); i++) {
- children.add(node.getChild(i));
- }
- return children;
- }
-
- public static Tree getFirstChildWithCat(Tree node, String nodeCat) {
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
- return node.getChild(i);
- }
- }
- return null;
- }
-
- public static ParseTree getFirstChildWithCat(ParseTree node, String nodeCat) {
- for (int i = 0; i < node.getChildCount(); i++) {
- if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
- return node.getChild(i);
- }
- }
- return null;
- }
-
- /**
- * Checks whether a node only serves as a container for another node (e.g. in (cq_segment ( cg_seg_occ ...)), the cq_segment node does not contain
- * any information and only contains the cq_seg_occ node.
- * @param node The node to check
- * @return true iff the node is a container only.
- */
- public static boolean isContainerOnly(ParseTree node) {
- String[] validNodeNamesArray = "cq_segment sq_segment element empty_segments".split(" ");
- List<String> validNodeNames = Arrays.asList(validNodeNamesArray);
- List<ParseTree> children = getChildren(node);
- for (ParseTree child : children) {
- if (validNodeNames.contains(getNodeCat(child))) {
- return false;
- }
- }
- return true;
- }
+//
+// /**
+// * Returns the category (or 'label') of the root of a ParseTree (ANTLR 4).
+// *
+// * @param node
+// * @return
+// */
+// public static String getNodeCat(ParseTree node) {
+// String nodeCat = node.toStringTree(parser);
+// Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+// Matcher m = p.matcher(node.toStringTree(parser));
+// if (m.find()) {
+// nodeCat = m.group(1);
+// }
+// return nodeCat;
+// }
+//
+// /**
+// * Returns the category (or 'label') of the root of a ParseTree (ANTLR 3).
+// *
+// * @param node
+// * @return
+// */
+// public static String getNodeCat(Tree node) {
+// String nodeCat = node.toStringTree();
+// Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+// Matcher m = p.matcher(node.toStringTree());
+// if (m.find()) {
+// nodeCat = m.group(1);
+// }
+// return nodeCat;
+// }
+//
+//
+// /**
+// * Tests whether a certain node has a child by a certain name
+// *
+// * @param node The parent node.
+// * @param childCat The category of the potential child.
+// * @return true iff one or more children belong to the specified category
+// */
+// public static boolean hasChild(Tree node, String childCat) {
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(childCat)) {
+// return true;
+// }
+// }
+// return false;
+// }
+//
+// /**
+// * Tests whether a certain node has a child by a certain name
+// *
+// * @param node The parent node.
+// * @param childCat The category of the potential child.
+// * @return true iff one or more children belong to the specified category
+// */
+// public static boolean hasChild(ParseTree node, String childCat) {
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(childCat)) {
+// return true;
+// }
+// }
+// return false;
+// }
+//
+// public static boolean hasDescendant(ParseTree node, String childCat) {
+// for (int i = 0; i < node.getChildCount(); i++) {
+// ParseTree child = node.getChild(i);
+// if (getNodeCat(child).equals(childCat)) {
+// return true;
+// }
+// if (hasDescendant(child, childCat)) {
+// return true;
+// }
+// }
+// return false;
+// }
+//
+// public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) {
+// ArrayList<Tree> children = new ArrayList<Tree>();
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
+// children.add(node.getChild(i));
+// }
+// }
+// return children;
+// }
+//
+// public static List<ParseTree> getChildrenWithCat(ParseTree node, String nodeCat) {
+// ArrayList<ParseTree> children = new ArrayList<ParseTree>();
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
+// children.add(node.getChild(i));
+// }
+// }
+// return children;
+// }
+//
+// public static List<ParseTree> getChildren(ParseTree node) {
+// ArrayList<ParseTree> children = new ArrayList<ParseTree>();
+// for (int i = 0; i < node.getChildCount(); i++) {
+// children.add(node.getChild(i));
+// }
+// return children;
+// }
+//
+// public static Tree getFirstChildWithCat(Tree node, String nodeCat) {
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
+// return node.getChild(i);
+// }
+// }
+// return null;
+// }
+//
+// public static ParseTree getFirstChildWithCat(ParseTree node, String nodeCat) {
+// for (int i = 0; i < node.getChildCount(); i++) {
+// if (getNodeCat(node.getChild(i)).equals(nodeCat)) {
+// return node.getChild(i);
+// }
+// }
+// return null;
+// }
+//
+// /**
+// * Checks whether a node only serves as a container for another node (e.g. in (cq_segment ( cq_seg_occ ...)), the cq_segment node does not contain
+// * any information and only contains the cq_seg_occ node.
+// * @param node The node to check
+// * @return true iff the node is a container only.
+// */
+// public static boolean isContainerOnly(ParseTree node) {
+// String[] validNodeNamesArray = "cq_segment sq_segment element empty_segments".split(" ");
+// List<String> validNodeNames = Arrays.asList(validNodeNamesArray);
+// List<ParseTree> children = getChildren(node);
+// for (ParseTree child : children) {
+// if (validNodeNames.contains(getNodeCat(child))) {
+// return false;
+// }
+// }
+// return true;
+// }
public static void checkUnbalancedPars(String q) throws QueryException {
int openingPars = StringUtils.countMatches(q, "(");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
new file mode 100644
index 0000000..e98ca7b
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java
@@ -0,0 +1,272 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.BailErrorStrategy;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import de.ids_mannheim.korap.query.annis.AqlLexer;
+import de.ids_mannheim.korap.query.annis.AqlParser;
+import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
+import de.ids_mannheim.korap.util.QueryException;
+
+/**
+ * Map representation of syntax tree as returned by ANTLR
+ * @author joachim
+ *
+ */
+public class TreeTemplate extends Antlr4AbstractSyntaxTree {
+ /**
+ * Top-level map representing the whole request.
+ */
+ LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of open node categories
+ */
+ LinkedList<String> openNodeCats = new LinkedList<String>();
+ /**
+ * Flag that indicates whether token fields or meta fields are currently being processed
+ */
+ boolean inMeta = false;
+ /**
+ * Parser object deriving the ANTLR parse tree.
+ */
+ static Parser qlParser;
+ /**
+ * Keeps track of all visited nodes in a tree
+ */
+ List<ParseTree> visited = new ArrayList<ParseTree>();
+ /**
+ * Keeps track of active object.
+ */
+ LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
+ */
+ LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
+
+ private LinkedList<ArrayList<ArrayList<Object>>> distributedOperandsLists = new LinkedList<ArrayList<ArrayList<Object>>>();
+
+ /**
+ * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
+ */
+ LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
+ Integer stackedObjects = 0;
+ public static boolean verbose = false;
+
+ /**
+ * Registers the JSON-LD context URL in the request map and processes the given query.
+ * A QueryException raised by {@link #process(String)} is caught and its stack trace printed.
+ * @param query The query string to be processed
+ */
+ public TreeTemplate(String query) {
+// prepareContext();
+ requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
+ try {
+ process(query);
+ } catch (QueryException e) {
+ e.printStackTrace();
+ }
+ System.out.println(">>> "+requestMap.get("query")+" <<<");
+ }
+
+ /** Assembles a detailed JSON-LD context map and stores it under "@context" in the request map. */
+ private void prepareContext() {
+     LinkedHashMap<String,Object> operandsDef = new LinkedHashMap<String,Object>();
+     operandsDef.put("@id", "korap:operands");
+     operandsDef.put("@container", "@list");
+
+     LinkedHashMap<String,Object> relationDef = new LinkedHashMap<String,Object>();
+     relationDef.put("@id", "korap:relation");
+     relationDef.put("@type", "korap:relation#types");
+
+     LinkedHashMap<String,Object> classDef = new LinkedHashMap<String,Object>();
+     classDef.put("@id", "korap:class");
+     classDef.put("@type", "xsd:integer");
+
+     LinkedHashMap<String,Object> contextDef = new LinkedHashMap<String,Object>();
+     contextDef.put("korap", "http://korap.ids-mannheim.de/ns/query");
+     contextDef.put("@language", "de");
+     contextDef.put("operands", operandsDef);
+     contextDef.put("relation", relationDef);
+     contextDef.put("class", classDef);
+     contextDef.put("query", "korap:query");
+     contextDef.put("filter", "korap:filter");
+     contextDef.put("meta", "korap:meta");
+
+     requestMap.put("@context", contextDef);
+ }
+ /** Returns the top-level map representing the whole request. */
+ @Override
+ public Map<String, Object> getRequestMap() {
+ return requestMap;
+ }
+ /** Parses the query with parseAnnisQuery(String), then processes the resulting tree starting at its root node. */
+ @Override
+ public void process(String query) throws QueryException {
+ ParseTree tree = parseAnnisQuery(query);
+ System.out.println("Processing Annis QL");
+ processNode(tree);
+ }
+
+ /**
+  * Processes a node of the parse tree and then, recursively, all of its children (top-down).
+  * Nodes already recorded in {@link #visited} are skipped.
+  * @param node The parse tree node to process
+  */
+ private void processNode(ParseTree node) {
+     // Top-down processing
+     if (visited.contains(node)) return;
+     else visited.add(node);
+
+     String nodeCat = getNodeCat(node);
+     openNodeCats.push(nodeCat);
+
+     stackedObjects = 0;
+
+     if (verbose) {
+         System.err.println(" "+objectStack);
+         System.out.println(openNodeCats);
+     }
+
+     /*
+      ****************************************************************
+      * Processing individual node categories *
+      ****************************************************************
+      */
+
+     objectsToPop.push(stackedObjects);
+
+     /*
+      ****************************************************************
+      * recursion until 'request' node (root of tree) is processed *
+      ****************************************************************
+      */
+     for (int i=0; i<node.getChildCount(); i++) {
+         ParseTree child = node.getChild(i);
+         processNode(child);
+     }
+
+     /*
+      **************************************************************
+      * Stuff that happens after processing the children of a node *
+      **************************************************************
+      */
+
+     // Pop this node's counter exactly once: calling objectsToPop.pop() in the loop condition
+     // would remove one counter per iteration and thereby consume the counters of enclosing nodes.
+     int toPop = objectsToPop.pop();
+     for (int i=0; i<toPop; i++) {
+         objectStack.pop();
+     }
+
+     openNodeCats.pop();
+ }
+
+// /**
+// * Returns the category (or 'label') of the root of a ParseTree.
+// * @param node
+// * @return
+// */
+// public String getNodeCat(ParseTree node) {
+// String nodeCat = node.toStringTree(qlParser);
+// Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+// Matcher m = p.matcher(node.toStringTree(qlParser));
+// if (m.find()) {
+// nodeCat = m.group(1);
+// }
+// return nodeCat;
+// }
+
+ /** Puts the object into the super object at stack position 0, see {@link #putIntoSuperObject(LinkedHashMap, int)}. */
+ @SuppressWarnings("unused")
+ private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
+     putIntoSuperObject(object, 0);
+ }
+ /** Appends to pending distributed operand lists if any, else prepends to the operands at the given stack position, else sets the object as query. */
+ @SuppressWarnings({ "unchecked" })
+ private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
+     if (!distributedOperandsLists.isEmpty()) {
+         for (ArrayList<Object> targetList : distributedOperandsLists.pop()) {
+             targetList.add(object);
+         }
+         return;
+     }
+     if (objectStack.size() <= objStackPosition) {
+         requestMap.put("query", object);
+         return;
+     }
+     ((ArrayList<Object>) objectStack.get(objStackPosition).get("operands")).add(0, object);
+ }
+
+ /**
+  * Parses a query with the AQL lexer and parser and returns the resulting parse tree.
+  * @param p The query string
+  * @return The tree returned by the parser's "start" rule
+  * @throws QueryException if no parse tree could be derived from the query
+  */
+ private static ParserRuleContext parseAnnisQuery (String p) throws QueryException {
+     Lexer aqlLexer = new AqlLexer((CharStream)null);
+     ParserRuleContext tree = null;
+     try {
+         // Tokenize input data
+         ANTLRInputStream input = new ANTLRInputStream(p);
+         aqlLexer.setInputStream(input);
+         CommonTokenStream tokens = new CommonTokenStream(aqlLexer);
+         qlParser = new AqlParser(tokens);
+
+         // Abort at the first syntax error and suppress the default error listeners
+         qlParser.setErrorHandler(new BailErrorStrategy());
+         qlParser.removeErrorListeners();
+         // Get starting rule from parser
+         Method startRule = AqlParser.class.getMethod("start");
+         tree = (ParserRuleContext) startRule.invoke(qlParser, (Object[])null);
+     } catch (Exception e) {
+         // Method.invoke wraps parser failures in an exception whose own message is null,
+         // so report the underlying cause when there is one
+         Throwable problem = (e.getCause() != null) ? e.getCause() : e;
+         System.err.println(problem);
+     }
+     if (tree == null) {
+         throw new QueryException("Could not parse query. Make sure it is correct QL syntax.");
+     }
+     return tree;
+ }
+
+ public static void main(String[] args) {
+ /*
+ * For manual testing: prints each query and its parse tree, then builds a TreeTemplate from it.
+ */
+ String[] queries = new String[] { // currently empty: add test queries here, otherwise the loop below does nothing
+ };
+ TreeTemplate.verbose=true;
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(TreeTemplate.parseAnnisQuery(q).toStringTree(TreeTemplate.qlParser));
+ @SuppressWarnings("unused")
+ TreeTemplate at = new TreeTemplate(q);
+// System.out.println(TreeTemplate.parseAnnisQuery(q).toStringTree(TreeTemplate.aqlParser));
+ System.out.println();
+
+ } catch (NullPointerException | QueryException npe) {
+ npe.printStackTrace();
+ }
+ }
+ }
+
+}
\ No newline at end of file