// blob: 298d45d3e780878ef6c17d5dbbbfad1b13cfc897 [file] [log] [blame]
package de.ids_mannheim.korap.query.serialize;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import de.ids_mannheim.korap.query.parse.annis.AqlLexer;
import de.ids_mannheim.korap.query.parse.annis.AqlParser;
import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener;
import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
/**
* Processor class for ANNIS QL queries. This class uses an ANTLR v4 grammar
* for query parsing, it therefore extends {@link Antlr4AbstractQueryProcessor}.
* The parser object is inherited from the parent class and instantiated in
* {@link #parseAnnisQuery(String)} as an {@link AqlParser}.
*
* @see <a href="http://annis-tools.org/aql.html">ANNIS Query Language (AQL)</a>
*
* @author Joachim Bingel (bingel@ids-mannheim.de)
* @version 0.1.0
* @since 0.1.0
*/
public class AnnisQueryProcessor extends Antlr4AbstractQueryProcessor {
/** Logger used by this class. */
private static Logger log = LoggerFactory.getLogger(AnnisQueryProcessor.class);
/**
* Flag that indicates whether token fields or meta fields are currently
* being processed.
* NOTE(review): not referenced anywhere else in this class.
*/
boolean inMeta = false;
/**
* Keeps track of operands that are to be integrated into yet uncreated
* objects. Relation groups that must become operands of a subsequent
* relation are pushed here and popped when that relation is built.
*/
LinkedList<LinkedHashMap<String,Object>> operandStack =
new LinkedList<LinkedHashMap<String,Object>>();
/**
* Keeps track of explicitly (by #-var definition) or implicitly (number
* as reference) introduced entities (for later reference by #-operator).
* Maps the reference string to the KoralQuery object built for it.
*/
Map<String, LinkedHashMap<String,Object>> nodeVariables =
new LinkedHashMap<String, LinkedHashMap<String,Object>>();
/**
* Maps each <tt>variableExpr</tt> parse tree node to the reference string
* (explicit variable name or running number) under which its object is
* stored in {@link #nodeVariables}.
*/
Map<ParseTree, String> nodes2refs= new LinkedHashMap<ParseTree, String>();
/**
* Counter for variable definitions. Starts at 1; its current value is used
* as the implicit reference of a node that has no explicit variable name.
*/
Integer variableCount = 1;
/**
* Marks the currently active token in order to know where to add flags
* (might already have been taken away from token stack).
* NOTE(review): not referenced anywhere else in this class.
*/
LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
/**
* Keeps track of operands lists that are to be serialised in an inverted
* order (e.g. the IN() operator) compared to their AST representation.
* NOTE(review): only read by this class (in putIntoSuperObject); nothing
* in this class ever adds to it.
*/
private LinkedList<ArrayList<Object>> invertedOperandsLists =
new LinkedList<ArrayList<Object>>();
/**
* Keeps track of operation:class numbers. Class IDs handed out by this
* class are computed as <tt>128 + classCounter</tt>.
*/
int classCounter = 1;
/**
* Keeps track of the number of relations processed (important when dealing
* with multiple predications).
*/
int relationCounter = 0;
/**
* Total number of <tt>n_ary_linguistic_term</tt> nodes found during the
* initial scan in {@link #processAndTopExpr(ParseTree)}. Compared against
* {@link #relationCounter} to decide whether the relation currently being
* processed is the last one; a value of 0 means the query consists of
* plain objects without any relation.
*/
private int totalRelationCount = 0;
/**
* Keeps a record of reference-class-mapping, i.e. which 'class' has been
* assigned to which #n reference. This is important when introducing
* korap:reference spans to refer back to previously established classes for
* entities.
*/
private LinkedHashMap<String, Integer> refClassMapping =
new LinkedHashMap<String, Integer>();
/**
* Keeps a record of unary relations on spans/tokens, keyed by the
* reference string of the node they apply to.
*/
private LinkedHashMap<String, ArrayList<ParseTree>> unaryRelations =
new LinkedHashMap<String, ArrayList<ParseTree>>();
/**
* Keeps track of the number of references to a node/token by means of #n.
* E.g. in the query {@code tok="x" & tok="y" & tok="z" & #1 . #2 & #2 . #3},
* the 2nd token ("y") is referenced twice, the others once.
*/
private LinkedHashMap<String, Integer> nodeReferencesTotal =
new LinkedHashMap<String, Integer>();
/**
* Keeps track of the number of references to a node/token that have
* already been processed.
*/
private LinkedHashMap<String, Integer> nodeReferencesProcessed =
new LinkedHashMap<String, Integer>();
/**
* Keeps track of queued relations. A relation sometimes cannot be processed
* directly, namely when it does not share any operand with a previously
* processed relation. It then waits here until a relation with a shared
* operand has been processed.
*/
private LinkedList<ParseTree> queuedRelations = new LinkedList<ParseTree>();
/**
* For some objects, it may be decided in the initial scan
* ({@link #processAndTopExpr(ParseTree)}) that they need to be wrapped in a
* class operation when retrieved later. This map stores this information.
* More precisely, it stores for every such node in the tree which class ID
* its derived KoralQuery object will receive.
*/
private LinkedHashMap<ParseTree, Integer> objectsToWrapInClass =
new LinkedHashMap<ParseTree, Integer>();
/**
* Creates a processor for the given ANNIS QL query. The new instance is
* first registered with {@link KoralObjectGenerator} and the query is then
* processed immediately via {@link #process(String)}.
* @param query The ANNIS QL query string to process.
*/
public AnnisQueryProcessor(String query) {
// Register before processing: process() builds objects through the generator.
KoralObjectGenerator.setQueryProcessor(this);
process(query);
}
/**
 * Parses the given ANNIS QL query and, if a parse tree could be built,
 * traverses it to fill the request map. After the traversal, one relation
 * that is still waiting in the queue (if any) is taken off: it is processed
 * when one of its operands has been processed before, otherwise an
 * {@link StatusCodes#UNBOUND_ANNIS_RELATION} error is recorded and the
 * query is replaced by an empty object.
 *
 * @param query The ANNIS QL query string.
 * @throws NullPointerException if no parser has been instantiated.
 */
@Override
public void process(String query) {
    ParseTree tree = parseAnnisQuery(query);
    if (this.parser == null) {
        throw new NullPointerException("Parser has not been instantiated!");
    }
    log.info("Processing Annis query: " + query);
    if (tree == null) {
        // Parsing failed; the error has already been recorded.
        return;
    }
    log.debug("ANTLR parse tree: " + tree.toStringTree(parser));
    processNode(tree);

    // Last check to see if all relations have left the queue
    if (queuedRelations.isEmpty()) {
        return;
    }
    ParseTree pending = queuedRelations.pop();
    if (verbose) {
        System.out.println("Taking off queue (last rel): " + pending.getText());
    }
    if (!checkOperandsProcessedPreviously(pending)) {
        addError(StatusCodes.UNBOUND_ANNIS_RELATION,
                "The relation " + pending.getText()
                        + " is not bound to any other relations.");
        requestMap.put("query", new LinkedHashMap<String, Object>());
        return;
    }
    processNode(pending);
}
/**
 * Traverses the parse tree by recursively calling itself, starting with
 * the root node of the tree and calling itself with the children of its
 * current node in a depth-first, left-to-right fashion. In each call,
 * depending on the category of the current node, special processor
 * methods for the respective node category are called to process the node.
 * <p>
 * Before an <tt>n_ary_linguistic_term</tt> node is handled, the first
 * queued relation (if any) is processed when one of its operands has
 * become available in the meantime. After the children of a node have been
 * visited, the objects this node put on the object stack are removed again.
 *
 * @param node The node currently visited in the parse tree traversal.
 */
private void processNode(ParseTree node) {
    String nodeCat = getNodeCat(node);
    // Top-down processing
    if (visited.contains(node)) return;
    openNodeCats.push(nodeCat);
    stackedObjects = 0;
    // Before doing anything else, check if any relations are queued
    // and need to be processed first
    if (nodeCat.equals("n_ary_linguistic_term")) {
        if (!queuedRelations.isEmpty()) {
            ParseTree queued = queuedRelations.getFirst();
            if (checkOperandsProcessedPreviously(queued)) {
                if (verbose) System.out.println("Taking off queue: "+
                        queued.getText());
                queuedRelations.removeFirst();
                processNode(queued);
            }
        }
    }
    if (verbose) {
        System.err.println(" "+objectStack);
        System.out.println(openNodeCats);
    }
    /*
     ****************************************************************
     * Processing individual node categories                        *
     ****************************************************************
     */
    if (nodeCat.equals("exprTop")) {
        processExprTop(node);
    }
    if (nodeCat.equals("andTopExpr")) {
        processAndTopExpr(node);
    }
    if (nodeCat.equals("n_ary_linguistic_term")) {
        processN_ary_linguistic_term(node);
    }
    // Remember how many objects this node has put on the object stack.
    objectsToPop.push(stackedObjects);
    /*
     ****************************************************************
     * Recursion over the children of the current node              *
     ****************************************************************
     */
    for (int i=0; i<node.getChildCount(); i++) {
        ParseTree child = node.getChild(i);
        processNode(child);
    }
    /*
     **************************************************************
     * Stuff that happens after processing the children of a node *
     **************************************************************
     */
    if (!objectsToPop.isEmpty()) {
        // Pop the counter exactly once. Calling pop() inside the loop
        // condition would remove one further entry from the stack on
        // every evaluation of the condition (consuming the counters of
        // ancestor nodes) and could fail on an exhausted stack.
        int toPop = objectsToPop.pop();
        for (int i=0; i<toPop; i++) {
            objectStack.pop();
        }
    }
    openNodeCats.pop();
}
/**
 * Processes an <tt>andTopExpr</tt> node. This is a child of the root
 * and contains a set of expressions connected by logical conjunction.
 * Several of these nodes are possibly connected via disjunction.
 * <p>
 * The method performs three scans over the descendants of the node:
 * it collects the unary relations per reference, counts how often each
 * reference is used as operand of an n-ary relation (and how many n-ary
 * relations there are in total), and finally creates the objects for all
 * variable expressions.
 *
 * @param node The current parse tree node (must be of category
 * <tt>andTopExpr</tt>).
 */
private void processAndTopExpr(ParseTree node) {
    // Scan 1: group unary relations by the reference they apply to, so
    // that they can be attached to the referenced object when it is built.
    for (ParseTree unaryTerm :
            getDescendantsWithCat(node, "unary_linguistic_term")) {
        String ref = getNodeCat(unaryTerm.getChild(0)).substring(1);
        ArrayList<ParseTree> termsForRef = unaryRelations.get(ref);
        if (termsForRef == null) {
            termsForRef = new ArrayList<ParseTree>();
            unaryRelations.put(ref, termsForRef);
        }
        termsForRef.add(unaryTerm);
    }

    // Scan 2: count the relations and the #-references used as their
    // operands. Referenced operand nodes are not written into the query
    // map individually but as operands of the relations using them.
    for (ParseTree relationNode :
            getDescendantsWithCat(node, "n_ary_linguistic_term")) {
        for (ParseTree operandNode :
                getChildrenWithCat(relationNode, "refOrNode")) {
            String operandText = operandNode.getChild(0).toStringTree(parser);
            if (!operandText.startsWith("#")) {
                continue;
            }
            String ref = operandText.substring(1);
            Integer seenSoFar = nodeReferencesTotal.get(ref);
            if (seenSoFar == null) {
                nodeReferencesTotal.put(ref, 1);
                nodeReferencesProcessed.put(ref, 0);
            } else {
                nodeReferencesTotal.put(ref, seenSoFar + 1);
            }
        }
        totalRelationCount++;
    }

    // Scan 3: mine all object definitions.
    for (ParseTree variableExprNode :
            getDescendantsWithCat(node, "variableExpr")) {
        // The reference is the explicit variable name (with its '#'
        // removed) if there is one, otherwise the running number.
        ParseTree varDef =
                getFirstChildWithCat(variableExprNode.getParent(), "varDef");
        String ref = (varDef != null)
                ? varDef.getText().replaceFirst("#", "")
                : variableCount.toString();
        nodes2refs.put(variableExprNode, ref);
        nodeVariables.put(ref, processVariableExpr(variableExprNode));
        variableCount++;

        // A "direct declaration relation" declares its operands directly
        // rather than referring to earlier declared objects; it is present
        // when the grandparent is an n_ary_linguistic_term node. Such
        // objects must still be available for later reference.
        boolean declaredInsideRelation =
                getNodeCat(variableExprNode.getParent().getParent())
                        .equals("n_ary_linguistic_term");
        if (!declaredInsideRelation) {
            continue;
        }
        Integer previousTotal = nodeReferencesTotal.get(ref);
        int newTotal = (previousTotal == null) ? 1 : previousTotal + 1;
        nodeReferencesTotal.put(ref, newTotal);
        // Register as processed: otherwise the object would not be
        // available for referencing by relations wrapping this one.
        nodeReferencesProcessed.put(ref, 1);
        // Register this node for later wrapping in a class.
        if (newTotal > 1) {
            int classId = 128 + classCounter++;
            refClassMapping.put(ref, classId);
            objectsToWrapInClass.put(variableExprNode, classId);
        }
    }
}
/**
 * Processes an <tt>exprTop</tt> node. If the node has more than one
 * <tt>andTopExpr</tt> child, a "disjunction" group is created, stored as
 * the query of the request map and pushed onto the object stack.
 *
 * @param node The current parse tree node.
 */
private void processExprTop(ParseTree node) {
    if (getChildrenWithCat(node, "andTopExpr").size() <= 1) {
        return;
    }
    LinkedHashMap<String, Object> disjunction =
            KoralObjectGenerator.makeGroup("disjunction");
    requestMap.put("query", disjunction);
    objectStack.push(disjunction);
}
/**
 * Creates the KoralQuery object (token or span) for a <tt>variableExpr</tt>
 * node. The kind of object depends on the category of the node's first
 * child (<tt>node</tt>, <tt>tok</tt>, <tt>qName</tt> or <tt>textSpec</tt>).
 * Unary relations registered for the node's reference are added as
 * <tt>attr</tt>. If the query contains no relation at all, the object is
 * put into the super object directly. If the node has been registered for
 * class wrapping, the returned object is wrapped accordingly.
 *
 * @param node The <tt>variableExpr</tt> node.
 * @return The KoralQuery object for the node, or <tt>null</tt> if the
 * first child is of none of the handled categories.
 */
@SuppressWarnings("unchecked")
private LinkedHashMap<String, Object> processVariableExpr(ParseTree node) {
    // simplex word or complex assignment (like qname = textSpec)?
    String firstChildNodeCat = getNodeCat(node.getChild(0));
    LinkedHashMap<String, Object> object = null;
    if (firstChildNodeCat.equals("node")) {
        object = KoralObjectGenerator.makeSpan();
    } else if (firstChildNodeCat.equals("tok")) {
        object = KoralObjectGenerator.makeToken();
        if (node.getChildCount() > 1) { // empty tokens do not wrap a term
            LinkedHashMap<String, Object> term =
                    KoralObjectGenerator.makeTerm();
            term.put("layer", "orth");
            object.put("wrap", term);
        }
    } else if (firstChildNodeCat.equals("qName")) {
        // Only (foundry/)?layer specified.
        // May be token or span, depending on indicated layer!
        // (e.g. cnx/cat=NP vs mate/pos=NN)
        // TODO generalize the list below -> look up layers associated with
        // tokens rather than spans somewhere
        HashMap<String, Object> qNameParse =
                parseQNameNode(node.getChild(0));
        if (Arrays.asList("p", "lemma", "m", "orth").
                contains(qNameParse.get("layer"))) {
            object = KoralObjectGenerator.makeToken();
            LinkedHashMap<String, Object> term =
                    KoralObjectGenerator.makeTerm();
            object.put("wrap", term);
            term.putAll(qNameParse);
        } else {
            object = KoralObjectGenerator.makeSpan();
            object.putAll(qNameParse);
        }
    } else if (firstChildNodeCat.equals("textSpec")) {
        object = KoralObjectGenerator.makeToken();
        LinkedHashMap<String, Object> term =
                KoralObjectGenerator.makeTerm();
        object.put("wrap", term);
        term.put("layer", "orth");
        term.putAll(parseTextSpec(node.getChild(0)));
    }
    // None of the known categories matched: there is no object to
    // complete. Bail out here, before the object is dereferenced below.
    if (object == null) {
        return null;
    }
    if (node.getChildCount() == 3) {
        // (foundry/)?layer=key specification
        if (object.get("@type").equals("korap:token")) {
            HashMap<String, Object> term = (HashMap<String, Object>)
                    object.get("wrap");
            term.putAll(parseTextSpec(node.getChild(2)));
            term.put("match", parseMatchOperator(
                    getFirstChildWithCat(node, "eqOperator")));
        } else {
            object.putAll(parseTextSpec(node.getChild(2)));
            object.put("match", parseMatchOperator(
                    getFirstChildWithCat(node, "eqOperator")));
        }
    }
    // Check if there's a unary relation defined for this node
    // If yes, parse and retrieve it and put it in the object.
    String ref = nodes2refs.get(node);
    if (unaryRelations.containsKey(ref)) {
        ArrayList<ParseTree> unaryTermsForRef = unaryRelations.get(ref);
        if (unaryTermsForRef.size() == 1) {
            object.put("attr",
                    parseUnaryOperator(unaryTermsForRef.get(0)));
        } else {
            // Several unary relations are combined in an 'and' term group.
            LinkedHashMap<String, Object> termGroup =
                    KoralObjectGenerator.makeTermGroup("and");
            ArrayList<Object> operands = (ArrayList<Object>)
                    termGroup.get("operands");
            for (ParseTree unaryTerm : unaryTermsForRef) {
                operands.add(parseUnaryOperator(unaryTerm));
            }
            object.put("attr", termGroup);
        }
    }
    // query: object only, no relation
    if (totalRelationCount == 0) {
        putIntoSuperObject(object);
    }
    // Register the (still unwrapped) object under its variable name if
    // the parent's first child carries one.
    ParseTree parentsFirstChild = node.getParent().getChild(0);
    if (getNodeCat(parentsFirstChild).endsWith("#")) {
        nodeVariables.put(getNodeCat(parentsFirstChild).
                replaceAll("#", ""), object);
    }
    if (objectsToWrapInClass.containsKey(node)) {
        int classId = objectsToWrapInClass.get(node);
        object = KoralObjectGenerator.wrapInClass(object, classId);
    }
    return object;
}
/**
 * Retrieves the KoralQuery object for an operand of a relation. An operand
 * that declares its object directly (first child is a <tt>variableExpr</tt>)
 * is built on the spot. Otherwise the operand is a '#' reference and the
 * previously stored object is looked up. If that reference is used more
 * than once in the query, its first use is wrapped in a class, and every
 * later use is replaced by a reference to that class (wrapping the topmost
 * element of the operand stack, if there is one).
 *
 * @param operandNode The operand node of a relation, e.g. '#1'
 * @return A map object with the appropriate KoralQuery representation
 * of the operand
 */
private LinkedHashMap<String, Object> retrieveOperand(ParseTree operandNode) {
    ParseTree firstChild = operandNode.getChild(0);
    if (getNodeCat(firstChild).equals("variableExpr")) {
        return processVariableExpr(firstChild);
    }

    String ref = firstChild.toStringTree(parser).substring(1);
    LinkedHashMap<String, Object> operand = nodeVariables.get(ref);
    if (nodeReferencesTotal.get(ref) <= 1) {
        // Referenced only once: no class bookkeeping needed.
        return operand;
    }

    int processedSoFar = nodeReferencesProcessed.get(ref);
    if (processedSoFar == 0) {
        // First use: establish a class that later uses can refer to.
        refClassMapping.put(ref, classCounter + 128);
        operand = KoralObjectGenerator.wrapInClass(
                operand, 128 + classCounter++);
    } else if (processedSoFar > 0) {
        try {
            operand = KoralObjectGenerator.wrapInReference(
                    operandStack.pop(), refClassMapping.get(ref));
        } catch (NoSuchElementException e) {
            // Nothing on the operand stack to wrap: plain reference.
            operand = KoralObjectGenerator.makeReference(
                    refClassMapping.get(ref));
        }
    }
    nodeReferencesProcessed.put(ref, processedSoFar + 1);
    return operand;
}
/**
 * Checks whether at least one of the two operands of a relation node
 * (its children at index 0 and 2) has been processed before.
 *
 * @param node The relation node; two operands are assumed.
 * @return <tt>true</tt> if the first or the second operand has been
 * processed previously, <tt>false</tt> otherwise.
 */
private boolean checkOperandsProcessedPreviously(ParseTree node) {
    return checkOperandProcessedPreviously(node.getChild(0))
            || checkOperandProcessedPreviously(node.getChild(2));
}
/**
 * Checks whether an operand is a '#' reference whose referenced node has
 * already been processed at least once.
 *
 * @param operand The operand node.
 * @return <tt>true</tt> if the operand text starts with '#' and the
 * processed-counter of the reference is greater than zero;
 * <tt>false</tt> otherwise (in particular for operands that are not
 * references).
 */
private boolean checkOperandProcessedPreviously(ParseTree operand) {
    String text = operand.getText();
    if (!text.startsWith("#")) {
        return false;
    }
    return nodeReferencesProcessed.get(text.substring(1)) > 0;
}
/**
* Processes an <tt>n_ary_linguistic_term</tt> node, i.e. a relation between
* operands. The node's children alternate between operands (even indexes)
* and operators (odd indexes); one group is built per operator and chained
* operators are serialised as nested binary groups.
* <p>
* If this is not the first relation and none of the checked operands has
* been processed before, the node is queued for later processing and the
* method returns without building anything.
* <p>
* The group built for the last operator of the last relation is put into
* the super object and pushed onto the object stack; every other group is
* pushed onto the operand stack to become an operand of a later group.
* <p>
* NOTE(review): <tt>operands</tt> is only assigned in the branch for
* operators other than commonparent/commonancestor; in the final step it
* is dereferenced whenever the operand stack is not empty - confirm this
* cannot happen for the "common*" operators.
* @param node The <tt>n_ary_linguistic_term</tt> node.
*/
@SuppressWarnings("unchecked")
private void processN_ary_linguistic_term(ParseTree node) {
relationCounter++;
// Get operator and determine type of group (sequence/treeRelation/
// relation/...). It's possible in Annis QL to concatenate operators,
// so there may be several operators under one n_ary_linguistic_term
// node. Counter 'i' will iteratively point to all operator nodes
// (odd-numbered children) under this node.
for (int i=1; i<node.getChildCount(); i = i+2) {
ParseTree operandTree1 = node.getChild(i-1);
ParseTree operandTree2 = node.getChild(i+1);
String reltype = getNodeCat(node.getChild(i).getChild(0));
LinkedHashMap<String,Object> group = null;
ArrayList<Object> operands = null;
// make sure one of the operands has already been put into a
// relation (if this is not the 1st relation). If none of the
// operands has been ingested at a lower level (and is therefore
// unavailable for referencing), queue this relation for later
// processing.
// Note: the check below always inspects the node's children 0 and 2,
// independently of the current operator index 'i'.
if (relationCounter != 1) {
if (! checkOperandsProcessedPreviously(node)) {
queuedRelations.add(node);
// Undo the increment from the top of the method: this relation
// has not been processed yet.
relationCounter--;
if (verbose) {
System.out.println("Adding to queue: "+node.getText());
}
objectsToPop.push(stackedObjects);
return;
}
}
// Retrieve operands.
LinkedHashMap<String, Object> operand1 =
retrieveOperand(operandTree1);
LinkedHashMap<String, Object> operand2 =
retrieveOperand(operandTree2);
// 'Proper' n_ary_linguistic_operators receive a considerably
// different serialisation than 'commonparent' and 'commonancestor'
// For the latter cases, a dummy span is introduced and declared as
// a span class that has a dominance relation towards the two
// operands, one after the other, thus resulting in two nested
// relations! A Poliqarp+ equivalent for A $ B would be
// contains(focus(1:contains({1:<>},A)), B).
// This is modeled here...
if (reltype.equals("commonparent") ||
reltype.equals("commonancestor")) {
// make an (outer) group and an inner group containing the dummy
// node or previous relations
group = KoralObjectGenerator.makeGroup("relation");
LinkedHashMap<String,Object> innerGroup =
KoralObjectGenerator.makeGroup("relation");
LinkedHashMap<String,Object> relation =
KoralObjectGenerator.makeRelation();
LinkedHashMap<String,Object> term =
KoralObjectGenerator.makeTerm();
term.put("layer", "c");
relation.put("wrap", term);
// commonancestor is an indirect commonparent relation
if (reltype.equals("commonancestor")) relation.put("boundary",
KoralObjectGenerator.makeBoundary(1, null));
// The same relation object is shared by the outer and the inner group.
group.put("relation", relation);
innerGroup.put("relation", relation);
// Get operands list before possible re-assignment of 'group'
// (see following 'if')
ArrayList<Object> outerOperands =
(ArrayList<Object>) group.get("operands");
ArrayList<Object> innerOperands =
(ArrayList<Object>) innerGroup.get("operands");
// for lowest level, add the underspecified node as first
// operand and wrap it in a class group
if (i == 1) {
innerOperands.add(KoralObjectGenerator.wrapInClass(
KoralObjectGenerator.makeSpan(), classCounter+128));
// add the first operand and wrap the whole group in a
// focusing reference
innerOperands.add(operand1);
innerGroup = KoralObjectGenerator.
wrapInReference(innerGroup, classCounter+128);
outerOperands.add(innerGroup);
} else {
// For later operators the group built for the previous operator
// becomes the first operand.
outerOperands.add(operandStack.pop());
}
// Lookahead: if next operator is not commonparent or
// commonancestor, wrap in class for accessibility
if (i < node.getChildCount()-2 && !getNodeCat(
node.getChild(i+2).getChild(0)).startsWith("common")) {
operand2 = KoralObjectGenerator.wrapInClass(
operand2, ++classCounter+128);
}
outerOperands.add(operand2);
// Wrap in another reference object in case other relations
// are following
if (i < node.getChildCount()-2) {
group = KoralObjectGenerator.wrapInReference(
group, classCounter+128);
}
// All other n-ary linguistic relations have special 'relation'
// attributes defined in KoralQ. and can be handled more easily
} else {
LinkedHashMap<String, Object> operatorGroup =
parseOperatorNode(node.getChild(i).getChild(0));
String groupType;
// Fall back to "relation" when the group type cannot be read from the
// operator group (including the case that the operator group is null).
// NOTE(review): with a null operator group, putAllButGroupType below is
// still called with a null input - confirm how that case should behave.
try {
groupType = (String) operatorGroup.get("groupType");
} catch (ClassCastException | NullPointerException n) {
groupType = "relation";
}
if (groupType.equals("relation") ||
groupType.equals("treeRelation")) {
group = KoralObjectGenerator.makeGroup(groupType);
LinkedHashMap<String, Object> relation =
new LinkedHashMap<String, Object>();
putAllButGroupType(relation, operatorGroup);
group.put("relation", relation);
} else if (groupType.equals("sequence")) {
group = KoralObjectGenerator.makeGroup(groupType);
putAllButGroupType(group, operatorGroup);
} else if (groupType.equals("position")) {
// The operator group of a position already is the complete group
// object; copy everything except the internal 'groupType' marker.
group = new LinkedHashMap<String,Object>();
putAllButGroupType(group, operatorGroup);
}
// Get operands list before possible re-assignment of 'group'
// (see following 'if')
operands = (ArrayList<Object>) group.get("operands");
ParseTree leftChildSpec = getFirstChildWithCat(
node.getChild(i).getChild(0), "@l");
ParseTree rightChildSpec = getFirstChildWithCat(
node.getChild(i).getChild(0), "@r");
// A left/right child specification is expressed by an additional
// position group around the relation: the relation group is required
// to start with (@l) or end with (@r) the class-wrapped second operand.
if (leftChildSpec != null || rightChildSpec != null) {
String frame = (leftChildSpec!=null) ?
"frames:startsWith" : "frames:endsWith";
LinkedHashMap<String,Object> positionGroup =
KoralObjectGenerator.
makePosition(new String[]{frame});
operand2 = KoralObjectGenerator.wrapInClass(
operand2, ++classCounter+128);
((ArrayList<Object>) positionGroup.get("operands")).
add(group);
((ArrayList<Object>) positionGroup.get("operands")).
add(KoralObjectGenerator.
makeReference(classCounter+128));
group = positionGroup;
}
// Wrap in reference object in case other relations follow
if (i < node.getChildCount()-2) {
group = KoralObjectGenerator.wrapInReference(
group, classCounter+128);
}
// Inject operands.
// -> Case distinction:
if (node.getChildCount()==3) {
// Things are easy when there's just one operator
// (thus 3 children incl. operands)...
if (operand1 != null) operands.add(operand1);
if (operand2 != null) operands.add(operand2);
} else {
// ... but things get a little more complicated here. The
// AST is of this form: (operand1 operator1 operand2
// operator2 operand3 operator3 ...), but we'll have
// to serialize it in a nested, binary way: (((operand1
// operator1 operand2) operator2 operand3) operator3 ...).
// The following code will do just that:
if (i == 1) {
// for the first operator, include both operands
if (operand1 != null) operands.add(operand1);
if (operand2 != null) operands.add(KoralObjectGenerator.
wrapInClass(operand2, 128+classCounter++));
// Don't put this into the super object directly but
// store on operandStack (because this group will have
// to be an operand of a subsequent operator)
operandStack.push(group);
// for all subsequent operators, only take 2nd operand
// (1st was already added by previous operator)
} else if (i < node.getChildCount()-2) {
// for all intermediate operators, include other
// previous groups and 2nd operand. Store this on the
// operandStack, too.
if (operand2 != null) operands.add(KoralObjectGenerator.
wrapInClass(operand2, 128+classCounter++));
operands.add(0, operandStack.pop());
operandStack.push(group);
} else if (i == node.getChildCount()-2) {
// This is the last operator. Include 2nd operand only
if (operand2 != null) operands.add(operand2);
}
}
}
// Final step: decide what to do with the 'group' object, depending
// on whether all relations have been processed
if (i == node.getChildCount()-2 &&
relationCounter == totalRelationCount) {
putIntoSuperObject(group);
// A group still waiting on the operand stack becomes the first operand.
if (!operandStack.isEmpty()) {
operands.add(0, operandStack.pop());
}
objectStack.push(group);
stackedObjects++;
} else {
operandStack.push(group);
}
}
}
/**
 * Parses a unary_linguistic_operator node. Possible operators are:
 * root, arity, tokenarity. Operators are embedded into a korap:term,
 * in turn wrapped by an 'attr' property in a korap:span.
 * <p>
 * For <tt>arity</tt> and <tt>tokenarity</tt> the term maps the operator
 * name to a boundary built from the node's fourth child; for any other
 * operator it maps the operator name to <tt>true</tt>.
 *
 * @param node The unary_linguistic_operator node
 * @return A map containing the attr key, to be inserted into korap:span
 */
private LinkedHashMap<String, Object> parseUnaryOperator(ParseTree node) {
    LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm();
    // Drop the first character of the operator text.
    String op = node.getChild(1).toStringTree(parser).substring(1);
    boolean takesRange = op.equals("arity") || op.equals("tokenarity");
    Object value;
    if (takesRange) {
        value = boundaryFromRangeSpec(node.getChild(3), false);
    } else {
        value = Boolean.TRUE;
    }
    term.put(op, value);
    return term;
}
/**
 * Translates an operator node into a map describing the group to be
 * created for it. Besides the properties of the relation or group, the map
 * carries a <tt>groupType</tt> entry ("relation", "sequence" or
 * "position") telling the caller what kind of group to build.
 *
 * @param operatorNode The operator node.
 * @return The operator description, or <tt>null</tt> for operators that
 * are not handled (among them identity, equalvalue and
 * notequalvalue).
 */
private LinkedHashMap<String, Object> parseOperatorNode(
        ParseTree operatorNode) {
    String operator = getNodeCat(operatorNode);
    switch (operator) {
        case "dominance":
            return buildDominanceRelation(operatorNode);
        case "pointing":
            return buildPointingRelation(operatorNode);
        case "precedence":
            return buildDistanceSequence(operatorNode, true);
        case "spanrelation":
            return buildSpanRelation(operatorNode);
        case "near":
            return buildDistanceSequence(operatorNode, false);
        default:
            // identity, equalvalue, notequalvalue:
            // TODO since ANNIS v. 3.1.6
            return null;
    }
}

/**
 * Builds the relation for a dominance operator. The wrapped term defaults
 * to layer "c" and is replaced by the parsed qName if there is one. An
 * edge specification is combined with that term in a term group; note that
 * for an edge specification consisting of a single annotation the group is
 * completed with a fresh layer-"c" term.
 */
@SuppressWarnings("unchecked")
private LinkedHashMap<String, Object> buildDominanceRelation(
        ParseTree operatorNode) {
    LinkedHashMap<String, Object> relation =
            KoralObjectGenerator.makeRelation();
    relation.put("groupType", "relation");

    ParseTree qName = getFirstChildWithCat(operatorNode, "qName");
    ParseTree edgeSpecNode = getFirstChildWithCat(operatorNode, "edgeSpec");
    ParseTree star = getFirstChildWithCat(operatorNode, "*");
    ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec");

    LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm();
    term.put("layer", "c");
    if (qName != null) {
        term = parseQNameNode(qName);
    }
    if (edgeSpecNode != null) {
        LinkedHashMap<String, Object> edgeSpec = parseEdgeSpec(edgeSpecNode);
        String edgeSpecType = (String) edgeSpec.get("@type");
        if (edgeSpecType.equals("korap:termGroup")) {
            // Several edge annotations: add the term to their group.
            ((ArrayList<Object>) edgeSpec.get("operands")).add(term);
            term = edgeSpec;
        } else {
            // Single edge annotation: open a new 'and' group for it.
            term = KoralObjectGenerator.makeTermGroup("and");
            ArrayList<Object> groupOperands =
                    (ArrayList<Object>) term.get("operands");
            groupOperands.add(edgeSpec);
            LinkedHashMap<String, Object> constTerm =
                    KoralObjectGenerator.makeTerm();
            constTerm.put("layer", "c");
            groupOperands.add(constTerm);
        }
    }
    if (star != null) {
        relation.put("boundary", KoralObjectGenerator.makeBoundary(0, null));
    }
    if (rangeSpec != null) {
        relation.put("boundary", boundaryFromRangeSpec(rangeSpec));
    }
    relation.put("wrap", term);
    return relation;
}

/**
 * Builds the relation for a pointing operator: the wrapped term receives
 * the fields of the qName and of the edge specification, if present.
 */
private LinkedHashMap<String, Object> buildPointingRelation(
        ParseTree operatorNode) {
    LinkedHashMap<String, Object> relation =
            KoralObjectGenerator.makeRelation();
    relation.put("groupType", "relation");

    ParseTree qName = getFirstChildWithCat(operatorNode, "qName");
    ParseTree edgeSpec = getFirstChildWithCat(operatorNode, "edgeSpec");
    ParseTree star = getFirstChildWithCat(operatorNode, "*");
    ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec");

    LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm();
    if (qName != null) {
        term.putAll(parseQNameNode(qName));
    }
    if (edgeSpec != null) {
        term.putAll(parseEdgeSpec(edgeSpec));
    }
    if (star != null) {
        relation.put("boundary", KoralObjectGenerator.makeBoundary(0, null));
    }
    if (rangeSpec != null) {
        relation.put("boundary", boundaryFromRangeSpec(rangeSpec));
    }
    relation.put("wrap", term);
    return relation;
}

/**
 * Builds the sequence description shared by the precedence
 * (<tt>inOrder = true</tt>) and near (<tt>inOrder = false</tt>) operators.
 * A <tt>distances</tt> entry is only present if the operator has a star
 * and/or a range specification.
 */
private LinkedHashMap<String, Object> buildDistanceSequence(
        ParseTree operatorNode, boolean inOrder) {
    LinkedHashMap<String, Object> sequence =
            new LinkedHashMap<String, Object>();
    sequence.put("groupType", "sequence");

    ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec");
    ParseTree star = getFirstChildWithCat(operatorNode, "*");
    ArrayList<Object> distances = new ArrayList<Object>();
    if (star != null) {
        distances.add(KoralObjectGenerator.makeDistance("w", 0, null));
    }
    if (rangeSpec != null) {
        distances.add(parseDistance(rangeSpec));
    }
    if (!distances.isEmpty()) {
        sequence.put("distances", distances);
    }
    sequence.put("inOrder", inOrder);
    return sequence;
}

/**
 * Builds the position group for a span relation operator; the frames are
 * derived from the text of the operator's first child.
 */
private LinkedHashMap<String, Object> buildSpanRelation(
        ParseTree operatorNode) {
    String reltype = operatorNode.getChild(0).toStringTree(parser);
    LinkedHashMap<String, Object> position =
            KoralObjectGenerator.makePosition(spanRelationFrames(reltype));
    position.put("groupType", "position");
    return position;
}

/**
 * Maps a span relation operator string to its frames. Unknown operators
 * yield an empty array.
 */
private static String[] spanRelationFrames(String reltype) {
    switch (reltype) {
        case "_=_":
            return new String[]{"frames:matches"};
        case "_l_":
            return new String[]{"frames:startsWith", "frames:matches"};
        case "_r_":
            return new String[]{"frames:endsWith", "frames:matches"};
        case "_i_":
            return new String[]{"frames:isAround"};
        case "_o_":
            return new String[]{"frames:overlapsLeft",
                    "frames:overlapsRight"};
        case "_ol_":
            return new String[]{"frames:overlapsLeft"};
        case "_or_":
            return new String[]{"frames:overlapsRight"};
        default:
            return new String[]{};
    }
}
/**
 * Parses an edge specification. If it contains exactly one
 * <tt>edgeAnno</tt> child, the term for that annotation is returned
 * directly; otherwise the terms of all annotations are collected as
 * operands of an "and" term group.
 *
 * @param edgeSpec The edge specification node.
 * @return A single term or a term group.
 */
@SuppressWarnings("unchecked")
private LinkedHashMap<String,Object> parseEdgeSpec(ParseTree edgeSpec) {
    List<ParseTree> annos = getChildrenWithCat(edgeSpec, "edgeAnno");
    if (annos.size() == 1) {
        return parseEdgeAnno(annos.get(0));
    }
    LinkedHashMap<String,Object> termGroup =
            KoralObjectGenerator.makeTermGroup("and");
    ArrayList<Object> groupOperands =
            (ArrayList<Object>) termGroup.get("operands");
    for (int idx = 0; idx < annos.size(); idx++) {
        groupOperands.add(parseEdgeAnno(annos.get(idx)));
    }
    return termGroup;
}
/**
 * Parses a single edge annotation into a korap:term. Foundry and layer are
 * set if the corresponding child nodes exist; key (and regex type) come
 * from the text specification, the match value from the match operator.
 *
 * @param edgeAnnoSpec The edge annotation node.
 * @return The term describing the edge annotation.
 */
private LinkedHashMap<String, Object> parseEdgeAnno(
        ParseTree edgeAnnoSpec) {
    ParseTree textSpecNode = getFirstChildWithCat(edgeAnnoSpec, "textSpec");
    ParseTree layerNode = getFirstChildWithCat(edgeAnnoSpec, "layer");
    ParseTree foundryNode = getFirstChildWithCat(edgeAnnoSpec, "foundry");
    ParseTree matchOperatorNode =
            getFirstChildWithCat(edgeAnnoSpec, "eqOperator");

    LinkedHashMap<String, Object> term =
            new LinkedHashMap<String, Object>();
    term.put("@type", "korap:term");
    if (foundryNode != null) {
        String foundry = foundryNode.getChild(0).toStringTree(parser);
        term.put("foundry", foundry);
    }
    if (layerNode != null) {
        String layer = layerNode.getChild(0).toStringTree(parser);
        term.put("layer", layer);
    }
    term.putAll(parseTextSpec(textSpecNode));
    // Overrides the default match value set by the text specification.
    term.put("match", parseMatchOperator(matchOperatorNode));
    return term;
}
/**
 * Creates a boundary from a range specification, leaving the maximum
 * open if the specification names only a minimum.
 *
 * @param rangeSpec The range specification node.
 * @return The boundary object.
 */
private LinkedHashMap<String, Object> boundaryFromRangeSpec(
        ParseTree rangeSpec) {
    return boundaryFromRangeSpec(rangeSpec, true);
}

/**
 * Creates a boundary from a range specification. The minimum is read from
 * the first child. If the specification has three children, the maximum is
 * read from the third one; otherwise the maximum is left open
 * (<tt>null</tt>) when <tt>expandToMax</tt> is set, and equals the minimum
 * when it is not.
 *
 * @param rangeSpec The range specification node.
 * @param expandToMax Whether a missing maximum means "unbounded".
 * @return The boundary object.
 */
private LinkedHashMap<String, Object> boundaryFromRangeSpec(
        ParseTree rangeSpec, boolean expandToMax) {
    Integer lower = Integer.parseInt(
            rangeSpec.getChild(0).toStringTree(parser));
    Integer upper;
    if (rangeSpec.getChildCount() == 3) {
        upper = Integer.parseInt(
                rangeSpec.getChild(2).toStringTree(parser));
    } else if (expandToMax) {
        upper = null;
    } else {
        upper = lower;
    }
    return KoralObjectGenerator.makeBoundary(lower, upper);
}
/**
 * Creates a word ("w") distance from a range specification. The minimum is
 * read from the first child; the maximum is read from the third child if
 * the specification has three children and is left open otherwise.
 *
 * @param rangeSpec The range specification node.
 * @return The distance object.
 */
private LinkedHashMap<String, Object> parseDistance(ParseTree rangeSpec) {
    String minText = rangeSpec.getChild(0).toStringTree(parser);
    Integer min = Integer.valueOf(minText);
    boolean hasMax = rangeSpec.getChildCount() == 3;
    Integer max = hasMax
            ? Integer.valueOf(rangeSpec.getChild(2).toStringTree(parser))
            : null;
    return KoralObjectGenerator.makeDistance("w", min, max);
}
/**
 * Parses a text specification into the fields of a term. For a regular
 * expression the type is set to <tt>type:regex</tt> and the key is the
 * expression text with every '/' character removed (not only the
 * delimiters); otherwise the key is the text of the second child. The
 * match value defaults to <tt>match:eq</tt>.
 *
 * @param node The text specification node.
 * @return A map with the term fields derived from the specification.
 */
private LinkedHashMap<String, Object> parseTextSpec(ParseTree node) {
    LinkedHashMap<String, Object> fields =
            new LinkedHashMap<String, Object>();
    String key;
    if (hasChild(node, "regex")) {
        fields.put("type", "type:regex");
        String regexText = node.getChild(0).getChild(0).toStringTree(parser);
        key = regexText.replace("/", "");
    } else {
        key = node.getChild(1).toStringTree(parser);
    }
    fields.put("key", key);
    fields.put("match", "match:eq");
    return fields;
}
/**
 * Parses the match operator (= or !=).
 *
 * @param node The match operator node.
 * @return <tt>match:eq</tt> if the node has no children or the text of
 * its first child is "=", <tt>match:ne</tt> otherwise.
 */
private String parseMatchOperator(ParseTree node) {
    if (node.getChildCount() <= 0) {
        return "match:eq";
    }
    String operatorText = node.getChild(0).getText();
    if (operatorText.equals("=")) {
        return "match:eq";
    }
    return "match:ne";
}
/**
 * Parses a qName node into its foundry (optional) and layer. The layer
 * names "pos" and "cat" are abbreviated to "p" and "c" respectively.
 *
 * @param node The qName node.
 * @return A map with the <tt>foundry</tt> (if present) and
 * <tt>layer</tt> fields.
 */
private LinkedHashMap<String, Object> parseQNameNode(ParseTree node) {
    ParseTree layerNode = getFirstChildWithCat(node, "layer");
    ParseTree foundryNode = getFirstChildWithCat(node, "foundry");

    LinkedHashMap<String, Object> fields =
            new LinkedHashMap<String, Object>();
    if (foundryNode != null) {
        String foundry = foundryNode.getChild(0).toStringTree(parser);
        fields.put("foundry", foundry);
    }
    String layer = layerNode.getChild(0).toStringTree(parser);
    if ("pos".equals(layer)) {
        layer = "p";
    } else if ("cat".equals(layer)) {
        layer = "c";
    }
    fields.put("layer", layer);
    return fields;
}
/**
 * Puts an object into the operands list of the object at position 0 of
 * the object stack, or makes it the query of the request map if the
 * object stack is empty.
 *
 * @param object The object to insert.
 */
private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
    putIntoSuperObject(object, 0);
}

/**
 * Puts an object into the operands list of the object at the given
 * position of the object stack. The object is appended, unless the
 * operands list is registered as inverted, in which case it is inserted
 * at the front. If the object stack has no element at the given position,
 * the object becomes the query of the request map instead.
 *
 * @param object The object to insert.
 * @param objStackPosition The position of the super object on the
 * object stack.
 */
@SuppressWarnings({ "unchecked" })
private void putIntoSuperObject(
        LinkedHashMap<String, Object> object, int objStackPosition) {
    if (objectStack.size() <= objStackPosition) {
        requestMap.put("query", object);
        return;
    }
    ArrayList<Object> superOperands = (ArrayList<Object>)
            objectStack.get(objStackPosition).get("operands");
    int insertAt = invertedOperandsLists.contains(superOperands)
            ? 0 : superOperands.size();
    superOperands.add(insertAt, object);
}
/**
 * Copies all entries of <tt>input</tt> into <tt>container</tt>, except
 * for the entry with the key <tt>groupType</tt>.
 *
 * @param container The map receiving the entries.
 * @param input The map to copy from.
 */
private void putAllButGroupType(
        Map<String, Object> container, Map<String, Object> input) {
    for (Map.Entry<String, Object> entry : input.entrySet()) {
        if (entry.getKey().equals("groupType")) {
            continue;
        }
        container.put(entry.getKey(), entry.getValue());
    }
}
/**
 * Tokenizes and parses the given ANNIS QL query with the ANTLR-generated
 * {@link AqlLexer} and {@link AqlParser}. The parser is stored in the
 * inherited <tt>parser</tt> field. If anything goes wrong, the failure is
 * logged together with its cause and the error message collected by the
 * error listener is added to the errors of this processor.
 *
 * @param query The ANNIS QL query string.
 * @return The parse tree, or <tt>null</tt> if the query could not be
 * parsed.
 */
private ParserRuleContext parseAnnisQuery (String query) {
    Lexer lexer = new AqlLexer((CharStream)null);
    ParserRuleContext tree = null;
    Antlr4DescriptiveErrorListener errorListener =
            new Antlr4DescriptiveErrorListener(query);
    try {
        // Tokenize input data
        ANTLRInputStream input = new ANTLRInputStream(query);
        lexer.setInputStream(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        parser = new AqlParser(tokens);
        // Bail out on the first syntax error instead of trying to recover
        parser.setErrorHandler(new BailErrorStrategy());
        // Report lexer and parser errors through the descriptive listener only
        lexer.removeErrorListeners();
        lexer.addErrorListener(errorListener);
        parser.removeErrorListeners();
        parser.addErrorListener(errorListener);
        // Get starting rule from parser
        Method startRule = AqlParser.class.getMethod("start");
        tree = (ParserRuleContext)startRule.invoke(parser, (Object[])null);
    }
    // Some things went wrong ...
    catch (Exception e) {
        // Keep the cause in the log; it is the only trace of what failed.
        log.error("Could not parse query. "
                + "Please make sure it is well-formed.", e);
        log.error(errorListener.generateFullErrorMsg().toString());
        addError(errorListener.generateFullErrorMsg());
    }
    return tree;
}
}