| package de.ids_mannheim.korap.query.serialize; |
| |
| import de.ids_mannheim.korap.query.object.ClassRefOp; |
| import de.ids_mannheim.korap.query.object.KoralFrame; |
| import de.ids_mannheim.korap.query.object.KoralMatchOperator; |
| import de.ids_mannheim.korap.query.object.KoralOperation; |
| import de.ids_mannheim.korap.query.object.KoralTermGroupRelation; |
| import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusLexer; |
| import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusParser; |
| import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener; |
| import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; |
| import de.ids_mannheim.korap.query.serialize.util.StatusCodes; |
| |
| import org.antlr.v4.runtime.*; |
| import org.antlr.v4.runtime.tree.ParseTree; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import java.lang.reflect.Method; |
| import java.util.*; |
| |
| /** |
| * Map representation of PoliqarpPlus syntax tree as returned by |
| * ANTLR. |
| * Most centrally, this class maintains a set of nested maps and |
| * lists which represent the JSON tree, which is built by the JSON |
| * serialiser on basis of the {@link #requestMap} at the root of |
| * the tree. <br/> |
| * The class further maintains a set of stacks which effectively |
| * keep track of which objects to embed in which containing |
| * objects. |
| * |
| * This class expects the Poliqarp+ ANTLR grammar shipped with Koral |
| * v0.3.0. |
| * |
| * @author Joachim Bingel (bingel@ids-mannheim.de) |
| * @author Eliza Margaretha (margaretha@ids-mannheim.de) |
| * @version 0.3.0 |
| * @since 0.1.0 |
| */ |
| public class PoliqarpPlusQueryProcessor extends Antlr4AbstractQueryProcessor { |
| |
| private static Logger log = LoggerFactory |
| .getLogger(PoliqarpPlusQueryProcessor.class); |
| private int classCounter = 1; |
| |
| Map<ParseTree, Integer> classWrapRegistry = new HashMap<ParseTree, Integer>(); |
| |
| |
| /** |
|  * Creates a processor and immediately processes the given query. |
|  * The new instance registers itself with |
|  * {@link KoralObjectGenerator} before processing starts. |
|  * |
|  * @param query |
|  *            The PoliqarpPlus query string to be parsed and |
|  *            serialised |
|  */ |
| public PoliqarpPlusQueryProcessor (String query) { |
|     KoralObjectGenerator.setQueryProcessor(this); |
|     process(query); |
|     // Parameterized form: the message is only assembled if INFO is on. |
|     log.info(">>> {} <<<", requestMap.get("query")); |
| } |
| |
| |
| @Override |
| public void process (String query) { |
| ParseTree tree; |
| tree = parsePoliqarpQuery(query); |
| // fixme: not required!? |
| super.parser = this.parser; |
| log.info("Processing PoliqarpPlus query: " + query); |
| if (tree != null) { |
| log.debug("ANTLR parse tree: " + tree.toStringTree(parser)); |
| processNode(tree); |
| } |
| else { |
| addError(StatusCodes.MALFORMED_QUERY, |
| "Could not parse query >>> " + query + " <<<."); |
| } |
| } |
| |
| |
| /** |
|  * Translates one parse-tree node and then descends into its |
|  * children (top-down, depth-first). Nodes that are already in the |
|  * visited list -- because an ancestor handled them directly -- are |
|  * skipped. The method dispatches on the node category to the |
|  * matching process* method, remembers how many objects that step |
|  * put on the object stack, recurses into the children and finally |
|  * takes those objects off the stack again. |
|  * |
|  * @param node |
|  *            The node to process; {@link #process(String)} calls |
|  *            this method with the root of the tree. |
|  */ |
| private void processNode (ParseTree node) { |
|     // Every node is handled at most once. |
|     if (visited.contains(node)) { |
|         return; |
|     } |
|     visited.add(node); |
| |
|     String nodeCat = getNodeCat(node); |
|     openNodeCats.push(nodeCat); |
| |
|     stackedObjects = 0; |
| |
|     if (verbose) { |
|         System.err.println(" " + objectStack); |
|         System.out.println(openNodeCats); |
|     } |
| |
|     // A node registered in classWrapRegistry (e.g. by an alignment |
|     // operation) gets its translation wrapped in a span class. |
|     if (classWrapRegistry.containsKey(node)) { |
|         Map<String, Object> wrappingClass = KoralObjectGenerator |
|                 .makeSpanClass(classWrapRegistry.get(node)); |
|         putIntoSuperObject(wrappingClass); |
|         objectStack.push(wrappingClass); |
|         stackedObjects++; |
|     } |
| |
|     // Dispatch on the node category; categories without a case are |
|     // only traversed. |
|     switch (nodeCat) { |
|         case "segment": |
|             processSegment(node); |
|             break; |
|         case "sequence": |
|             processSequence(node); |
|             break; |
|         case "emptyTokenSequence": |
|             processEmptyTokenSequence(node); |
|             break; |
|         case "emptyTokenSequenceClass": |
|             processEmptyTokenSequenceClass(node); |
|             break; |
|         case "token": |
|             processToken(node); |
|             break; |
|         case "alignment": |
|             processAlignment(node); |
|             break; |
|         case "span": |
|             processSpan(node); |
|             break; |
|         case "disjunction": |
|             processDisjunction(node); |
|             break; |
|         case "position": |
|             processPosition(node); |
|             break; |
|         case "relation": |
|             processRelation(node); |
|             break; |
|         case "spanclass": |
|             processSpanclass(node); |
|             break; |
|         case "matching": |
|             processMatching(node); |
|             break; |
|         case "submatch": |
|             processSubmatch(node); |
|             break; |
|         case "meta": |
|             processMeta(node); |
|             break; |
|         case "within": |
|             // a 'within' below a 'position' node is not handled here |
|             if (!getNodeCat(node.getParent()).equals("position")) { |
|                 processWithin(node); |
|             } |
|             break; |
|         default: |
|             break; |
|     } |
| |
|     // Remember how many objects this node opened; the recursion below |
|     // overwrites stackedObjects. |
|     objectsToPop.push(stackedObjects); |
| |
|     // Recursion over the children |
|     int childCount = node.getChildCount(); |
|     for (int childIdx = 0; childIdx < childCount; childIdx++) { |
|         processNode(node.getChild(childIdx)); |
|     } |
| |
|     // Leaving the node: close everything it opened |
|     for (int remaining = objectsToPop.get(0); remaining > 0; remaining--) { |
|         objectStack.pop(); |
|     } |
|     objectsToPop.pop(); |
|     openNodeCats.pop(); |
| } |
| |
| |
| /** |
|  * Processes a 'segment' node. If the segment has a 'repetition' |
|  * child, a repetition group carrying the parsed boundary is put |
|  * into the super object and pushed onto the object stack. |
|  * |
|  * @param node |
|  *            The 'segment' node |
|  */ |
| private void processSegment (ParseTree node) { |
|     ParseTree repetitionNode = getFirstChildWithCat(node, "repetition"); |
|     if (repetitionNode == null) { |
|         // unquantified segment: nothing to open here |
|         return; |
|     } |
|     Map<String, Object> repetitionGroup = KoralObjectGenerator |
|             .makeGroup(KoralOperation.REPETITION); |
|     Integer[] bounds = parseRepetition(repetitionNode); |
|     repetitionGroup.put("boundary", |
|             KoralObjectGenerator.makeBoundary(bounds[0], bounds[1])); |
|     putIntoSuperObject(repetitionGroup); |
|     objectStack.push(repetitionGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'sequence' node by opening a sequence group, unless |
|  * the node is a mere container: a sequence whose only child is an |
|  * emptyTokenSequence/emptyTokenSequenceClass, or an alignment with |
|  * exactly two children, opens nothing. |
|  * |
|  * @param node |
|  *            The 'sequence' node |
|  */ |
| private void processSequence (ParseTree node) { |
|     if (node.getChildCount() == 1) { |
|         ParseTree onlyChild = node.getChild(0); |
|         String onlyChildCat = getNodeCat(onlyChild); |
|         if (onlyChildCat.startsWith("emptyTokenSequence")) { |
|             return; |
|         } |
|         // Two children of the alignment: the operator (^) and a single |
|         // segment, so there is nothing to put in sequence. |
|         if (onlyChildCat.equals("alignment") |
|                 && onlyChild.getChildCount() == 2) { |
|             return; |
|         } |
|     } |
|     Map<String, Object> sequenceGroup = KoralObjectGenerator |
|             .makeGroup(KoralOperation.SEQUENCE); |
|     putIntoSuperObject(sequenceGroup); |
|     objectStack.push(sequenceGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a run of empty tokens. A run matching exactly one |
|  * token is serialised as a single empty token, any other run as a |
|  * repetition group around an empty token. |
|  * |
|  * @param node |
|  *            The 'emptyTokenSequence' node |
|  */ |
| @SuppressWarnings("unchecked") |
| private void processEmptyTokenSequence (ParseTree node) { |
|     Integer[] bounds = parseEmptySegments(node); |
|     Map<String, Object> emptyToken = KoralObjectGenerator.makeToken(); |
|     // a null upper bound means "unbounded" |
|     boolean exactlyOne = bounds[0] == 1 && bounds[1] != null |
|             && bounds[1] == 1; |
|     Map<String, Object> result; |
|     if (exactlyOne) { |
|         result = emptyToken; |
|     } |
|     else { |
|         result = KoralObjectGenerator.makeRepetition(bounds[0], bounds[1]); |
|         ((ArrayList<Object>) result.get("operands")).add(emptyToken); |
|     } |
|     putIntoSuperObject(result); |
|     objectStack.push(result); |
|     stackedObjects++; |
| } |
| |
| |
| private void processEmptyTokenSequenceClass (ParseTree node) { |
| int classId = 1; |
| if (hasChild(node, "spanclass_id")) { |
| classId = Integer.parseInt( |
| node.getChild(1).getChild(0).toStringTree(parser)); |
| } |
| Map<String, Object> classGroup = KoralObjectGenerator |
| .makeSpanClass(classId); |
| addHighlightClass(classId); |
| putIntoSuperObject(classGroup); |
| objectStack.push(classGroup); |
| stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'token' node and puts the resulting token object |
|  * into the current super object. A token is either a bare key |
|  * (optionally followed by a flag), which is serialised here as a |
|  * term on the "orth" layer, or a term/termGroup, which is |
|  * delegated to {@link #parseTermOrTermGroup(ParseTree, boolean)}. |
|  * All children of the node are marked as visited. |
|  * |
|  * @param node |
|  *            The 'token' node |
|  */ |
| private void processToken (ParseTree node) { |
|     Map<String, Object> token = KoralObjectGenerator.makeToken(); |
|     // An odd number of '!' operators negates the token, an even number |
|     // cancels out. |
|     List<ParseTree> negations = getChildrenWithCat(node, "!"); |
|     boolean negated = negations.size() % 2 == 1; |
|     // The term/termGroup child is found by skipping ALL negation |
|     // operators. Skipping them only for an odd count made an even |
|     // count (e.g. "!!") select a '!' child instead of the term, which |
|     // resulted in a null "wrap". |
|     int termOrTermGroupChildId = 1 + negations.size(); |
| |
|     if (getNodeCat(node.getChild(0)).equals("key")) { |
|         // no 'term' child, but direct key specification: process here |
|         Map<String, Object> term = KoralObjectGenerator.makeTerm(); |
|         String key = node.getChild(0).getText(); |
|         boolean isRegex = getNodeCat(node.getChild(0).getChild(0)) |
|                 .equals("regex"); |
|         if (isRegex) { |
|             term.put("type", "type:regex"); |
|             // fixme: use stream with offset to get text! |
|             // drop the first and last character (regex delimiters) |
|             key = key.substring(1, key.length() - 1); |
|         } |
|         term.put("layer", "orth"); |
|         term.put("key", key); |
|         KoralMatchOperator matches = negated |
|                 ? KoralMatchOperator.NOT_EQUALS |
|                 : KoralMatchOperator.EQUALS; |
|         term.put("match", matches.toString()); |
|         ParseTree flagNode = getFirstChildWithCat(node, "flag"); |
|         if (flagNode != null) { |
|             ArrayList<String> flags = new ArrayList<String>(); |
|             // substring removes leading slash '/' |
|             String flag = getNodeCat(flagNode.getChild(0)).substring(1); |
|             if (flag.contains("i")) { |
|                 flags.add("flags:caseInsensitive"); |
|             } |
|             if (flag.contains("x")) { |
|                 term.put("type", "type:regex"); |
|                 // a literal key must be escaped before it is embedded |
|                 // in the generated regex |
|                 if (!isRegex) { |
|                     key = QueryUtils.escapeRegexSpecialChars(key); |
|                 } |
|                 // overwrite key |
|                 term.put("key", ".*?" + key + ".*?"); |
|             } |
|             if (!flags.isEmpty()) { |
|                 term.put("flags", flags); |
|             } |
|         } |
|         token.put("wrap", term); |
|     } |
|     else { |
|         // child is 'term' or 'termGroup' -> process in extra method |
|         Map<String, Object> termOrTermGroup = parseTermOrTermGroup( |
|                 node.getChild(termOrTermGroupChildId), negated); |
|         token.put("wrap", termOrTermGroup); |
|     } |
|     putIntoSuperObject(token); |
|     visited.addAll(getChildren(node)); |
| } |
| |
| |
| /** |
|  * Processes an 'alignment' node. Such nodes represent alignment |
|  * anchors which introduce an alignment ruler in KWIC display. The |
|  * serialisation expects the segments to the left and to the right |
|  * of each anchor to be wrapped in classes; these classes are then |
|  * referenced through addAlignment(). A missing neighbour is |
|  * reported as -1. |
|  * |
|  * @param node |
|  *            The 'alignment' node |
|  */ |
| private void processAlignment (ParseTree node) { |
|     // Anchors sit at every second child. If the node starts with an |
|     // anchor (no segment to its left), counting starts at 0. |
|     int anchorIdx = node.getChild(0).getText().equals("^") ? 0 : 1; |
|     while (anchorIdx < node.getChildCount()) { |
|         int leftClass = alignmentClassOf(node.getChild(anchorIdx - 1)); |
|         int rightClass = alignmentClassOf(node.getChild(anchorIdx + 1)); |
|         addAlignment(leftClass, rightClass); |
|         anchorIdx += 2; |
|     } |
| } |
| |
| |
| /** |
|  * Returns the class id under which the given neighbour of an |
|  * alignment anchor is registered in {@link #classWrapRegistry}, |
|  * registering it with a fresh id if necessary. |
|  * |
|  * @param operand |
|  *            The neighbouring node, or null if there is none |
|  * @return The class id, or -1 if operand is null |
|  */ |
| private int alignmentClassOf (ParseTree operand) { |
|     if (operand == null) { |
|         return -1; |
|     } |
|     if (classWrapRegistry.containsKey(operand)) { |
|         return classWrapRegistry.get(operand); |
|     } |
|     int freshId = classCounter++; |
|     classWrapRegistry.put(operand, freshId); |
|     return freshId; |
| } |
| |
| |
| /** |
|  * Processes a 'span' node: builds a span object wrapping a term |
|  * (foundry, layer, key, match operator) and, if the node has a |
|  * 'term' or 'termGroup' child, an "attr" entry parsed in span |
|  * mode. The span is put into the super object and pushed onto the |
|  * object stack. |
|  * |
|  * @param node |
|  *            The 'span' node |
|  */ |
| private void processSpan (ParseTree node) { |
|     // an odd number of '!' operators negates the attributes |
|     boolean negated = getChildrenWithCat(node, "!").size() % 2 == 1; |
|     Map<String, Object> span = KoralObjectGenerator.makeSpan(); |
|     Map<String, Object> wrappedTerm = KoralObjectGenerator.makeTerm(); |
|     span.put("wrap", wrappedTerm); |
| |
|     ParseTree keyNode = getFirstChildWithCat(node, "key"); |
|     ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
|     ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
|     ParseTree termOpNode = getFirstChildWithCat(node, "termOp"); |
|     ParseTree termNode = getFirstChildWithCat(node, "term"); |
|     ParseTree termGroupNode = getFirstChildWithCat(node, "termGroup"); |
| |
|     if (foundryNode != null) { |
|         wrappedTerm.put("foundry", foundryNode.getText()); |
|     } |
|     if (layerNode != null) { |
|         String layerName = layerNode.getText(); |
|         // "base" is serialised as "lemma" |
|         wrappedTerm.put("layer", |
|                 layerName.equals("base") ? "lemma" : layerName); |
|     } |
| |
|     String key = keyNode.getText(); |
|     if (hasChild(keyNode, "regex")) { |
|         // remove leading/trailing double quotes |
|         key = key.substring(1, key.length() - 1); |
|         wrappedTerm.put("type", "type:regex"); |
|     } |
|     wrappedTerm.put("key", key); |
| |
|     if (termOpNode != null) { |
|         switch (termOpNode.getText()) { |
|             case "==": |
|                 wrappedTerm.put("match", |
|                         KoralMatchOperator.EQUALS.toString()); |
|                 break; |
|             case "!=": |
|                 wrappedTerm.put("match", |
|                         KoralMatchOperator.NOT_EQUALS.toString()); |
|                 break; |
|             default: |
|                 break; |
|         } |
|     } |
| |
|     // 'term' first, then 'termGroup': if both exist the latter wins |
|     for (ParseTree attrNode : new ParseTree[] { termNode, termGroupNode }) { |
|         if (attrNode != null) { |
|             span.put("attr", |
|                     parseTermOrTermGroup(attrNode, negated, "span")); |
|         } |
|     } |
| |
|     putIntoSuperObject(span); |
|     objectStack.push(span); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'disjunction' node by opening a disjunction group: |
|  * the group is put into the super object and pushed onto the |
|  * object stack. |
|  * |
|  * @param node |
|  *            The 'disjunction' node (not inspected) |
|  */ |
| private void processDisjunction (ParseTree node) { |
|     Map<String, Object> disjunctionGroup = KoralObjectGenerator |
|             .makeGroup(KoralOperation.DISJUNCTION); |
|     putIntoSuperObject(disjunctionGroup); |
|     objectStack.push(disjunctionGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'position' node by opening the position group that |
|  * parseFrame() builds from the node's first child. |
|  * |
|  * @param node |
|  *            The 'position' node |
|  */ |
| private void processPosition (ParseTree node) { |
|     ParseTree frameNode = node.getChild(0); |
|     Map<String, Object> positionGroup = parseFrame(frameNode); |
|     putIntoSuperObject(positionGroup); |
|     objectStack.push(positionGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'relation' node: opens a relation group whose |
|  * "relation" entry wraps a term. "dominates" selects layer "c", |
|  * "dependency" layer "d"; an optional 'relSpec' child may set |
|  * foundry, layer and key, and an optional 'repetition' child adds |
|  * a boundary to the relation. |
|  * |
|  * @param node |
|  *            The 'relation' node |
|  */ |
| private void processRelation (ParseTree node) { |
|     Map<String, Object> relationGroup = KoralObjectGenerator |
|             .makeGroup(KoralOperation.RELATION); |
|     Map<String, Object> relation = KoralObjectGenerator.makeRelation(); |
|     Map<String, Object> term = KoralObjectGenerator.makeTerm(); |
|     relationGroup.put("relation", relation); |
|     relation.put("wrap", term); |
| |
|     String relationName = node.getChild(0).getText(); |
|     if (relationName.equals("dominates")) { |
|         term.put("layer", "c"); |
|     } |
|     else if (relationName.equals("dependency")) { |
|         term.put("layer", "d"); |
|     } |
| |
|     ParseTree relSpec = getFirstChildWithCat(node, "relSpec"); |
|     ParseTree repetition = getFirstChildWithCat(node, "repetition"); |
|     if (relSpec != null) { |
|         ParseTree foundryNode = getFirstChildWithCat(relSpec, "foundry"); |
|         ParseTree layerNode = getFirstChildWithCat(relSpec, "layer"); |
|         ParseTree keyNode = getFirstChildWithCat(relSpec, "key"); |
|         if (foundryNode != null) { |
|             term.put("foundry", foundryNode.getText()); |
|         } |
|         // an explicit layer replaces the one derived from the relation |
|         if (layerNode != null) { |
|             term.put("layer", layerNode.getText()); |
|         } |
|         if (keyNode != null) { |
|             term.put("key", keyNode.getText()); |
|         } |
|     } |
|     if (repetition != null) { |
|         Integer[] bounds = parseRepetition(repetition); |
|         relation.put("boundary", |
|                 KoralObjectGenerator.makeBoundary(bounds[0], bounds[1])); |
|     } |
| |
|     putIntoSuperObject(relationGroup); |
|     objectStack.push(relationGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'spanclass' node by opening a span class. The class |
|  * id defaults to 1; if the second child is a 'spanclass_id', the |
|  * id is read from it. An id that is not a number is logged, |
|  * reported as {@link StatusCodes#INVALID_CLASS_REFERENCE} and |
|  * replaced by 0. |
|  * |
|  * @param node |
|  *            The 'spanclass' node |
|  */ |
| private void processSpanclass (ParseTree node) { |
|     int classId = 1; |
|     ParseTree idCandidate = node.getChild(1); |
|     if (getNodeCat(idCandidate).equals("spanclass_id")) { |
|         String ref = idCandidate.getChild(0).toStringTree(parser); |
|         try { |
|             classId = Integer.parseInt(ref); |
|         } |
|         catch (NumberFormatException e) { |
|             classId = 0; |
|             String msg = "The specified class reference in the " |
|                     + "focus/split-Operator is not a number: " + ref; |
|             log.error(msg); |
|             addError(StatusCodes.INVALID_CLASS_REFERENCE, msg); |
|         } |
|     } |
|     Map<String, Object> classGroup = KoralObjectGenerator |
|             .makeSpanClass(classId); |
|     addHighlightClass(classId); |
|     putIntoSuperObject(classGroup); |
|     objectStack.push(classGroup); |
|     stackedObjects++; |
| } |
| |
| |
| /** |
|  * Processes a 'matching' node by opening a reference group. Class |
|  * references are read from the 'spanclass_id' child at index 2 |
|  * (all of its children except the last one); without such a child |
|  * class 1 is referenced. The first child names the operation: |
|  * "split" is set explicitly, anything else keeps the default of |
|  * the generated reference. |
|  * |
|  * @param node |
|  *            The 'matching' node |
|  */ |
| private void processMatching (ParseTree node) { |
|     // Step I: collect class references and the optional operator |
|     ArrayList<Integer> classRefs = new ArrayList<Integer>(); |
|     ClassRefOp classRefOp = null; |
|     ParseTree refNode = node.getChild(2); |
|     if (!getNodeCat(refNode).equals("spanclass_id")) { |
|         classRefs.add(1); // default |
|     } |
|     else { |
|         int lastIdx = refNode.getChildCount() - 1; |
|         for (int idx = 0; idx < lastIdx; idx++) { |
|             String ref = refNode.getChild(idx).getText(); |
|             // NOTE(review): "|" yields INTERSECTION and "&" yields |
|             // UNION, which looks inverted -- confirm against the |
|             // KoralQuery specification before changing. |
|             if (ref.equals("|")) { |
|                 classRefOp = ClassRefOp.INTERSECTION; |
|                 continue; |
|             } |
|             if (ref.equals("&")) { |
|                 classRefOp = ClassRefOp.UNION; |
|                 continue; |
|             } |
|             try { |
|                 classRefs.add(Integer.parseInt(ref)); |
|             } |
|             catch (NumberFormatException e) { |
|                 String err = "The specified class reference in the " |
|                         + "shrink/split-Operator is not a number."; |
|                 addError(StatusCodes.INVALID_CLASS_REFERENCE, err); |
|             } |
|         } |
|     } |
|     Map<String, Object> referenceGroup = KoralObjectGenerator |
|             .makeReference(classRefs); |
| |
|     // Default is focus(), if deviating catch here |
|     String operationName = node.getChild(0).toStringTree(parser); |
|     if (operationName.equals("split")) { |
|         referenceGroup.put("operation", "operation:split"); |
|     } |
|     if (classRefOp != null) { |
|         referenceGroup.put("classRefOp", classRefOp.toString()); |
|     } |
|     referenceGroup.put("operands", new ArrayList<Object>()); |
| |
|     // Step II: embed the group and open it for the operands |
|     putIntoSuperObject(referenceGroup); |
|     objectStack.push(referenceGroup); |
|     stackedObjects++; |
|     visited.add(node.getChild(0)); |
| } |
| |
| |
| /** |
|  * Processes a 'submatch' node by opening a reference group with a |
|  * "spanRef" list holding the start position and, if a 'length' |
|  * child exists, the length. |
|  * |
|  * @param node |
|  *            The 'submatch' node |
|  */ |
| private void processSubmatch (ParseTree node) { |
|     Map<String, Object> submatchGroup = KoralObjectGenerator |
|             .makeReference(null); |
|     submatchGroup.put("operands", new ArrayList<Object>()); |
| |
|     ParseTree startNode = getFirstChildWithCat(node, "startpos"); |
|     ParseTree lengthNode = getFirstChildWithCat(node, "length"); |
|     ArrayList<Integer> spanRef = new ArrayList<Integer>(); |
|     spanRef.add(Integer.parseInt(startNode.getText())); |
|     if (lengthNode != null) { |
|         spanRef.add(Integer.parseInt(lengthNode.getText())); |
|     } |
|     submatchGroup.put("spanRef", spanRef); |
| |
|     putIntoSuperObject(submatchGroup); |
|     objectStack.push(submatchGroup); |
|     stackedObjects++; |
|     visited.add(node.getChild(0)); |
| } |
| |
| |
| /** |
|  * Processes a 'meta' node: issues a warning that the 'meta' |
|  * keyword is not supported, runs the text of the node's second |
|  * child through a {@link CollectionQueryProcessor} and copies the |
|  * resulting "collection" entry into the request map. All children |
|  * of the node are marked as visited. |
|  * |
|  * @param node |
|  *            The 'meta' node |
|  */ |
| private void processMeta (ParseTree node) { |
|     addWarning("You used the 'meta' keyword in a PoliqarpPlus query. This" |
|             + " feature is currently not supported. Please use virtual " |
|             + "collections to restrict documents by metadata."); |
|     String metaExpression = node.getChild(1).getText(); |
|     CollectionQueryProcessor collectionProcessor = new CollectionQueryProcessor( |
|             metaExpression); |
|     requestMap.put("collection", |
|             collectionProcessor.getRequestMap().get("collection")); |
|     visited.addAll(getChildren(node)); |
| } |
| |
| |
| /** |
|  * Processes a 'within' node: the query built so far is replaced by |
|  * a position group (frame IS_AROUND) whose operands are a span |
|  * named after the category of the node's second child and the |
|  * previous query object. The first two children are marked as |
|  * visited. |
|  * |
|  * @param node |
|  *            The 'within' node |
|  */ |
| @SuppressWarnings("unchecked") |
| private void processWithin (ParseTree node) { |
|     String domain = getNodeCat(node.getChild(1)); |
|     Map<String, Object> domainSpan = KoralObjectGenerator.makeSpan(domain); |
|     Map<String, Object> innerQuery = (Map<String, Object>) requestMap |
|             .get("query"); |
| |
|     ArrayList<KoralFrame> frames = new ArrayList<KoralFrame>(); |
|     frames.add(KoralFrame.IS_AROUND); |
|     Map<String, Object> containment = KoralObjectGenerator |
|             .makePosition(frames); |
|     ArrayList<Object> operands = (ArrayList<Object>) containment |
|             .get("operands"); |
|     operands.add(domainSpan); |
|     operands.add(innerQuery); |
| |
|     requestMap.put("query", containment); |
|     visited.add(node.getChild(0)); |
|     visited.add(node.getChild(1)); |
| } |
| |
| |
| /** |
|  * Parses a 'repetition' node, which is either a kleene operator |
|  * (*, +, ?) or a range of the form "{ min , max }", "{ max }", |
|  * "{ , max }" or "{ min , }". |
|  * |
|  * @param node |
|  *            The 'repetition' node |
|  * @return A two-element array: the minimal number of repetitions |
|  *         and the maximal number of repetitions, the latter being |
|  *         null if there is no upper bound |
|  */ |
| private Integer[] parseRepetition (ParseTree node) { |
|     Integer min = 0; |
|     Integer max = 0; |
|     boolean unbounded = false; |
|     ParseTree typeNode = node.getChild(0); |
|     if (getNodeCat(typeNode).equals("kleene")) { |
|         // kleene operators (+ and *) as well as optionality (?) |
|         switch (typeNode.getText()) { |
|             case "*": |
|                 unbounded = true; |
|                 break; |
|             case "+": |
|                 min = 1; |
|                 unbounded = true; |
|                 break; |
|             case "?": |
|                 max = 1; |
|                 break; |
|             default: |
|                 break; |
|         } |
|     } |
|     else { |
|         ParseTree minNode = getFirstChildWithCat(typeNode, "min"); |
|         ParseTree maxNode = getFirstChildWithCat(typeNode, "max"); |
|         if (maxNode == null) { |
|             unbounded = true; |
|         } |
|         else { |
|             max = Integer.parseInt(maxNode.getText()); |
|         } |
|         if (minNode != null) { |
|             min = Integer.parseInt(minNode.getText()); |
|         } |
|         else if (hasChild(typeNode, ",")) { |
|             // "{ , max }": lower bound left open |
|             min = 0; |
|         } |
|         else { |
|             // "{ n }": exactly n repetitions. This form is ambiguous |
|             // with a class around the word form n (written |
|             // unambiguously as {[orth=n]}); it is read as a repetition. |
|             min = max; |
|         } |
|     } |
|     return new Integer[] { min, unbounded ? null : max }; |
| } |
| |
| |
| /** |
|  * Translates a frame operator node into a position group. The |
|  * operator name is matched case-insensitively; an unknown operator |
|  * yields a position group with an empty frame list. |
|  * |
|  * @param node |
|  *            The node holding the operator (contains, matches, |
|  *            startswith, endswith or overlaps) |
|  * @return The position group created by |
|  *         {@link KoralObjectGenerator#makePosition} |
|  */ |
| private Map<String, Object> parseFrame (ParseTree node) { |
|     // Locale.ROOT keeps lower-casing independent of the JVM default |
|     // locale; under e.g. a Turkish locale an upper-case 'I' would be |
|     // mapped to a dotless i and "STARTSWITH" would match no case label. |
|     String operator = node.toStringTree(parser).toLowerCase(Locale.ROOT); |
|     ArrayList<KoralFrame> frames = new ArrayList<KoralFrame>(); |
|     switch (operator) { |
|         case "contains": |
|             frames.add(KoralFrame.IS_AROUND); |
|             break; |
|         case "matches": |
|             frames.add(KoralFrame.MATCHES); |
|             break; |
|         case "startswith": |
|             frames.add(KoralFrame.STARTS_WITH); |
|             frames.add(KoralFrame.MATCHES); |
|             break; |
|         case "endswith": |
|             frames.add(KoralFrame.ENDS_WITH); |
|             frames.add(KoralFrame.MATCHES); |
|             break; |
|         case "overlaps": |
|             frames.add(KoralFrame.OVERLAPS_LEFT); |
|             frames.add(KoralFrame.OVERLAPS_RIGHT); |
|             break; |
|         default: |
|             // unknown operator: frames stay empty |
|             break; |
|     } |
|     return KoralObjectGenerator.makePosition(frames); |
| } |
| |
| |
| private Map<String, Object> parseTermOrTermGroup (ParseTree node, |
| boolean negated) { |
| return parseTermOrTermGroup(node, negated, "token"); |
| } |
| |
| |
| /** |
|  * Parses a (term) or (termGroup) node |
|  * |
|  * @param node |
|  *            The 'term' or 'termGroup' node |
|  * @param negatedGlobal |
|  *            Indicates whether the term/termGroup is globally |
|  *            negated, e.g. through a negation operator preceding |
|  *            the related token like "![base=foo]". Global |
|  *            negation affects the term's "match" parameter. |
|  * @param mode |
|  *            'token' or 'span' (tokens and spans are treated |
|  *            differently). |
|  * @return A term or termGroup object, depending on input, or null |
|  *         if the node is neither a 'term' nor a 'termGroup' |
|  */ |
| @SuppressWarnings("unchecked") |
| private Map<String, Object> parseTermOrTermGroup (ParseTree node, |
|         boolean negatedGlobal, String mode) { |
| |
|     String nodeCat = getNodeCat(node); |
| |
|     if (nodeCat.equals("term")) { |
| |
|         // Term is defined recursive with non-necessary brackets |
|         if (getNodeCat(node.getChild(0)).equals("(")) { |
|             return parseTermOrTermGroup(node.getChild(1), negatedGlobal, |
|                     mode); |
|         } |
| |
|         Map<String, Object> term = KoralObjectGenerator.makeTerm(); |
|         // handle negation: an odd number of '!' flips the global state |
|         boolean negated = negatedGlobal; |
|         boolean isRegex = false; |
|         List<ParseTree> negations = getChildrenWithCat(node, "!"); |
|         if (negations.size() % 2 == 1) { |
|             negated = !negated; |
|         } |
|         // retrieve possible nodes |
|         ParseTree keyNode = getFirstChildWithCat(node, "key"); |
|         ParseTree valueNode = getFirstChildWithCat(node, "value"); |
|         ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
|         ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
|         ParseTree termOpNode = getFirstChildWithCat(node, "termOp"); |
|         ParseTree flagNode = getFirstChildWithCat(node, "flag"); |
|         // process foundry |
|         if (foundryNode != null) { |
|             term.put("foundry", foundryNode.getText()); |
|         } |
|         // process key: 'normal' or regex? |
|         String key = keyNode.getText(); |
|         if (getNodeCat(keyNode.getChild(0)).equals("regex")) { |
|             isRegex = true; |
|             term.put("type", "type:regex"); |
|             // remove leading and trailing quotes |
|             key = key.substring(1, key.length() - 1); |
|         } |
|         // in span mode the parsed key is stored as the term's "value" |
|         if (mode.equals("span")) { |
|             term.put("value", key); |
|         } |
|         else { |
|             term.put("key", key); |
|         } |
|         // process layer: map "base" -> "lemma" |
|         if (layerNode != null) { |
|             String layer = layerNode.getText(); |
|             if (mode.equals("span")) { |
|                 // in span mode the layer name becomes the term's "key" |
|                 term.put("key", layer); |
|             } |
|             else if (mode.equals("token")) { |
|                 if (layer.equals("base")) { |
|                     layer = "lemma"; |
|                 } |
|                 else if (layer.equals("punct")) { |
|                     layer = "orth"; |
|                     // will override "type":"type:regex" |
|                     term.put("type", "type:punct"); |
|                 } |
|                 term.put("layer", layer); |
|             } |
|         } |
|         // process value |
|         if (valueNode != null) { |
|             term.put("value", valueNode.getText()); |
|         } |
|         // process operator ("match" property) |
|         if (termOpNode != null) { |
|             String termOp = termOpNode.getText(); |
|             if (termOp.contains("!")) { |
|                 negated = !negated; |
|             } |
|             term.put("match", negated ? "match:ne" : "match:eq"); |
|         } |
|         // process possible flags |
|         if (flagNode != null) { |
|             ArrayList<String> flags = new ArrayList<String>(); |
|             // substring removes leading slash |
|             String flag = getNodeCat(flagNode.getChild(0)).substring(1); |
| |
|             if (flag.contains("i")) { |
|                 flags.add("flags:caseInsensitive"); |
|             } |
|             if (flag.contains("x")) { |
|                 if (!isRegex) { |
|                     key = QueryUtils.escapeRegexSpecialChars(key); |
|                 } |
|                 // flag 'x' allows submatches: |
|                 // overwrite key with appended .*? |
|                 term.put("key", ".*?" + key + ".*?"); |
|                 term.put("type", "type:regex"); |
|             } |
|             if (!flags.isEmpty()) { |
|                 term.put("flags", flags); |
|             } |
|         } |
|         return term; |
|     } |
|     else if (nodeCat.equals("termGroup")) { |
| |
|         // TermGroup is defined recursive with non-necessary brackets |
|         if (getNodeCat(node.getChild(0)).equals("(")) { |
|             return parseTermOrTermGroup(node.getChild(1), negatedGlobal, |
|                     mode); |
|         } |
| |
|         // For termGroups, establish a boolean relation between |
|         // operands and recursively call this function with |
|         // the term or termGroup operands. |
|         // The first child cannot be "(" here (that case returned |
|         // above), so it always is the left operand. |
|         ParseTree leftOp = node.getChild(0); |
|         // skip a trailing parenthesis when picking the right operand |
|         int lastIdx = node.getChildCount() - 1; |
|         ParseTree rightOp; |
|         if (getNodeCat(node.getChild(lastIdx)).equals(")")) { |
|             rightOp = node.getChild(lastIdx - 1); |
|         } |
|         else { |
|             rightOp = node.getChild(lastIdx); |
|         } |
|         // establish boolean relation |
|         ParseTree boolOp = getFirstChildWithCat(node, "boolOp"); |
|         Map<String, Object> termGroup; |
|         if (boolOp.getText().equals("&")) { |
|             termGroup = KoralObjectGenerator |
|                     .makeTermGroup(KoralTermGroupRelation.AND); |
|         } |
|         else { |
|             termGroup = KoralObjectGenerator |
|                     .makeTermGroup(KoralTermGroupRelation.OR); |
|         } |
|         ArrayList<Object> operands = (ArrayList<Object>) termGroup |
|                 .get("operands"); |
|         // recursion with left/right operands |
|         operands.add(parseTermOrTermGroup(leftOp, negatedGlobal, mode)); |
|         operands.add(parseTermOrTermGroup(rightOp, negatedGlobal, mode)); |
|         return termGroup; |
|     } |
|     return null; |
| } |
| |
| |
| /** |
| * Puts an object into the operands list of its governing (or |
| * "super") object which had been placed on the |
| * {@link #objectStack} before and is still on top of the stack. |
| * If this is the top object of the tree, it is put there instead |
| * of into some (non-existent) operand stack. |
| * |
| * @param object |
| * The object to be inserted |
| */ |
| private void putIntoSuperObject (Map<String, Object> object) { |
| putIntoSuperObject(object, 0); |
| } |
| |
| |
| /** |
|  * Puts an object into the operands list of its governing (or |
|  * "super") object, which is looked up at the given position of the |
|  * {@link #objectStack}. If the stack holds no object at that |
|  * position, the object becomes the "query" entry of the request |
|  * map instead. |
|  * |
|  * @param object |
|  *            The object to be inserted |
|  * @param objStackPosition |
|  *            The position of the super object on the |
|  *            {@link #objectStack} |
|  */ |
| @SuppressWarnings({ "unchecked" }) |
| private void putIntoSuperObject (Map<String, Object> object, |
|         int objStackPosition) { |
|     if (objectStack.size() <= objStackPosition) { |
|         // no governing object: this is the root of the query |
|         requestMap.put("query", object); |
|         return; |
|     } |
|     ArrayList<Object> superOperands = (ArrayList<Object>) objectStack |
|             .get(objStackPosition).get("operands"); |
|     superOperands.add(object); |
| } |
| |
| |
| /** |
|  * Parses the min and max attributes for a boundary object from a |
|  * distance node. The empty token sequence is taken from child 0 if |
|  * the node's category is 'distance', otherwise from child 2. |
|  * |
|  * @param distanceNode |
|  *            A node of category 'distance', or a node holding the |
|  *            empty token sequence as its third child |
|  * @return A new array of two fields, where the first is the min |
|  *         value and the second is the max value and may be null. |
|  *         The values are returned as parsed, without increment. |
|  */ |
| private Integer[] parseDistance (ParseTree distanceNode) { |
|     int emptyTokenSeqIndex; |
|     if (getNodeCat(distanceNode).equals("distance")) { |
|         emptyTokenSeqIndex = 0; |
|     } |
|     else { |
|         emptyTokenSeqIndex = 2; |
|     } |
|     Integer[] parsed = parseEmptySegments( |
|             distanceNode.getChild(emptyTokenSeqIndex)); |
|     return new Integer[] { parsed[0], parsed[1] }; |
| } |
| |
| |
| /** |
|  * Sums up the minimal and maximal number of tokens covered by a |
|  * run of empty tokens, each of which may be followed by a |
|  * 'repetition' node. As soon as one empty token is repeated |
|  * without an upper bound, the whole run is unbounded. Previously |
|  * the maximum was set to null in that case and then incremented by |
|  * a following bounded empty token, which raised a |
|  * NullPointerException. |
|  * |
|  * @param emptySegments |
|  *            The node whose children are the empty tokens and |
|  *            their repetitions |
|  * @return A two-element array: the minimal number of tokens and |
|  *         the maximal number of tokens, the latter being null if |
|  *         the run is unbounded |
|  */ |
| private Integer[] parseEmptySegments (ParseTree emptySegments) { |
|     int min = 0; |
|     int max = 0; |
|     // once set, the run stays unbounded regardless of later tokens |
|     boolean unbounded = false; |
|     for (int i = 0; i < emptySegments.getChildCount(); i++) { |
|         ParseTree child = emptySegments.getChild(i); |
|         if (!child.toStringTree(parser).equals("(emptyToken [ ])")) { |
|             // repetition nodes are consumed with the preceding token |
|             continue; |
|         } |
|         ParseTree nextSibling = emptySegments.getChild(i + 1); |
|         if (nextSibling != null |
|                 && getNodeCat(nextSibling).equals("repetition")) { |
|             Integer[] minmax = parseRepetition(nextSibling); |
|             min += minmax[0]; |
|             if (minmax[1] == null) { |
|                 unbounded = true; |
|             } |
|             else { |
|                 max += minmax[1]; |
|             } |
|         } |
|         else { |
|             // a plain empty token counts exactly once |
|             min++; |
|             max++; |
|         } |
|     } |
|     Integer maxResult = unbounded ? null : Integer.valueOf(max); |
|     return new Integer[] { min, maxResult }; |
| } |
| |
| |
| /** |
|  * Runs the PoliqarpPlus lexer and parser on the query, starting at |
|  * the 'request' rule. The created parser is stored in the |
|  * 'parser' field. Any exception raised on the way is logged |
|  * together with the message of the error listener, and that |
|  * message is recorded as an error. |
|  * |
|  * @param query |
|  *            The PoliqarpPlus query string |
|  * @return The parse tree, or null if an exception occurred |
|  */ |
| private ParserRuleContext parsePoliqarpQuery (String query) { |
|     Lexer lexer = new PoliqarpPlusLexer((CharStream) null); |
|     ParserRuleContext tree = null; |
|     Antlr4DescriptiveErrorListener errorListener = new Antlr4DescriptiveErrorListener( |
|             query); |
|     try { |
|         // Tokenize input data |
|         ANTLRInputStream input = new ANTLRInputStream(query); |
|         lexer.setInputStream(input); |
|         CommonTokenStream tokens = new CommonTokenStream(lexer); |
|         parser = new PoliqarpPlusParser(tokens); |
| |
|         // Install the bail-out error strategy and route lexer and |
|         // parser errors to the descriptive listener only. |
|         parser.setErrorHandler(new BailErrorStrategy()); |
|         lexer.removeErrorListeners(); |
|         lexer.addErrorListener(errorListener); |
|         parser.removeErrorListeners(); |
|         parser.addErrorListener(errorListener); |
| |
|         // Get starting rule from parser |
|         Method startRule = PoliqarpPlusParser.class.getMethod("request"); |
|         tree = (ParserRuleContext) startRule.invoke(parser, |
|                 (Object[]) null); |
|     } |
|     // Some things went wrong ... |
|     catch (Exception e) { |
|         // Pass the exception on to the logger so that failures other |
|         // than syntax errors (e.g. reflection problems) stay visible. |
|         log.error("Could not parse query. " |
|                 + "Please make sure it is well-formed.", e); |
|         log.error(errorListener.generateFullErrorMsg().toString()); |
|         addError(errorListener.generateFullErrorMsg()); |
|     } |
|     return tree; |
| } |
| } |