Implement alignment serialization according to new spec
Change-Id: Ia612e5ce238afabc4ed94424ecfdb322e50ded5b
diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..9c865fb
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/.project b/.project
new file mode 100644
index 0000000..53c51ae
--- /dev/null
+++ b/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>Koral</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ </natures>
+</projectDescription>
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..ec4300d
--- /dev/null
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
index 4137f02..9e9c595 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
@@ -150,7 +150,7 @@
;
alignment
-: CARET (segment|sequence)
+: segment? (CARET segment)* CARET?
;
disjunction
@@ -194,15 +194,18 @@
| LRPAREN segment RRPAREN
)
repetition?
-;
+ ;
sequence
: segment* (emptyTokenSequence|emptyTokenSequenceClass) // ordering important! this subrule must precede any 'distance'-subrules to give precedence to repetition-interpretation of numbers in braces (could be mistaken for number tokens in spanclass), e.g. {2}.
| (emptyTokenSequence|emptyTokenSequenceClass) (segment+ | sequence) (emptyTokenSequence|emptyTokenSequenceClass)?
+| alignment segment* // give precedence to this subrule over the next to make sure preceding segments come into 'alignment'
+| segment+ alignment segment*
| segment segment+
| segment (distance|emptyTokenSequenceClass) segment
| segment (distance|emptyTokenSequenceClass)? sequence
-| segment+ alignment
+
+//| alignment (segment|sequence) alignment?
;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractQueryProcessor.java
index d53cb14..5c15114 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractQueryProcessor.java
@@ -84,6 +84,11 @@
* Indicates which classes are to be highlighted in KWIC view.
*/
private ArrayList<Integer> highlightClasses = new ArrayList<Integer>();
+
+ /**
+ * Indicates positions of alignment rulers in KWIC view.
+ */
+ private ArrayList<List<Integer>> alignments = new ArrayList<List<Integer>>();
AbstractQueryProcessor() {
requestMap.put("@context",
@@ -168,6 +173,12 @@
highlightClasses.add(classId);
meta.put("highlight", highlightClasses);
}
+
+ public void addAlignment(int leftClassId, int rightClassId) {
+ List<Integer> alignment = Arrays.asList(new Integer[]{leftClassId, rightClassId});
+ alignments.add(alignment);
+ meta.put("alignment", alignments);
+ }
/**
* Getter method for the {@link #requestMap}, which represents the
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
index eab29f8..f905beb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
@@ -15,7 +15,16 @@
import java.util.*;
/**
- * Map representation of PoliqarpPlus syntax tree as returned by ANTLR
+ * Map representation of PoliqarpPlus syntax tree as returned by ANTLR.
+ * Most centrally, this class maintains a set of nested maps and
+ * lists which represent the JSON tree, which is built by the JSON
+ * serialiser on the basis of the {@link #requestMap} at the root of
+ * the tree. <br/>
+ * The class further maintains a set of stacks which effectively
+ * keep track of which objects to embed in which containing
+ * objects.
+ *
+ * This class expects the Poliqarp+ ANTLR grammar shipped with Koral v0.3.0.
*
* @author Joachim Bingel (bingel@ids-mannheim.de)
* @version 0.3.0
@@ -27,15 +36,10 @@
.getLogger(PoliqarpPlusQueryProcessor.class);
private int classCounter = 1;
+ LinkedHashMap<ParseTree, Integer> classWrapRegistry = new LinkedHashMap<ParseTree, Integer>();
+
/**
- * Most centrally, this class maintains a set of nested maps and
- * lists which represent the JSON tree, which is built by the JSON
- * serialiser on basis of the {@link #requestMap} at the root of
- * the tree. <br/>
- * The class further maintains a set of stacks which effectively
- * keep track of which objects to embed in which containing
- * objects.
- *
+ * Constructor
* @param query
* The syntax tree as returned by ANTLR
* @throws QueryException
@@ -95,6 +99,17 @@
System.out.println(openNodeCats);
}
+ // Check if (the translation of) this node is registered to be wrapped
+ // in a class, e.g. by an alignment operation
+ if (classWrapRegistry.containsKey(node)) {
+ Integer classId = classWrapRegistry.get(node);
+ LinkedHashMap<String, Object> spanClass =
+ KoralObjectGenerator.makeSpanClass(classId);
+ putIntoSuperObject(spanClass);
+ objectStack.push(spanClass);
+ stackedObjects++;
+ }
+
/*
****************************************************************
****************************************************************
@@ -158,12 +173,6 @@
processMeta(node);
}
- // if (nodeCat.equals("term") || nodeCat.equals("termGroup"))
- // {
- // if (inMeta ) putIntoSuperObject(parseTermOrTermGroup(node,
- // false));
- // }
-
if (nodeCat.equals("within")
&& !getNodeCat(node.getParent()).equals("position")) {
processWithin(node);
@@ -191,6 +200,10 @@
openNodeCats.pop();
}
+ /**
+ * Processes a 'segment' node.
+ * @param node
+ */
private void processSegment(ParseTree node) {
// Cover possible quantification (i.e. repetition) of segment
ParseTree quantification = getFirstChildWithCat(node, "repetition");
@@ -206,6 +219,10 @@
}
}
+ /**
+ * Processes a 'sequence' node.
+ * @param node
+ */
private void processSequence(ParseTree node) {
// skip in case of emptyTokenSequence or emptyTokenSequenceClass
if (node.getChildCount() == 1
@@ -213,6 +230,17 @@
.startsWith("emptyTokenSequence")) {
return;
}
+ // skip in case this sequence is just a container for an alignment
+ // node with just one child
+ if (node.getChildCount() == 1
+ && getNodeCat(node.getChild(0))
+ .equals("alignment")) {
+ ParseTree alignmentNode = node.getChild(0);
+ if (alignmentNode.getChildCount() == 2) { // one child is the
+ // alignment operator (^), the other a segment
+ return;
+ }
+ }
LinkedHashMap<String, Object> sequence = KoralObjectGenerator
.makeGroup("sequence");
ParseTree distanceNode = getFirstChildWithCat(node, "distance");
@@ -328,30 +356,47 @@
visited.addAll(getChildren(node));
}
- @SuppressWarnings("unchecked")
+ /**
+ * Processes an 'alignment' node. These nodes represent alignment anchors
+ * which introduce an alignment ruler in KWIC display. The serialization
+ * for this expects the two segments to the left and to the right of each
+ * anchor to be wrapped in classes, then these classes are referenced in
+ * the <tt>alignment</tt> array of the request tree.
+ * @param node
+ */
private void processAlignment(ParseTree node) {
- LinkedHashMap<String, Object> alignClass = KoralObjectGenerator
- .makeSpanClass(classCounter);
- LinkedHashMap<String, Object> metaMap =
- (LinkedHashMap<String, Object>) requestMap.get("meta");
- if (metaMap.containsKey("alignment")) {
- ArrayList<Integer> alignedClasses = new ArrayList<Integer>();
- try {
- alignedClasses = (ArrayList<Integer>) metaMap.get("alignment");
- }
- catch (ClassCastException cce) {
- alignedClasses.add((Integer) metaMap.get("alignment"));
- }
- alignedClasses.add(classCounter);
- metaMap.put("alignment", alignedClasses);
+ int i=1;
+ if (node.getChild(0).getText().equals("^")) {
+ i = 0; // if the first child is the anchor itself (no segment to its
+ // left), start counting at 0 in the loop
}
- else {
- metaMap.put("alignment", classCounter);
+ // for every alignment anchor, get its left and right child and register
+ // these to be wrapped in classes.
+ for (; i<node.getChildCount(); i+=2) {
+ int alignmentFirstArg = -1;
+ int alignmentSecondArg = -1;
+ ParseTree leftChild = node.getChild(i-1);
+ ParseTree rightChild = node.getChild(i+1);
+ if (leftChild != null) {
+ System.out.println(leftChild.getText());
+ if (! classWrapRegistry.containsKey(leftChild)) {
+ alignmentFirstArg = classCounter++;
+ classWrapRegistry.put(leftChild, alignmentFirstArg);
+ } else {
+ alignmentFirstArg = classWrapRegistry.get(leftChild);
+ }
+ }
+ if (rightChild != null) {
+ System.out.println(rightChild.getText());
+ if (! classWrapRegistry.containsKey(rightChild)) {
+ alignmentSecondArg = classCounter++;
+ classWrapRegistry.put(rightChild, alignmentSecondArg);
+ } else {
+ alignmentSecondArg = classWrapRegistry.get(rightChild);
+ }
+ }
+ addAlignment(alignmentFirstArg, alignmentSecondArg);
}
- classCounter++;
- putIntoSuperObject(alignClass);
- objectStack.push(alignClass);
- stackedObjects++;
}
private void processSpan(ParseTree node) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/KoralObjectGenerator.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/KoralObjectGenerator.java
index 5550d51..34a4266 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/KoralObjectGenerator.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/KoralObjectGenerator.java
@@ -98,8 +98,8 @@
return group;
}
- public static LinkedHashMap<String, Object> makeSpanClass(int classCount) {
- return makeSpanClass(classCount, false);
+ public static LinkedHashMap<String, Object> makeSpanClass(int classId) {
+ return makeSpanClass(classId, false);
}
@Deprecated
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
index 8f7a77a..7ce445d 100644
--- a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
@@ -1243,21 +1243,27 @@
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());
assertEquals("operation:sequence", res.at("/query/operation").asText());
- assertEquals("der", res.at("/query/operands/0/wrap/key").asText());
+ assertEquals("der", res.at("/query/operands/0/operands/0/wrap/key").asText());
+ assertEquals(1, res.at("/query/operands/0/classOut").asInt());
+ assertEquals("Mann", res.at("/query/operands/1/operands/0/wrap/key").asText());
assertEquals("operation:class", res.at("/query/operands/1/operation").asText());
- assertEquals(1, res.at("/query/operands/1/classOut").asInt());
- assertEquals(1, res.at("/meta/alignment").asInt());
+ assertEquals(2, res.at("/query/operands/1/classOut").asInt());
+ assertEquals(1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(2, res.at("/meta/alignment/0/1").asInt());
query = "[orth=der]^[orth=große][orth=Mann]";
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());
- assertEquals("operation:sequence", res.at("/query/operation").asText());
- assertEquals("operation:class", res.at("/query/operands/1/operation").asText());
- assertEquals("operation:sequence", res.at("/query/operands/1/operands/0/operation").asText());
- assertEquals("große", res.at("/query/operands/1/operands/0/operands/0/wrap/key").asText());
- assertEquals("Mann", res.at("/query/operands/1/operands/0/operands/1/wrap/key").asText());
- assertEquals(1, res.at("/query/operands/1/classOut").asInt());
- assertEquals(1, res.at("/meta/alignment").asInt());
+ assertEquals("operation:sequence", res.at("/query/operation").asText());
+ assertEquals("der", res.at("/query/operands/0/operands/0/wrap/key").asText());
+ assertEquals(1, res.at("/query/operands/0/classOut").asInt());
+ assertEquals("große", res.at("/query/operands/1/operands/0/wrap/key").asText());
+ assertEquals("operation:class", res.at("/query/operands/1/operation").asText());
+ assertEquals(2, res.at("/query/operands/1/classOut").asInt());
+ assertEquals("Mann", res.at("/query/operands/2/wrap/key").asText());
+ assertEquals(1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(2, res.at("/meta/alignment/0/1").asInt());
+
query = "([base=a]^[base=b])|[base=c]";
qs.setQuery(query, "poliqarpplus");
@@ -1265,29 +1271,54 @@
assertEquals("operation:disjunction", res.at("/query/operation").asText());
assertEquals("operation:sequence", res.at("/query/operands/0/operation").asText());
assertEquals("operation:class", res.at("/query/operands/0/operands/1/operation").asText());
- assertEquals("a", res.at("/query/operands/0/operands/0/wrap/key").asText());
+ assertEquals("a", res.at("/query/operands/0/operands/0/operands/0/wrap/key").asText());
assertEquals("b", res.at("/query/operands/0/operands/1/operands/0/wrap/key").asText());
assertEquals("c", res.at("/query/operands/1/wrap/key").asText());
- assertEquals(1, res.at("/query/operands/0/operands/1/classOut").asInt());
- assertEquals(1, res.at("/meta/alignment").asInt());
+ assertEquals(1, res.at("/query/operands/0/operands/0/classOut").asInt());
+ assertEquals(2, res.at("/query/operands/0/operands/1/classOut").asInt());
+ assertEquals(1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(2, res.at("/meta/alignment/0/1").asInt());
query = "([base=a]^[base=b][base=c])|[base=d]";
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());
- assertEquals("operation:sequence", res.at("/query/operands/0/operands/1/operands/0/operation").asText());
- assertEquals("b", res.at("/query/operands/0/operands/1/operands/0/operands/0/wrap/key").asText());
- assertEquals("c", res.at("/query/operands/0/operands/1/operands/0/operands/1/wrap/key").asText());
+ assertEquals("a", res.at("/query/operands/0/operands/0/operands/0/wrap/key").asText());
+ assertEquals("b", res.at("/query/operands/0/operands/1/operands/0/wrap/key").asText());
+ assertEquals("c", res.at("/query/operands/0/operands/2/wrap/key").asText());
assertEquals("d", res.at("/query/operands/1/wrap/key").asText());
query = "([base=a]^[base=b]^[base=c])|[base=d]";
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());
- assertEquals("operation:sequence", res.at("/query/operands/0/operands/1/operands/0/operation").asText());
- assertEquals("b", res.at("/query/operands/0/operands/1/operands/0/operands/0/wrap/key").asText());
- assertEquals("c", res.at("/query/operands/0/operands/1/operands/0/operands/1/operands/0/wrap/key").asText());
- assertEquals("d", res.at("/query/operands/1/wrap/key").asText());
- assertEquals(1, res.at("/meta/alignment/0").asInt());
- assertEquals(2, res.at("/meta/alignment/1").asInt());
+ assertEquals("a", res.at("/query/operands/0/operands/0/operands/0/wrap/key").asText());
+ assertEquals(1, res.at("/query/operands/0/operands/0/classOut").asInt());
+ assertEquals("b", res.at("/query/operands/0/operands/1/operands/0/wrap/key").asText());
+ assertEquals(2, res.at("/query/operands/0/operands/1/classOut").asInt());
+ assertEquals("c", res.at("/query/operands/0/operands/2/operands/0/wrap/key").asText());
+ assertEquals(3, res.at("/query/operands/0/operands/2/classOut").asInt());
+ assertEquals("d", res.at("/query/operands/1/wrap/key").asText());
+ assertEquals(1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(2, res.at("/meta/alignment/0/1").asInt());
+ assertEquals(2, res.at("/meta/alignment/1/0").asInt());
+ assertEquals(3, res.at("/meta/alignment/1/1").asInt());
+
+ query = "^ Mann";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("Mann", res.at("/query/operands/0/wrap/key").asText());
+ assertEquals("operation:class", res.at("/query/operation").asText());
+ assertEquals(1, res.at("/query/classOut").asInt());
+ assertEquals(-1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(1, res.at("/meta/alignment/0/1").asInt());
+
+ query = "Mann ^";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("Mann", res.at("/query/operands/0/wrap/key").asText());
+ assertEquals("operation:class", res.at("/query/operation").asText());
+ assertEquals(1, res.at("/query/classOut").asInt());
+ assertEquals(1, res.at("/meta/alignment/0/0").asInt());
+ assertEquals(-1, res.at("/meta/alignment/0/1").asInt());
}
@Test