Merge branch 'master' of ssh://korap.ids-mannheim.de:29418/KorAP/Krill
diff --git a/.gitignore b/.gitignore
index 2cccd90..d7c1747 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
# /
/sandbox
/target
+/bin
/temp
/tools
/.settings
@@ -10,12 +11,7 @@
/todo.org
/wiki.org
-# /src/main/java/de/ids_mannheim/korap/query/
-/src/main/java/de/ids_mannheim/korap/query/CopyOfSpanElementQuery.java
-
-# /src/main/java/de/ids_mannheim/korap/query/spans/
-/src/main/java/de/ids_mannheim/korap/query/spans/CopyOfElementSpans.java
-
# /src/main/resources/
/src/main/resources/server.properties
/src/main/resources/korap.conf
+/bin
diff --git a/Changes b/Changes
index b946585..a6f30b2 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.51 2015-03-04
+0.51 2015-03-17
- This is a major version (prepared for the GitHub release)
- [cleanup] Changed groupID to "de.ids_mannheim.korap",
renamed korap-style.xml to Format.xml (diewald)
@@ -9,6 +9,10 @@
- [bugfix] Updated SpanRelationQuery (margaretha)
- [cleanup] Autoformat (diewald)
- [documentation] References added to the Readme (diewald)
+ - [bugfix] Improved failure handling for missing property file (kupietz)
+ - [bugfix] Fixed tests for server responses to not use
+ Jackson deserialization (diewald)
+ - [cleanup] No more jersey logging in tests (diewald)
0.50.1 2015-03-02
- [feature] Deserialization of arbitrary elements with
diff --git a/Errorcodes b/Errorcodes
index 537da15..a481a51 100644
--- a/Errorcodes
+++ b/Errorcodes
@@ -1,4 +1,4 @@
-* 600 - 699 - Lucene Backend error codes
+* 600 - 699 - Krill server error codes
600: "Unable to read index"
601: "Unable to find index"
602: "Unable to add document to index"
@@ -10,8 +10,9 @@
680: "Server is up and running!"
681: "Document was added successfully", document id
682: "Response time exceeded"
+683: "Staged data committed"
-* 700 - 799 - Coral Deserialization errors
+* 700 - 799 - KoralQuery Deserialization errors
700: "No Query given"
701: "JSON-LD group has no @type attribute"
702: "Boundary definition is invalid"
@@ -29,6 +30,8 @@
714: "Span references expect a start position and a length parameter"
715: "Attribute type is not supported"
716: "Unknown relation"
+717: "Missing relation node"
+718: "Missing relation term"
740: "Key definition is missing in term or span"
741: "Match relation unknown"
742: "Term group needs operand list"
diff --git a/Readme.md b/Readme.md
index ede4a3d..04c72b8 100644
--- a/Readme.md
+++ b/Readme.md
@@ -67,13 +67,6 @@
$ mvn clean test
```
-
-To start the server ...
-
-```
-$ mvn compile exec:java
-```
-
## Caveats
Krill operates on tokens and is limited to a single tokenization stream.
diff --git a/pom.xml b/pom.xml
index 83dcd54..821b2f1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -84,6 +84,11 @@
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ <version>1.7.5</version>
+ </dependency>
<!-- SQLite for database connection tests -->
<dependency>
@@ -149,6 +154,11 @@
</dependency>
<!-- JSON support in Jersey -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.jaxrs</groupId>
+ <artifactId>jackson-jaxrs-json-provider</artifactId>
+ <version>2.4.4</version>
+ </dependency>
<!--
<dependency>
<groupId>org.glassfish.jersey.media</groupId>
@@ -165,11 +175,6 @@
<version>2.16</version>
</dependency>
-->
- <dependency>
- <groupId>com.fasterxml.jackson.jaxrs</groupId>
- <artifactId>jackson-jaxrs-json-provider</artifactId>
- <version>2.4.4</version>
- </dependency>
<!-- JSON support using Jackson -->
<!-- see https://github.com/FasterXML/jackson-core -->
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index a425f36..cb604be 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -116,8 +116,8 @@
// Last line of defense against DOS
private int maxTermRelations = 100;
private int autoCommit = 500;
- private String version;
- private String name;
+ private String version = "unknown";
+ private String name = "Krill";
// Temp:
private IndexReader reader;
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index b5390e3..c966cc0 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -7,14 +7,25 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.lucene.util.automaton.RegExp;
-
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
-import de.ids_mannheim.korap.query.SpanWithinQuery;
import de.ids_mannheim.korap.query.QueryBuilder;
-import de.ids_mannheim.korap.query.wrap.*;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanAttributeQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanClassQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanFocusQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRelationWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSimpleQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSubspanQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanWithAttributeQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanWithinQueryWrapper;
import de.ids_mannheim.korap.response.Notifications;
import de.ids_mannheim.korap.util.QueryException;
@@ -74,6 +85,10 @@
private static final int MAX_CLASS_NUM = 255; // 127;
+ // Variables used for relation queries
+ private String direction;
+ private byte[] classNumbers;
+
// Private class for koral:boundary objects
private class Boundary {
public int min, max;
@@ -118,6 +133,10 @@
*/
public KrillQuery (String field) {
this.field = field;
+ this.direction = ">:";
+ this.classNumbers = new byte[2];
+ this.classNumbers[0] = (byte) 1;
+ this.classNumbers[1] = (byte) 2;
};
@@ -238,6 +257,9 @@
if (number > MAX_CLASS_NUM)
throw new QueryException(709,
"Valid class numbers exceeded");
+
+ this.classNumbers = null;
+
}
// Reference based on spans
@@ -281,6 +303,12 @@
// Get wrapped token
return this._segFromJson(json.get("wrap"));
+ case "koral:relation":
+ if (!json.has("wrap")) {
+ throw new QueryException(718, "Missing relation term");
+ }
+ return this._termFromJson(json.get("wrap"), direction);
+
case "koral:span":
return this._termFromJson(json);
};
@@ -386,8 +414,14 @@
return this._operationRepetitionFromJson(json, operands);
case "operation:relation":
- throw new QueryException(765,
- "Relations are currently not supported");
+ if (!json.has("relation")) {
+ throw new QueryException(717, "Missing relation node");
+ }
+
+ return _operationRelationFromJson(operands,
+ json.get("relation"));
+ /*throw new QueryException(765,
+ "Relations are currently not supported");*/
case "operation:or": // Deprecated in favor of operation:junction
return this._operationJunctionFromJson(operands);
@@ -402,6 +436,29 @@
};
+ /**
+ * Deserialize operation:relation.
+ *
+ * The first two operands are deserialized and wrapped together with
+ * the deserialized relation node. If the first operand is empty, the
+ * relation node is deserialized with the direction prefix "<:";
+ * otherwise the current direction is used.
+ *
+ * @param operands
+ * The operand list of the relation group (at least two).
+ * @param relation
+ * The relation node of the group.
+ * @return A {@link SpanRelationWrapper} built from the relation and
+ * the first two operands.
+ * @throws QueryException
+ * With code 705 if fewer than two operands are given, or
+ * whatever deserializing the operands or the relation throws.
+ */
+ private SpanQueryWrapper _operationRelationFromJson (JsonNode operands,
+ JsonNode relation) throws QueryException {
+
+ if (operands.size() < 2) {
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
+ }
+
+ SpanQueryWrapper operand1 = fromJson(operands.get(0));
+ SpanQueryWrapper operand2 = fromJson(operands.get(1));
+
+ // The direction is instance state that the koral:relation branch
+ // hands to _termFromJson. Remember the current value so that a
+ // reversed relation does not leak into relations deserialized
+ // later by this object.
+ final String previousDirection = direction;
+ if (operand1.isEmpty()) {
+ direction = "<:";
+ }
+
+ final SpanQueryWrapper relationWrapper;
+ try {
+ relationWrapper = fromJson(relation);
+ }
+ finally {
+ direction = previousDirection;
+ }
+
+ return new SpanRelationWrapper(relationWrapper, operand1, operand2,
+ classNumbers);
+ }
+
+
// Deserialize operation:junction
private SpanQueryWrapper _operationJunctionFromJson (JsonNode operands)
throws QueryException {
@@ -862,9 +919,15 @@
};
- // Deserialize koral:term
private SpanQueryWrapper _termFromJson (JsonNode json)
throws QueryException {
+ return _termFromJson(json, null);
+ }
+
+
+ // Deserialize koral:term
+ private SpanQueryWrapper _termFromJson (JsonNode json, String direction)
+ throws QueryException {
if (!json.has("key") || json.get("key").asText().length() < 1) {
if (!json.has("attr"))
@@ -887,6 +950,10 @@
StringBuilder value = new StringBuilder();
+ if (direction != null) {
+ value.append(direction);
+ }
+
// expect orth? expect lemma?
// s:den | i:den | cnx/l:die | mate/m:mood:ind | cnx/syn:@PREMOD |
// mate/m:number:sg | opennlp/p:ART
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
index 2bef713..a4194b1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -43,7 +43,7 @@
*
* @author margaretha
* */
-public class SpanAttributeQuery extends SpanWithIdQuery {
+public class SpanAttributeQuery extends SimpleSpanQuery {
boolean negation;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
index aa9fc7a..8de7faf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
@@ -1,60 +1,35 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
-
-import java.util.Set;
import java.util.Map;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.Query;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.ClassSpans;
-
/**
* Marks spans with a special class payload.
*/
-public class SpanClassQuery extends SpanQuery {
- public String field;
- protected byte number;
- protected SpanQuery operand;
-
-
- public SpanClassQuery (SpanQuery operand, byte number) {
- this.field = operand.getField();
- this.operand = operand;
- this.number = number;
- };
+public class SpanClassQuery extends SimpleSpanQuery {
+ protected byte number = 1;
public SpanClassQuery (SpanQuery operand) {
- this.field = operand.getField();
- this.operand = operand;
- this.number = (byte) 1;
+ super(operand, false);
};
- public byte number () {
- return this.number;
- };
-
-
- @Override
- public String getField () {
- return field;
- }
-
-
- @Override
- public void extractTerms (Set<Term> terms) {
- this.operand.extractTerms(terms);
+ public SpanClassQuery (SpanQuery operand, byte number) {
+ super(operand, false);
+ this.number = number;
};
@@ -63,7 +38,7 @@
StringBuffer buffer = new StringBuffer("{");
short classNr = (short) this.number;
buffer.append(classNr & 0xFF).append(": ");
- buffer.append(this.operand.toString()).append('}');
+ buffer.append(this.firstClause.toString()).append('}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
};
@@ -72,7 +47,7 @@
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- return (Spans) new ClassSpans(this.operand, context, acceptDocs,
+ return (Spans) new ClassSpans(this.firstClause, context, acceptDocs,
termContexts, number);
};
@@ -80,12 +55,12 @@
@Override
public Query rewrite (IndexReader reader) throws IOException {
SpanClassQuery clone = null;
- SpanQuery query = (SpanQuery) this.operand.rewrite(reader);
+ SpanQuery query = (SpanQuery) this.firstClause.rewrite(reader);
- if (query != this.operand) {
+ if (query != this.firstClause) {
if (clone == null)
clone = this.clone();
- clone.operand = query;
+ clone.firstClause = query;
};
if (clone != null)
@@ -98,7 +73,7 @@
@Override
public SpanClassQuery clone () {
SpanClassQuery spanClassQuery = new SpanClassQuery(
- (SpanQuery) this.operand.clone(), this.number);
+ (SpanQuery) this.firstClause.clone(), this.number);
spanClassQuery.setBoost(getBoost());
return spanClassQuery;
};
@@ -114,7 +89,7 @@
final SpanClassQuery spanClassQuery = (SpanClassQuery) o;
- if (!this.operand.equals(spanClassQuery.operand))
+ if (!this.firstClause.equals(spanClassQuery.firstClause))
return false;
if (this.number != spanClassQuery.number)
@@ -128,10 +103,20 @@
@Override
public int hashCode () {
int result = 1;
- result = operand.hashCode();
+ result = firstClause.hashCode();
result += (int) number;
result ^= (result << 15) | (result >>> 18);
result += Float.floatToRawIntBits(getBoost());
return result;
+ }
+
+
+ public byte getNumber () {
+ return number;
+ }
+
+
+ public void setNumber (byte number) {
+ this.number = number;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 9aa684a..6c2981b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -47,7 +47,7 @@
* @author diewald
* @author margaretha
*/
-public class SpanElementQuery extends SpanWithIdQuery {
+public class SpanElementQuery extends SimpleSpanQuery {
private static Term elementTerm;
private String elementStr;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
index 30cc536..305e42c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
@@ -1,65 +1,81 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
-
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.Query;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.FocusSpans;
-import de.ids_mannheim.korap.query.SpanClassQuery;
/**
* Modify the span of a match to the boundaries of a certain class.
*
* In case multiple classes are found with the very same number, the
- * span
- * is maximized to start on the first occurrence from the left and end
- * on
- * the last occurrence on the right.
+ * span is
+ * maximized to start on the first occurrence from the left and end on
+ * the last
+ * occurrence on the right.
*
- * In case the class to modify on is not found in the subquery,
- * the match is ignored.
+ * In case the class to modify on is not found in the subquery, the
+ * match is
+ * ignored.
*
* @author diewald
*
* @see FocusSpans
*/
-public class SpanFocusQuery extends SpanClassQuery {
+public class SpanFocusQuery extends SimpleSpanQuery {
+
+ private List<Byte> classNumbers = new ArrayList<Byte>();
+ private boolean isSorted = true;
+
/**
* Construct a new SpanFocusQuery.
*
- * @param operand
+ * @param sq
* The nested {@link SpanQuery}, that contains one or
- * more classed spans.
+ * more
+ * classed spans.
* @param number
* The class number to focus on.
*/
- public SpanFocusQuery (SpanQuery operand, byte number) {
- super(operand, number);
+ public SpanFocusQuery (SpanQuery sq, byte classNumber) {
+ super(sq, true);
+ classNumbers.add(classNumber);
+ };
+
+
+ /**
+ * Construct a new SpanFocusQuery for a list of class numbers.
+ *
+ * The given list is stored as-is (it is not copied), so later changes
+ * to it are visible to this query. The query is flagged as not sorted.
+ *
+ * @param sq
+ * The nested {@link SpanQuery}.
+ * @param classNumbers
+ * The class numbers to focus on.
+ */
+ public SpanFocusQuery (SpanQuery sq, List<Byte> classNumbers) {
+ super(sq, true);
+ this.classNumbers = classNumbers;
+ isSorted = false;
+ };
/**
- * Construct a new SpanFocusQuery.
- * The class to focus on defaults to <tt>1</tt>.
+ * Construct a new SpanFocusQuery. The class to focus on defaults
+ * to
+ * <tt>1</tt>.
*
- * @param operand
+ * @param sq
* The nested {@link SpanQuery}, that contains one or
- * more classed spans.
+ * more
+ * classed spans.
*/
- public SpanFocusQuery (SpanQuery operand) {
- this(operand, (byte) 1);
+ public SpanFocusQuery (SpanQuery sq) {
+ super(sq, true);
+ classNumbers.add((byte) 1);
};
@@ -67,9 +83,20 @@
public String toString (String field) {
StringBuffer buffer = new StringBuffer();
buffer.append("focus(");
- short classNr = (short) this.number;
- buffer.append(classNr & 0xFF).append(": ");
- buffer.append(this.operand.toString());
+ if (classNumbers.size() > 1) {
+ buffer.append("[");
+ for (int i = 0; i < classNumbers.size(); i++) {
+ buffer.append((short) classNumbers.get(i) & 0xFF);
+ if (i != classNumbers.size() - 1) {
+ buffer.append(",");
+ }
+ }
+ buffer.append("]");
+ }
+ else {
+ buffer.append((short) classNumbers.get(0) & 0xFF).append(": ");
+ }
+ buffer.append(this.firstClause.toString());
buffer.append(')');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@@ -79,20 +106,19 @@
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- return (Spans) new FocusSpans(this.operand, context, acceptDocs,
- termContexts, number);
+ return new FocusSpans(this, context, acceptDocs, termContexts);
};
@Override
public Query rewrite (IndexReader reader) throws IOException {
SpanFocusQuery clone = null;
- SpanQuery query = (SpanQuery) this.operand.rewrite(reader);
+ SpanQuery query = (SpanQuery) this.firstClause.rewrite(reader);
- if (query != this.operand) {
+ if (query != this.firstClause) {
if (clone == null)
clone = this.clone();
- clone.operand = query;
+ clone.firstClause = query;
};
if (clone != null)
@@ -105,7 +131,7 @@
@Override
public SpanFocusQuery clone () {
SpanFocusQuery spanFocusQuery = new SpanFocusQuery(
- (SpanQuery) this.operand.clone(), this.number);
+ (SpanQuery) this.firstClause.clone(), this.getClassNumbers());
spanFocusQuery.setBoost(getBoost());
return spanFocusQuery;
};
@@ -120,9 +146,9 @@
final SpanFocusQuery spanFocusQuery = (SpanFocusQuery) o;
- if (!this.operand.equals(spanFocusQuery.operand))
+ if (!this.firstClause.equals(spanFocusQuery.firstClause))
return false;
- if (this.number != spanFocusQuery.number)
+ // Compare the class number lists by content; != would only compare
+ // object identity and disagree with the content-based hashCode().
+ if (!this.getClassNumbers().equals(spanFocusQuery.getClassNumbers()))
return false;
// Probably not necessary
@@ -132,9 +158,31 @@
@Override
public int hashCode () {
- int result = operand.hashCode();
- result = 31 * result + number;
+ int result = firstClause.hashCode();
+ for (byte number : classNumbers)
+ result = 31 * result + number;
result += Float.floatToRawIntBits(getBoost());
return result;
- };
+ }
+
+
+ public List<Byte> getClassNumbers () {
+ return classNumbers;
+ }
+
+
+ public void setClassNumbers (List<Byte> classNumbers) {
+ this.classNumbers = classNumbers;
+ }
+
+
+ public boolean isSorted () {
+ return isSorted;
+ }
+
+
+ public void setSorted (boolean isSorted) {
+ this.isSorted = isSorted;
+ }
+
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index f19a1dc..8df29b3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -7,6 +7,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -51,10 +52,9 @@
*
* @author margaretha
* */
-public class SpanRelationQuery extends SpanWithIdQuery {
+public class SpanRelationQuery extends SimpleSpanQuery {
- private String type;
-
+ private int direction = 0;
/**
* Constructs a SpanRelationQuery based on the given span query.
@@ -69,6 +69,11 @@
*/
public SpanRelationQuery (SpanQuery firstClause, boolean collectPayloads) {
super(firstClause, collectPayloads);
+ // A term text starting with "<" selects direction 1; anything else
+ // keeps the default 0. startsWith() also copes with an empty term
+ // text, where substring(0, 1) would throw.
+ SpanTermQuery st = (SpanTermQuery) firstClause;
+ if (st.getTerm().text().startsWith("<")) {
+ this.direction = 1;
+ }
}
@@ -97,4 +102,12 @@
return sb.toString();
}
+ public int getDirection() {
+ return direction;
+ }
+
+ public void setDirection(int direction) {
+ this.direction = direction;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 512adc0..bb69d4c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -67,7 +67,7 @@
public SpanSegmentQuery (SpanRelationQuery firstClause,
- SpanWithIdQuery secondClause,
+ SimpleSpanQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, true);
isRelation = true;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
index 84e30b2..87de370 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
@@ -10,7 +10,6 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.spans.SpansWithId;
import de.ids_mannheim.korap.query.spans.TermSpansWithId;
/**
@@ -32,7 +31,7 @@
*
* @author margaretha
* */
-public class SpanTermWithIdQuery extends SpanWithIdQuery {
+public class SpanTermWithIdQuery extends SimpleSpanQuery {
/**
* Constructs a SpanTermWithIdQuery for the given term.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
index 12d6876..2ea519a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
@@ -12,10 +12,8 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.spans.ElementSpans;
-import de.ids_mannheim.korap.query.spans.RelationSpans;
+import de.ids_mannheim.korap.query.spans.SimpleSpans;
import de.ids_mannheim.korap.query.spans.SpansWithAttribute;
-import de.ids_mannheim.korap.query.spans.TermSpansWithId;
/**
* Enumeration of spans (e.g. element or relation spans) having some
@@ -40,7 +38,7 @@
*
* @author margaretha
*/
-public class SpanWithAttributeQuery extends SpanWithIdQuery {
+public class SpanWithAttributeQuery extends SimpleSpanQuery {
public boolean isMultipleAttributes;
private String type;
@@ -91,7 +89,7 @@
* <code>true</code> if payloads are to be collected,
* otherwise <code>false</code>.
*/
- public SpanWithAttributeQuery (SpanWithIdQuery firstClause,
+ public SpanWithAttributeQuery (SimpleSpanQuery firstClause,
SpanAttributeQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
@@ -109,7 +107,7 @@
* <code>true</code> if payloads are to be collected,
* otherwise <code>false</code>.
*/
- public SpanWithAttributeQuery (SpanWithIdQuery firstClause,
+ public SpanWithAttributeQuery (SimpleSpanQuery firstClause,
List<SpanQuery> secondClauses,
boolean collectPayloads) {
super(firstClause, secondClauses, collectPayloads);
@@ -137,7 +135,7 @@
if (SpanElementQuery.class.isInstance(firstClause)) {
type = "spanElementWithAttribute";
}
- else if (SpanRelationQuery.class.isInstance(firstClause)) {
+ else if (SpanFocusQuery.class.isInstance(firstClause)) {
type = "spanRelationWithAttribute";
}
else if (SpanTermWithIdQuery.class.isInstance(firstClause)) {
@@ -147,16 +145,16 @@
@Override
- public SimpleSpanQuery clone () {
+ public SpanWithAttributeQuery clone () {
if (secondClause != null) {
if (isMultipleAttributes) {
return new SpanWithAttributeQuery(
- (SpanWithIdQuery) firstClause.clone(),
+ (SimpleSpanQuery) firstClause.clone(),
cloneClauseList(), collectPayloads);
}
else {
return new SpanWithAttributeQuery(
- (SpanWithIdQuery) firstClause.clone(),
+ (SimpleSpanQuery) firstClause.clone(),
(SpanAttributeQuery) secondClause.clone(),
collectPayloads);
}
@@ -195,25 +193,10 @@
termContexts);
}
- Spans spans = this.getFirstClause().getSpans(context, acceptDocs,
+ SimpleSpans spans = (SimpleSpans) this.getFirstClause().getSpans(
+ context, acceptDocs, termContexts);
+ return new SpansWithAttribute(this, spans, context, acceptDocs,
termContexts);
-
- if (type.equals("spanElementWithAttribute")) {
- return new SpansWithAttribute(this, (ElementSpans) spans, context,
- acceptDocs, termContexts);
- }
- else if (type.equals("spanRelationWithAttribute")) {
- return new SpansWithAttribute(this, (RelationSpans) spans, context,
- acceptDocs, termContexts);
- }
- else if (type.equals("spanTermWithAttribute")) {
- return new SpansWithAttribute(this, (TermSpansWithId) spans,
- context, acceptDocs, termContexts);
- }
- else {
- throw new IllegalArgumentException("Span query type: " + type
- + "is unknown.");
- }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
deleted file mode 100644
index 3545808..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package de.ids_mannheim.korap.query;
-
-import java.util.List;
-
-import org.apache.lucene.search.spans.SpanQuery;
-
-/**
- * Base query for span queries whose resulting spans requires an id,
- * for
- * instance {@link SpanElementQuery} and {@link SpanRelationQuery}.
- *
- * @author margaretha
- *
- */
-public abstract class SpanWithIdQuery extends SimpleSpanQuery {
-
- /**
- * Constructs SpanWithIdQuery based on the given {@link SpanQuery}
- * and the
- * collectPayloads flag, for example, {@link SpanElementQuery}.
- *
- * @param firstClause
- * a SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause, boolean collectPayloads) {
- super(firstClause, collectPayloads);
- }
-
-
- /**
- * Constructs SpanWithIdQuery based on two span queries and the
- * collectPayloads flag, for instance, query a relation having a
- * specific
- * attribute.
- *
- * @param firstClause
- * a SpanQuery
- * @param secondClause
- * a SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause, SpanQuery secondClause,
- boolean collectPayloads) {
- super(firstClause, secondClause, collectPayloads);
- }
-
-
- /**
- * Constructs SpanWithIdQuery based on a span query and a list of
- * span
- * queries, for instance, query an element having two specific
- * attributes.
- *
- * @param firstClause
- * a SpanQuery
- * @param secondClauses
- * a list of SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause,
- List<SpanQuery> secondClauses,
- boolean collectPayloads) {
- super(firstClause, secondClauses, collectPayloads);
- }
-
-
- public SpanWithIdQuery (List<SpanQuery> clauses, boolean collectPayloads) {
- super(clauses, collectPayloads);
- }
-}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 2816c96..cf70cb9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -44,7 +44,7 @@
*
* @author margaretha
* */
-public class AttributeSpans extends SpansWithId {
+public class AttributeSpans extends SimpleSpans {
private List<CandidateAttributeSpan> candidateList;
private int currentDoc, currentPosition;
@@ -69,6 +69,8 @@
Map<Term, TermContext> termContexts)
throws IOException {
super(spanAttributeQuery, context, acceptDocs, termContexts);
+ this.hasSpanId = true;
+
candidateList = new ArrayList<>();
hasMoreSpans = firstSpans.next();
if (hasMoreSpans) {
@@ -163,6 +165,7 @@
return new CandidateAttributeSpan(firstSpans, spanId, end);
}
else if (payload.get(0).length == 10) {
+ start = wrapper.getInt(0);
end = wrapper.getInt(4);
spanId = wrapper.getShort(8);
return new CandidateAttributeSpan(firstSpans, spanId, start, end);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index c78eee4..8198195 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -19,7 +19,7 @@
public class CandidateSpan implements Comparable<CandidateSpan>, Cloneable {
protected int doc, start, end;
private long cost;
- private Collection<byte[]> payloads = new ArrayList<>();
+ private Collection<byte[]> payloads;
private int position;
private CandidateSpan childSpan; // used for example for multiple distance
// with unordered constraint
@@ -41,8 +41,18 @@
this.start = span.start();
this.end = span.end();
this.cost = span.cost();
- if (span.isPayloadAvailable())
+
+ this.payloads = new ArrayList<>();
+ if (span.isPayloadAvailable()) {
setPayloads(span.getPayload());
+ }
+ if (span instanceof SimpleSpans) {
+ SimpleSpans temp = (SimpleSpans) span;
+ this.spanId = temp.getSpanId();
+ }
+ else if (span instanceof ClassSpans) {
+ this.spanId = ((ClassSpans) span).getNumber();
+ }
}
@@ -180,6 +190,7 @@
*/
public void setPayloads (Collection<byte[]> payloads) {
+ this.payloads = new ArrayList<>();
for (byte[] b : payloads) {
if (b == null)
this.payloads.add(null);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 35fe53f..4e97dbc 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -10,6 +10,7 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
@@ -84,6 +85,16 @@
};
+ public byte getNumber () {
+ return number;
+ }
+
+
+ public void setNumber (byte number) {
+ this.number = number;
+ }
+
+
@Override
public int doc () {
return spans.doc();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 21212c7..d2ceffe 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -11,10 +11,9 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,7 +27,7 @@
* @author margaretha
* @author diewald
*/
-public class ElementSpans extends SpansWithId {
+public class ElementSpans extends SimpleSpans {
private TermSpans termSpans;
private boolean lazyLoaded = false;
@@ -58,6 +57,7 @@
super(spanElementQuery, context, acceptDocs, termContexts);
termSpans = (TermSpans) this.firstSpans;
hasMoreSpans = true;
+ // hasSpanId = true;
};
@@ -120,7 +120,13 @@
this.matchEndPosition = bb.getInt(8);
// Copy element id
- this.setSpanId(this.hasSpanId ? bb.getShort(12) : (short) -1);
+ if (length >= 14) {
+ this.setSpanId(bb.getShort(12));
+ this.hasSpanId = true;
+ }
+ else {
+ this.setSpanId((short) -1);
+ }
// Copy the start and end character offsets
byte[] b = new byte[8];
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index eabb8c0..2da1558 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -1,66 +1,69 @@
package de.ids_mannheim.korap.query.spans;
-import static de.ids_mannheim.korap.util.KrillByte.*;
+import static de.ids_mannheim.korap.util.KrillByte.byte2int;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
-
-import java.io.IOException;
-
-import java.util.Map;
-import java.util.ArrayList;
-import java.util.*;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+
/**
- * Spans, that can focus on the span boundaries of classed subqueries.
+ * Spans that can focus on the span boundaries of classed
+ * subqueries.
* The boundaries of the classed subquery may exceed the boundaries of
* the
* nested query.
*
* In case multiple classes are found with the very same number, the
- * span
- * is maximized to start on the first occurrence from the left and end
- * on
- * the last occurrence on the right.
+ * span is
+ * maximized to start on the first occurrence from the left and end on
+ * the last
+ * occurrence on the right.
*
- * In case the class to focus on is not found in the payloads,
- * the match is ignored.
+ * In case the class to focus on is not found in the payloads, the
+ * match is
+ * ignored.
*
* <strong>Warning</strong>: Payloads other than class payloads won't
- * bubble up currently. That behaviour may change in the future
+ * bubble up
+ * currently. That behaviour may change in the future.
*
* @author diewald
*/
-public class FocusSpans extends Spans {
- private List<byte[]> wrappedPayload;
- private Collection<byte[]> payload;
- private final Spans spans;
- private byte number;
-
- private SpanQuery wrapQuery;
+public class FocusSpans extends SimpleSpans {
+ private List<Byte> classNumbers;
+ private SpanQuery query;
private final Logger log = LoggerFactory.getLogger(FocusSpans.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- private int start = -1, end;
- private int tempStart = 0, tempEnd = 0;
+ // private SimpleSpans originalSpans;
+ private boolean isSorted;
+ private List<CandidateSpan> candidateSpans;
+ private int windowSize = 10;
+ private int currentDoc;
+ private byte number;
/**
* Construct a FocusSpan for the given {@link SpanQuery}.
*
- * @param wrapQuery
+ * @param query
* A {@link SpanQuery}.
* @param context
* The {@link AtomicReaderContext}.
@@ -73,139 +76,141 @@
* The class number to focus on.
* @throws IOException
*/
- public FocusSpans (SpanQuery wrapQuery, AtomicReaderContext context,
- Bits acceptDocs, Map<Term, TermContext> termContexts,
- byte number) throws IOException {
- this.spans = wrapQuery.getSpans(context, acceptDocs, termContexts);
- this.number = number;
- this.wrapQuery = wrapQuery;
- this.wrappedPayload = new ArrayList<byte[]>(6);
- };
+ public FocusSpans (SpanFocusQuery query, AtomicReaderContext context,
+ Bits acceptDocs, Map<Term, TermContext> termContexts)
+ throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ if (query.getClassNumbers() == null) {
+ throw new IllegalArgumentException(
+ "At least one class number must be specified.");
+ }
+ classNumbers = query.getClassNumbers();
+ isSorted = query.isSorted();
+ candidateSpans = new ArrayList<CandidateSpan>();
+ hasMoreSpans = firstSpans.next();
+ currentDoc = firstSpans.doc();
-
- @Override
- public Collection<byte[]> getPayload () throws IOException {
- return wrappedPayload;
- };
-
-
- @Override
- public boolean isPayloadAvailable () {
- return wrappedPayload.isEmpty() == false;
- };
-
-
- @Override
- public int doc () {
- return spans.doc();
- };
-
-
- @Override
- public int start () {
- return start;
- };
-
-
- @Override
- public int end () {
- return end;
- };
+ // matchPayload = new ArrayList<byte[]>(6);
+ this.query = query;
+ hasSpanId = true;
+ }
@Override
public boolean next () throws IOException {
- if (DEBUG)
- log.trace("Forward next match in {}", this.doc());
+ matchPayload.clear();
+ CandidateSpan cs;
+ while (hasMoreSpans || candidateSpans.size() > 0) {
+ if (isSorted) {
- // Next span
- while (spans.next()) {
- if (DEBUG)
- log.trace("Forward next inner span");
+ if (firstSpans.isPayloadAvailable()
+ && updateSpanPositions(cs = new CandidateSpan(
+ firstSpans))) {
+ setMatch(cs);
+ hasMoreSpans = firstSpans.next();
+ return true;
+ }
+ hasMoreSpans = firstSpans.next();
+ }
+ else if (candidateSpans.isEmpty()) {
+ currentDoc = firstSpans.doc();
+ collectCandidates();
+ Collections.sort(candidateSpans);
+ }
+ else {
+ setMatch(candidateSpans.get(0));
+ candidateSpans.remove(0);
+ return true;
+ }
+ }
- // No classes stored
- wrappedPayload.clear();
-
- start = -1;
- if (spans.isPayloadAvailable()) {
- end = 0;
-
- // Iterate over all payloads and find the maximum span per class
- for (byte[] payload : spans.getPayload()) {
-
- // No class payload - ignore
- // this may be problematic for other calculated payloads!
- if (payload.length != 9) {
- if (DEBUG)
- log.trace("Ignore old payload {}", payload);
- continue;
- };
-
- // Found class payload of structure <i>start<i>end<b>class
- // and classes are matches!
- if (payload[8] == this.number) {
- tempStart = byte2int(payload, 0);
- tempEnd = byte2int(payload, 4);
-
- if (DEBUG) {
- log.trace("Found matching class {}-{}", tempStart,
- tempEnd);
- };
-
- // Set start position
- if (start == -1 || tempStart < start)
- start = tempStart;
-
- // Set end position
- if (tempEnd > end)
- end = tempEnd;
- };
-
- // Definately keep class information
- // Even if it is already used for shrinking
- wrappedPayload.add(payload);
- };
- };
-
- // Class not found
- if (start == -1)
- continue;
-
- if (DEBUG) {
- log.trace("Start to focus on class {} from {} to {}", number,
- start, end);
- };
- return true;
- };
-
- // No more spans
- this.wrappedPayload.clear();
return false;
- };
+ }
+
+
+ private void collectCandidates () throws IOException {
+ CandidateSpan cs = null;
+ while (hasMoreSpans && candidateSpans.size() < windowSize
+ && firstSpans.doc() == currentDoc) {
+
+ if (firstSpans.isPayloadAvailable()
+ && updateSpanPositions(cs = new CandidateSpan(firstSpans))) {
+ candidateSpans.add(cs);
+ }
+ hasMoreSpans = firstSpans.next();
+ }
+ }
+
+
+ private void setMatch (CandidateSpan cs) {
+ matchStartPosition = cs.getStart();
+ matchEndPosition = cs.getEnd();
+ matchDocNumber = cs.getDoc();
+ matchPayload.addAll(cs.getPayloads());
+ setSpanId(cs.getSpanId());
+ }
+
+
+ private boolean updateSpanPositions (CandidateSpan candidateSpan)
+ throws IOException {
+ int minPos = 0, maxPos = 0;
+ int classStart, classEnd;
+ boolean isStart = true;
+ boolean isClassFound = false;
+
+ candidateSpan.getPayloads().clear();
+
+ // Iterate over all payloads and find the maximum span per class
+ for (byte[] payload : firstSpans.getPayload()) {
+ // No class payload - ignore
+ // this may be problematic for other calculated payloads!
+ if (payload.length == 9) {
+ if (classNumbers.contains(payload[8])) {
+ isClassFound = true;
+ classStart = byte2int(payload, 0);
+ classEnd = byte2int(payload, 4);
+
+ if (isStart || classStart < minPos) {
+ minPos = classStart;
+ isStart = false;
+ }
+ if (classEnd > maxPos) {
+ maxPos = classEnd;
+ }
+ }
+ candidateSpan.getPayloads().add(payload.clone());
+ }
+
+ }
+
+ if (isClassFound) {
+ candidateSpan.start = minPos;
+ candidateSpan.end = maxPos;
+ }
+
+ return isClassFound;
+ }
// Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
- if (DEBUG)
- log.trace("Skip MatchSpans {} -> {}", this.doc(), target);
-
- if (this.doc() < target && spans.skipTo(target)) {
-
- };
+ if (this.doc() < target && firstSpans.skipTo(target)) {
+ return next();
+ }
return false;
};
@Override
public String toString () {
- return getClass().getName() + "(" + this.wrapQuery.toString() + ")@"
+ return getClass().getName() + "(" + this.query.toString() + ")@"
+ (doc() + ":" + start() + "-" + end());
};
@Override
public long cost () {
- return spans.cost();
+ return firstSpans.cost();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
index 8f97f35..d8299ef 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
@@ -8,9 +8,9 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanRelationQuery;
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
/**
* RelationBaseSpans is a base class for relation spans containing
@@ -24,14 +24,16 @@
* @author margaretha
*
*/
-public abstract class RelationBaseSpans extends SpansWithId {
+public abstract class RelationBaseSpans extends SimpleSpans {
protected short leftId, rightId;
protected int leftStart, leftEnd;
protected int rightStart, rightEnd;
- public RelationBaseSpans () {};
+ public RelationBaseSpans () {
+ this.hasSpanId = true;
+ };
/**
@@ -47,11 +49,12 @@
* @param termContexts
* @throws IOException
*/
- public RelationBaseSpans (SpanWithIdQuery spanWithIdQuery,
+ public RelationBaseSpans (SimpleSpanQuery spanWithIdQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
throws IOException {
super(spanWithIdQuery, context, acceptDocs, termContexts);
+ this.hasSpanId = true;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
index 38eb5fe..f17043d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -20,14 +20,11 @@
/**
* Enumeration of spans denoting relations between two
- * tokens/elements. The
- * start and end of a RelationSpan always denote the start and end of
- * the
- * left-side token/element.
+ * tokens/elements. The start and end of a RelationSpan always denote
+ * the start and end of the left-side token/element.
*
* There are 4 types of relations, which is differentiated by the
- * payload length
- * in bytes.
+ * payload length in bytes.
* <ol>
* <li>Token to token relation (1 int & 3 short, length: 10)</li>
* <li>Token to span (2 int & 3 short, length: 14)</li>
@@ -35,29 +32,28 @@
* <li>Span to Span (3 int & 3 short, length: 18)</li>
* </ol>
* Every integer value denotes the start/end position of the
- * start/target of a
- * relation, in this format: (sourceEndPos?, startTargetPos,
- * endTargetPos?). The
- * end position of a token is identical to its start position, and
- * therefore not
- * is saved in a payload.
+ * start/target of a relation, in this format: (sourceEndPos?,
+ * startTargetPos, endTargetPos?). The end position of a token is
+ * identical to its start position, and is therefore not saved in a
+ * payload.
*
* The short values denote the relation id, left id, and right id. The
- * byte in
- * relation #3 is just a dummy to create a different length from the
- * relation
- * #2.
+ * byte in relation #3 is just a dummy to create a different length
+ * from the relation #2.
*
* NOTE: Sorting of the candidate spans can alternatively be done in
- * indexing,
- * instead of here. (first by left positions and then by right
- * positions)
+ * indexing, instead of here. (first by left positions and then by
+ * right positions)
+ *
+ * The class number of relation source is always 1 and that of
+ * relation target is always 2 regardless of the relation direction.
*
* @author margaretha
* */
public class RelationSpans extends RelationBaseSpans {
private int currentDoc, currentPosition;
+ private int direction;
private TermSpans relationTermSpan;
protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
@@ -80,6 +76,7 @@
Map<Term, TermContext> termContexts)
throws IOException {
super(relationSpanQuery, context, acceptDocs, termContexts);
+ direction = relationSpanQuery.getDirection();
candidateList = new ArrayList<>();
relationTermSpan = (TermSpans) firstSpans;
hasMoreSpans = relationTermSpan.next();
@@ -169,28 +166,34 @@
ByteBuffer bb = ByteBuffer.allocate(length);
bb.put(payload.get(0));
+ cs.setLeftStart(cs.start);
+
int i;
switch (length) {
- case 10: // Token to token
+ case 10: // Token to token
i = bb.getInt(0);
- cs.setRightStart(i - 1);
- cs.setRightEnd(i);
+ cs.setLeftEnd(cs.start + 1);
+ cs.setRightStart(i);
+ cs.setRightEnd(i + 1);
break;
case 14: // Token to span
+ cs.setLeftEnd(cs.start + 1);
cs.setRightStart(bb.getInt(0));
cs.setRightEnd(bb.getInt(4));
break;
case 15: // Span to token
cs.setEnd(bb.getInt(0));
+ cs.setLeftEnd(cs.end);
i = bb.getInt(5);
- cs.setRightStart(i - 1);
- cs.setRightEnd(i);
+ cs.setRightStart(i);
+ cs.setRightEnd(i + 1);
break;
case 18: // Span to span
cs.setEnd(bb.getInt(0));
+ cs.setLeftEnd(cs.end);
cs.setRightStart(bb.getInt(4));
cs.setRightEnd(bb.getInt(8));
break;
@@ -199,7 +202,7 @@
cs.setRightId(bb.getShort(length - 2)); //right id
cs.setLeftId(bb.getShort(length - 4)); //left id
cs.setSpanId(bb.getShort(length - 6)); //relation id
- // Payload is cleared.
+ // Payload is cleared.
}
@@ -208,10 +211,18 @@
if (relationTermSpan.isPayloadAvailable()) {
payload.addAll(relationTermSpan.getPayload());
}
- payload.add(createClassPayload(cs.getLeftStart(), cs.getLeftEnd(),
- (byte) 1));
- payload.add(createClassPayload(cs.getRightStart(), cs.getRightEnd(),
- (byte) 2));
+ if (direction == 0) {
+ payload.add(createClassPayload(cs.getLeftStart(), cs.getLeftEnd(),
+ (byte) 1));
+ payload.add(createClassPayload(cs.getRightStart(),
+ cs.getRightEnd(), (byte) 2));
+ }
+ else {
+ payload.add(createClassPayload(cs.getRightStart(),
+ cs.getRightEnd(), (byte) 1));
+ payload.add(createClassPayload(cs.getLeftStart(), cs.getLeftEnd(),
+ (byte) 2));
+ }
cs.setPayloads(payload);
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
index 79ef116..8ac53b8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.util.Collection;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
@@ -17,7 +18,7 @@
*
* @author margaretha
* */
-public class SegmentSpans extends NonPartialOverlappingSpans {
+public class SegmentSpans extends SimpleSpans {
private boolean isRelation;
@@ -38,11 +39,49 @@
throws IOException {
super(spanSegmentQuery, context, acceptDocs, termContexts);
if (spanSegmentQuery.isRelation()) {
- SpansWithId s2 = (SpansWithId) secondSpans;
- // hacking for element query
- s2.hasSpanId = true;
isRelation = true;
}
+
+ collectPayloads = true;
+ hasMoreSpans = secondSpans.next();
+ }
+
+
+ @Override
+ public boolean next () throws IOException {
+ // Warning: this does not work for overlapping spans
+ // e.g. get multiple second spans in a firstspan
+ hasMoreSpans &= firstSpans.next();
+ isStartEnumeration = false;
+ matchPayload.clear();
+ return advance();
+ }
+
+
+ /**
+ * Advances to the next match.
+ *
+ * @return <code>true</code> if a match is found,
+ * <code>false</code> otherwise.
+ * @throws IOException
+ */
+ protected boolean advance () throws IOException {
+ // The complexity is linear for searching in a document.
+ // It's better if we can skip to >= position in a document.
+ while (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) {
+ int matchCase = findMatch();
+ if (matchCase == 0) {
+ doCollectPayloads();
+ return true;
+ }
+ else if (matchCase == 1) {
+ hasMoreSpans = secondSpans.next();
+ }
+ else {
+ hasMoreSpans = firstSpans.next();
+ }
+ }
+ return false;
}
@@ -52,21 +91,21 @@
* secondspan are identical.
*
* */
- @Override
protected int findMatch () {
RelationSpans s1;
- SpansWithId s2;
+ SimpleSpans s2;
if (firstSpans.start() == secondSpans.start()
&& firstSpans.end() == secondSpans.end()) {
if (isRelation) {
s1 = (RelationSpans) firstSpans;
- s2 = (SpansWithId) secondSpans;
+ s2 = (SimpleSpans) secondSpans;
//System.out.println("segment: " + s1.getRightStart() + " "
// + s1.getRightEnd());
if (s1.getLeftId() == s2.getSpanId()) {
setMatch();
+ setSpanId(s2.getSpanId());
return 0;
}
}
@@ -89,4 +128,44 @@
matchStartPosition = firstSpans.start();
matchEndPosition = firstSpans.end();
}
+
+
+ /**
+ * Collects available payloads from the current first and second
+ * spans.
+ *
+ * @throws IOException
+ */
+ private void doCollectPayloads () throws IOException {
+ Collection<byte[]> payload;
+ if (collectPayloads) {
+ if (firstSpans.isPayloadAvailable()) {
+ payload = firstSpans.getPayload();
+ matchPayload.addAll(payload);
+ }
+ if (secondSpans.isPayloadAvailable()) {
+ payload = secondSpans.getPayload();
+ matchPayload.addAll(payload);
+ }
+ }
+ }
+
+
+ @Override
+ public boolean skipTo (int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchPayload.clear();
+ return advance();
+ }
+
+
+ @Override
+ public long cost () {
+ return firstSpans.cost() + secondSpans.cost();
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index f1ec996..d82b8fa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -33,6 +33,9 @@
protected int matchDocNumber, matchStartPosition, matchEndPosition;
protected Collection<byte[]> matchPayload;
+ protected short spanId;
+ protected boolean hasSpanId = false;
+
public SimpleSpans () {
collectPayloads = true;
@@ -151,4 +154,26 @@
+ start() + "-" + end()) : "END"));
}
+
+
+ /**
+ * Returns the span id of the current span
+ *
+ * @return the span id of the current span
+ */
+ public short getSpanId () {
+ return spanId;
+ }
+
+
+ /**
+ * Sets the span id of the current span
+ *
+ * @param spanId
+ * span id
+ */
+ public void setSpanId (short spanId) {
+ this.spanId = spanId;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
index d7fa03f..8f0e0b9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
@@ -32,9 +32,9 @@
*
* @author margaretha
* */
-public class SpansWithAttribute extends SpansWithId {
+public class SpansWithAttribute extends SimpleSpans {
- private SpansWithId referentSpans;
+ private SimpleSpans referentSpans;
private List<AttributeSpans> attributeList;
private List<AttributeSpans> notAttributeList;
@@ -56,13 +56,16 @@
* @throws IOException
*/
public SpansWithAttribute (SpanWithAttributeQuery spanWithAttributeQuery,
- SpansWithId spansWithId,
+ SimpleSpans referentSpans,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
throws IOException {
super(spanWithAttributeQuery, context, acceptDocs, termContexts);
- referentSpans = spansWithId;
- referentSpans.hasSpanId = true; // dummy setting enabling reading elementRef
+ // if (!referentSpans.hasSpanId) {
+ // throw new
+ // IllegalArgumentException("Referent spans must have ids.");
+ // }
+ this.referentSpans = referentSpans;
hasMoreSpans = referentSpans.next();
setAttributeList(spanWithAttributeQuery, context, acceptDocs,
termContexts);
@@ -168,7 +171,7 @@
private boolean advanceAttribute () throws IOException {
while (hasMoreSpans) {
- SpansWithId referentSpans = attributeList.get(0);
+ SimpleSpans referentSpans = attributeList.get(0);
advanceNotAttributes(referentSpans);
if (checkNotReferentId(referentSpans)) {
this.matchDocNumber = referentSpans.doc();
@@ -199,8 +202,9 @@
private boolean advance () throws IOException {
while (hasMoreSpans && searchSpanPosition()) {
- // System.out.println("element: " + referentSpans.start() + ","
- // + referentSpans.end() + " ref:"+ referentSpans.getSpanId());
+ // System.out.println(referentSpans.start() + ","
+ // + referentSpans.end() + " " +
+ // referentSpans.getSpanId());
if (checkReferentId() && checkNotReferentId(referentSpans)) {
this.matchDocNumber = referentSpans.doc();
@@ -287,7 +291,7 @@
* document and
* start position.
* */
- private boolean ensureSamePosition (SpansWithId spans,
+ private boolean ensureSamePosition (SimpleSpans spans,
AttributeSpans attributes) throws IOException {
while (hasMoreSpans && ensureSameDoc(spans, attributes)) {
@@ -373,7 +377,7 @@
* <code>false</code> otherwise.
* @throws IOException
*/
- private boolean checkNotReferentId (SpansWithId referentSpans)
+ private boolean checkNotReferentId (SimpleSpans referentSpans)
throws IOException {
for (AttributeSpans notAttribute : notAttributeList) {
if (!notAttribute.isFinish()
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
deleted file mode 100644
index 9b54565..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package de.ids_mannheim.korap.query.spans;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.Bits;
-
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
-
-/**
- * Base class for enumeration of span requiring an id, such as
- * elements and
- * relations.
- *
- * @author margaretha
- * */
-public abstract class SpansWithId extends SimpleSpans {
-
- protected short spanId;
- protected boolean hasSpanId = false; // A dummy flag
-
-
- /**
- * Constructs SpansWithId for the given {@link SpanWithIdQuery}.
- *
- * @param spanWithIdQuery
- * a SpanWithIdQuery
- * @param context
- * @param acceptDocs
- * @param termContexts
- * @throws IOException
- */
- public SpansWithId (SpanWithIdQuery spanWithIdQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanWithIdQuery, context, acceptDocs, termContexts);
- }
-
-
- public SpansWithId () {}
-
-
- /**
- * Returns the span id of the current span
- *
- * @return the span id of the current span
- */
- public short getSpanId () {
- return spanId;
- }
-
-
- /**
- * Sets the span id of the current span
- *
- * @param spanId
- * span id
- */
- public void setSpanId (short spanId) {
- this.spanId = spanId;
- }
-}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
index cf12aa0..431b839 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
@@ -24,7 +24,7 @@
*
* @author margaretha
* */
-public class TermSpansWithId extends SpansWithId {
+public class TermSpansWithId extends SimpleSpans {
private TermSpans termSpans;
@@ -46,6 +46,7 @@
super(spanTermWithIdQuery, context, acceptDocs, termContexts);
termSpans = (TermSpans) firstSpans;
hasMoreSpans = termSpans.next();
+ hasSpanId = true;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
index 90e9a2b..968d544 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
@@ -15,10 +15,12 @@
public SpanAttributeQueryWrapper (SpanQueryWrapper sqw) {
- if (sqw == null) {
- isNull = true;
- return;
+ if (sqw != null) {
+ isNull = false;
}
+ else
+ return;
+
if (sqw.isEmpty()) {
isEmpty = true;
return;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRelationWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRelationWrapper.java
new file mode 100644
index 0000000..17afa22
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRelationWrapper.java
@@ -0,0 +1,103 @@
+package de.ids_mannheim.korap.query.wrap;
+
+import java.util.ArrayList;
+
+import org.apache.lucene.search.spans.SpanQuery;
+
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+import de.ids_mannheim.korap.query.SpanSegmentQuery;
+import de.ids_mannheim.korap.util.QueryException;
+
+public class SpanRelationWrapper extends SpanQueryWrapper {
+
+ private SpanQueryWrapper relationQuery;
+ private SpanQueryWrapper subQuery1;
+ private SpanQueryWrapper subQuery2;
+ private byte[] classNumbers;
+
+
+ public SpanRelationWrapper (SpanQueryWrapper relationWrapper,
+ SpanQueryWrapper operand1,
+ SpanQueryWrapper operand2, byte[] classNumbers) {
+
+ this.relationQuery = relationWrapper;
+ if (relationQuery != null) {
+ this.isNull = false;
+ }
+ else
+ return;
+
+ if (relationQuery.isEmpty) {
+ this.isEmpty = true;
+ return;
+ }
+
+ this.subQuery1 = operand1;
+ this.subQuery2 = operand2;
+ this.classNumbers = classNumbers;
+ }
+
+
+ @Override
+ public SpanQuery toQuery () throws QueryException {
+
+ if (this.isNull() || this.isEmpty()) {
+ return null;
+ }
+
+ SpanQuery sq = relationQuery.retrieveNode(this.retrieveNode).toQuery();
+ if (sq == null)
+ return null;
+
+ SpanQuery subq1, subq2;
+ if (subQuery1.isEmpty) {
+ if (!subQuery2.isEmpty) {
+ // match target
+ subq2 = subQuery2.retrieveNode(this.retrieveNode).toQuery();
+ if (subq2 != null) {
+ return createQuery(new SpanSegmentQuery(sq, subq2, true));
+ }
+ }
+ }
+ else if (subQuery2.isEmpty) {
+ if (!subQuery1.isEmpty) {
+ // match source
+ subq1 = subQuery1.retrieveNode(this.retrieveNode).toQuery();
+ if (subq1 != null) {
+ return createQuery(new SpanSegmentQuery(sq, subq1, true));
+ }
+ }
+ }
+ else {
+ // match both
+ subq1 = subQuery1.retrieveNode(this.retrieveNode).toQuery();
+ if (subq1 != null) {
+ SpanFocusQuery fq = new SpanFocusQuery(new SpanSegmentQuery(sq,
+ subq1, true), (byte) 2);
+ fq.setSorted(false);
+ sq = fq;
+ }
+
+ subq2 = subQuery2.retrieveNode(this.retrieveNode).toQuery();
+ if (subq2 != null) {
+ return createQuery(new SpanSegmentQuery(sq, subq2, true));
+ }
+ }
+
+ return createQuery(sq);
+ }
+
+ private SpanQuery createQuery(SpanQuery sq) {
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ if (this.classNumbers != null) {
+ for (byte c : this.classNumbers) {
+ if (c > 0) {
+ classNumbers.add(c);
+ }
+ }
+ return new SpanFocusQuery(sq, classNumbers);
+ }
+ return sq;
+
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
index 8916bda..b51557d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
@@ -6,9 +6,9 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
import de.ids_mannheim.korap.util.QueryException;
/**
@@ -133,7 +133,7 @@
private SpanQuery createSpecificSpanWithAttributeQuery ()
throws QueryException {
- SpanWithIdQuery withIdQuery = (SpanWithIdQuery) withIdQueryWrapper
+ SimpleSpanQuery withIdQuery = (SimpleSpanQuery) withIdQueryWrapper
.toQuery();
if (withIdQuery == null) {
isNull = true;
@@ -152,7 +152,7 @@
private SpanWithAttributeQuery createSpanWithSingleAttributeQuery (
- SpanWithIdQuery withIdQuery) throws QueryException {
+ SimpleSpanQuery withIdQuery) throws QueryException {
SpanAttributeQuery attrQuery = createSpanAttributeQuery(this.attrQueryWrapper);
if (attrQuery != null) {
if (withIdQuery != null) {
@@ -171,6 +171,8 @@
SpanQueryWrapper attrQueryWrapper) throws QueryException {
SpanQuery sq = attrQueryWrapper.toQuery();
if (sq != null) {
+ if (sq instanceof SpanAttributeQuery)
+ return (SpanAttributeQuery) sq;
if (sq instanceof SpanTermQuery) {
return new SpanAttributeQuery((SpanTermQuery) sq,
attrQueryWrapper.isNegative, true);
@@ -185,7 +187,7 @@
private SpanWithAttributeQuery createSpanWithAttributeListQuery (
- SpanWithIdQuery withIdQuery) throws QueryException {
+ SimpleSpanQuery withIdQuery) throws QueryException {
List<SpanQuery> attrQueries = new ArrayList<SpanQuery>();
SpanQuery attrQuery = null;
for (SpanQueryWrapper sqw : queryWrapperList) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 17f6cae..ff90f1d 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -1,39 +1,42 @@
package de.ids_mannheim.korap.response;
-import java.util.*;
-import java.io.*;
-
+import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
-import com.fasterxml.jackson.annotation.*;
-import com.fasterxml.jackson.annotation.JsonInclude.Include;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.*;
-
-import de.ids_mannheim.korap.index.PositionsToOffset;
-
-// Todo:
-import de.ids_mannheim.korap.response.SearchContext;
-
-import de.ids_mannheim.korap.index.AbstractDocument;
-import de.ids_mannheim.korap.response.match.HighlightCombinator;
-import de.ids_mannheim.korap.response.match.HighlightCombinatorElement;
-import de.ids_mannheim.korap.response.match.Relation;
-import de.ids_mannheim.korap.response.match.MatchIdentifier;
-import de.ids_mannheim.korap.response.match.PosIdentifier;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.document.Document;
import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import de.ids_mannheim.korap.index.AbstractDocument;
+import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.response.match.HighlightCombinator;
+import de.ids_mannheim.korap.response.match.HighlightCombinatorElement;
+import de.ids_mannheim.korap.response.match.MatchIdentifier;
+import de.ids_mannheim.korap.response.match.PosIdentifier;
+import de.ids_mannheim.korap.response.match.Relation;
/*
Todo: The implemented classes and private names are horrible!
@@ -55,7 +58,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = false;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
diff --git a/src/main/java/de/ids_mannheim/korap/server/Node.java b/src/main/java/de/ids_mannheim/korap/server/Node.java
index 8b4de0f..3e406f5 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Node.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Node.java
@@ -10,6 +10,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.logging.LogManager;
+import org.slf4j.bridge.SLF4JBridgeHandler;
import java.net.URI;
import java.beans.PropertyVetoException;
@@ -20,31 +22,37 @@
import com.mchange.v2.c3p0.*;
/**
- * Standalone REST-Service for the Lucene Search Backend.
+ * Standalone REST-Service for the Krill node.
+ * Reads a property file at <tt>krill.properties</tt>.
+ * Defaults to port <tt>9876</tt> if no information is given,
+ * and an unprotected in-memory SQLite database for collections.
*
* @author diewald
*/
public class Node {
// Base URI the Grizzly HTTP server will listen on
- public static String BASE_URI = "http://localhost:8080/";
+ public static String BASE_URI = "http://localhost:9876/";
+ private static String propFile = "krill.properties";
+
// Logger
private final static Logger log = LoggerFactory.getLogger(Node.class);
// Index
private static KrillIndex index;
+
+ // Database
private static ComboPooledDataSource cpds;
- private static String path, name = "unknown";
-
+ private static String path = null;
+ private static String name = "unknown";
private static String dbUser, dbPwd;
-
- private static String dbClass = "org.sqlite.JDBC", dbURL = "jdbc:sqlite:";
+ private static String dbClass = "org.sqlite.JDBC";
+ private static String dbURL = "jdbc:sqlite:";
/*
- * Todo: Add shutdown hook,
- * Then also close cdps.close();
+ * Todo: Call cpds.close() on shutdown.
* see: https://10.0.10.12/trac/korap/browser/KorAP-modules/KorAP-REST/src/main/java/de/ids_mannheim/korap/web/Application.java
* https://10.0.10.12/trac/korap/browser/KorAP-modules/KorAP-REST/src/main/java/de/ids_mannheim/korap/web/ShutdownHook.java
*/
@@ -52,38 +60,12 @@
/**
* Starts Grizzly HTTP server exposing JAX-RS
* resources defined in this application.
+ * This will load a <tt>krill.properties</tt> property file.
*
* @return Grizzly HTTP server.
*/
public static HttpServer startServer () {
-
- // Load configuration
- URL resUrl = Node.class.getClassLoader().getResource("krill.properties");
- if (resUrl == null) {
- log.error("Cannot find \"krill.properties\". Please create it "
- +"using \"krill.properties.info\" as template. Terminating.");
- System.exit(1);
- }
- try {
- InputStream file = new FileInputStream(resUrl.getFile());
- Properties prop = new Properties();
- prop.load(file);
-
- // Node properties
- path = prop.getProperty("krill.indexDir", path);
- name = prop.getProperty("krill.server.name", name);
- BASE_URI = prop.getProperty("krill.server.baseURI", BASE_URI);
-
- // Database properties
- dbUser = prop.getProperty("krill.db.user", dbUser);
- dbPwd = prop.getProperty("krill.db.pwd", dbPwd);
- dbClass = prop.getProperty("krill.db.class", dbClass);
- dbURL = prop.getProperty("krill.db.jdbcURL", dbURL);
-
- }
- catch (IOException e) {
- log.error(e.getLocalizedMessage());
- };
+ _loadResourceProperties();
// create a resource config that scans for JAX-RS resources and providers
// in de.ids_mannheim.korap.server package
@@ -97,7 +79,21 @@
};
+ /**
+ * Starts Grizzly HTTP server exposing JAX-RS
+ * resources defined in this application.
+ * Mainly used for testing.
+ *
+ * @param nodeName
+ * The name of the node.
+ * @param indexPath
+ * The path of the Lucene index.
+ *
+ * @return Grizzly {@link HttpServer} server.
+ */
public static HttpServer startServer (String nodeName, String indexPath) {
+ LogManager.getLogManager().reset();
+ SLF4JBridgeHandler.install();
// create a resource config that scans for JAX-RS resources and providers
// in de.ids_mannheim.korap.server package
@@ -115,24 +111,29 @@
/**
- * Main method.
+ * Runner method for Krill node.
*
* @param args
+ * No special arguments required.
* @throws IOException
*/
public static void main (String[] args) throws IOException {
- // WADL available at BASE_URI + application.wadl
+ // WADL available at BASE_URI + application.wadl
+ // Start the server with krill properties or given defaults
final HttpServer server = startServer();
// Establish shutdown hook
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+
@Override
public void run () {
log.info("Stop Server");
- // staaahp!
server.stop();
- }
+ if (cpds != null)
+ cpds.close();
+ };
+
}, "shutdownHook"));
// Start server
@@ -147,25 +148,49 @@
};
- // What's the servers name?
+ /**
+ * Get the name of the node.
+ * The name is unique in the cluster and should be persistent.
+ *
+ * @return The unique name of the node.
+ */
public static String getName () {
return name;
};
- // What is the server listening on?
+ /**
+ * Get the URI (incl. port) the node is listening on.
+ *
+ * @return The URI the node is listening on.
+ */
public static String getListener () {
return BASE_URI;
};
- // Get database pool
+    /**
+     * Shut down the database pool and reset it, so getDBPool() won't return it.
+     */
+    public static void closeDBPool () {
+        if (cpds != null)
+            cpds.close();
+        cpds = null;
+    };
+
+ /**
+ * Get the associated database pool
+ * for match collection.
+ *
+ * @return The CPDS {@link ComboPooledDataSource} object.
+ */
public static ComboPooledDataSource getDBPool () {
// Pool already initiated
if (cpds != null)
return cpds;
+ // Initiate pool
try {
// Parameters are defined in the property file
@@ -186,7 +211,11 @@
};
- // Get Lucene Index
+ /**
+ * Get the associated {@link KrillIndex}.
+ *
+ * @return The associated {@link KrillIndex}.
+ */
public static KrillIndex getIndex () {
// Index already instantiated
@@ -196,10 +225,13 @@
try {
// Get a temporary index
- if (path == null)
+ if (path == null) {
+
// Temporary index
index = new KrillIndex();
+ }
+ // Get a MMap directory index
else {
File file = new File(path);
@@ -219,4 +251,46 @@
};
return null;
};
+
+
+    // Load properties from file; returns null (error is logged) if unreadable
+    private static Properties _loadProperties (String propFile) {
+        // try-with-resources closes the stream on every exit path
+        try (InputStream file = new FileInputStream(propFile)) {
+            Properties prop = new Properties();
+            prop.load(file);
+
+            // Node properties (the current values serve as defaults)
+            path = prop.getProperty("krill.indexDir", path);
+            name = prop.getProperty("krill.server.name", name);
+            BASE_URI = prop.getProperty("krill.server.baseURI", BASE_URI);
+
+            // Database properties (the current values serve as defaults)
+            dbUser = prop.getProperty("krill.db.user", dbUser);
+            dbPwd = prop.getProperty("krill.db.pwd", dbPwd);
+            dbClass = prop.getProperty("krill.db.class", dbClass);
+            dbURL = prop.getProperty("krill.db.jdbcURL", dbURL);
+            return prop;
+        }
+        catch (IOException e) {
+            log.error(e.getLocalizedMessage());
+        };
+        return null;
+    };
+
+
+    // Locate the property file via the class loader and load it;
+    // yields null when the resource cannot be found
+    private static Properties _loadResourceProperties () {
+        final URL location = Node.class.getClassLoader().getResource(propFile);
+
+        // Resource found: hand its file path on to the loader
+        if (location != null)
+            return _loadProperties(location.getFile());
+
+        log.error(
+                "Cannot find {}. Please create it using \"{}.info\" as template.",
+                propFile, propFile);
+        return null;
+    };
};
diff --git a/src/main/java/de/ids_mannheim/korap/server/Resource.java b/src/main/java/de/ids_mannheim/korap/server/Resource.java
index 4b4fa31..7f9e99b 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Resource.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Resource.java
@@ -20,16 +20,16 @@
import javax.ws.rs.WebApplicationException;
import de.ids_mannheim.korap.server.Node;
-import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillCollection;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Response;
-import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.response.MatchCollector;
import de.ids_mannheim.korap.response.collector.MatchCollectorDB;
+import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.index.FieldDocument;
import java.util.List;
import java.util.regex.Pattern;
@@ -45,16 +45,14 @@
/**
- * Root resource (exposed at root path)
+ * Root resource (exposed at root path) of the Krill node.
* The responses only represent JSON responses, although HTML
- * responses
- * may be handy.
+ * responses may be handy.
*
- * @author Nils Diewald
- *
- * Look at
- * http://www.mkyong.com/webservices/jax-rs/json-example
- * -with-jersey-jackson/
+ * @author diewald
+ */
+/* Look at
+ * http://www.mkyong.com/webservices/jax-rs/json-example-with-jersey-jackson/
*/
@Path("/")
public class Resource {
@@ -68,21 +66,8 @@
public static final boolean DEBUG = false;
// Slightly based on String::BooleanSimple
- static Pattern p = Pattern
- .compile("\\s*(?i:false|no|inactive|disabled|off|n|neg(?:ative)?|not|null|undef)\\s*");
-
-
- // Check if a string is meant to represent null
- private static boolean isNull (String value) {
- if (value == null)
- return true;
-
- Matcher m = p.matcher(value);
- if (m.matches())
- return true;
-
- return false;
- };
+ static Pattern p = Pattern.compile("\\s*(?i:false|no|inactive|disabled|"
+ + "off|n|neg(?:ative)?|not|null|undef)\\s*");
/**
@@ -91,19 +76,19 @@
@GET
@Produces(MediaType.APPLICATION_JSON)
public String info () {
- KrillIndex index = Node.getIndex();
Response kresp = new Response();
kresp.setNode(Node.getName());
+ kresp.setListener(Node.getListener());
+
+ // Get index
+ KrillIndex index = Node.getIndex();
kresp.setName(index.getName());
kresp.setVersion(index.getVersion());
-
- kresp.setListener(Node.getListener());
- long texts = -1;
/*
- kresp.addMessage(
+ kresp.addMessage(
"Number of documents in the index",
String.parseLong(index.numberOf("documents"))
- );
+ );
*/
kresp.addMessage(680, "Server is up and running!");
return kresp.toJsonString();
@@ -134,7 +119,6 @@
*/
// Todo: Parameter for server node
-
if (DEBUG)
log.trace("Added new document with unique identifier {}", uid);
@@ -197,6 +181,7 @@
// There are documents to commit
try {
index.commit();
+ kresp.addMessage(683, "Staged data committed");
}
catch (IOException e) {
// Set HTTP to ???
@@ -373,22 +358,22 @@
boolean includeSpans = false, includeHighlights = true, extendToSentence = false, info = false;
// Optional query parameter "info" for more information on the match
- if (!isNull(qp.getFirst("info")))
+ if (!_isNull(qp.getFirst("info")))
info = true;
// Optional query parameter "spans" for span information inclusion
- if (!isNull(qp.getFirst("spans"))) {
+ if (!_isNull(qp.getFirst("spans"))) {
includeSpans = true;
info = true;
};
// Optional query parameter "highlights" for highlight information inclusion
String highlights = qp.getFirst("highlights");
- if (highlights != null && isNull(highlights))
+ if (highlights != null && _isNull(highlights))
includeHighlights = false;
// Optional query parameter "extended" for sentence expansion
- if (!isNull(qp.getFirst("extended")))
+ if (!_isNull(qp.getFirst("extended")))
extendToSentence = true;
List<String> foundries = qp.get("foundry");
@@ -468,4 +453,17 @@
context.proceed();
};
};
+
+
+    // Check if a string is meant to represent null.
+    // That holds for a missing value as well as for a value that,
+    // apart from surrounding whitespace, is one of the
+    // case-insensitive negative keywords of the pattern p.
+    private static boolean _isNull (String value) {
+
+        // No value given at all
+        final boolean missing = (value == null);
+
+        return missing || p.matcher(value).matches();
+    };
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 9bd7ad2..5fed33b 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-# log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -35,8 +35,11 @@
# Tests:
# log4j.logger.de.ids_mannheim.korap.index.TestSegmentIndex = TRACE, stdout
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+# Server
+# log4j.category.org.glassfish.jersey = TRACE, stdout
+
+log4j.appender.stdout = org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = %5p (%F:%L) -> %m%n
# log4j.appender.stdout.Target=System.out
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index 5995ded..82724b0 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -1,31 +1,29 @@
package de.ids_mannheim.korap.index;
-import java.util.*;
-import java.io.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Bits;
+import java.io.IOException;
-import static org.junit.Assert.*;
-import org.junit.Test;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Ignore;
+import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.KrillQuery;
-import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.query.QueryBuilder;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
import de.ids_mannheim.korap.response.Match;
-import de.ids_mannheim.korap.query.*;
-import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.index.MultiTermTokenStream;
-
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.index.Term;
+import de.ids_mannheim.korap.response.Result;
// mvn -Dtest=TestWithinIndex#indexExample1 test
@@ -33,6 +31,48 @@
@RunWith(JUnit4.class)
public class TestMatchIndex {
+    // Focus on class 3, which wraps the sequence {1:b}{2:c}:
+    // the embedded classes 1 and 2 have to show up
+    // in the bracket snippets of both matches
+    @Test
+    public void testEmbeddedClassQuery () throws IOException {
+        KrillIndex ki = new KrillIndex();
+
+        // abcabcabac
+        FieldDocument fd = new FieldDocument();
+        fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
+                + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
+                + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
+                + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
+                + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
+                + "[(9-10)s:c|i:c|_9#9-10]");
+        ki.addDoc(fd);
+
+        ki.commit();
+
+        SpanQuery sq = new SpanFocusQuery(new SpanClassQuery(new SpanNextQuery(
+                new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
+                        (byte) 1), new SpanClassQuery(new SpanTermQuery(
+                        new Term("base", "s:c")), (byte) 2)), (byte) 3),
+                (byte) 3);
+
+        Result kr = ki.search(sq, (short) 10);
+
+        // JUnit argument order is (message, expected, actual)
+        assertEquals("totalResults", 2, kr.getTotalResults());
+        assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
+        assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
+        assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr
+                .getMatch(0).getSnippetBrackets());
+        assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
+        assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
+        assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr
+                .getMatch(1).getSnippetBrackets());
+
+        assertEquals("Document count", 1, ki.numberOf("base", "documents"));
+        assertEquals("Token count", 10, ki.numberOf("base", "t"));
+    }
+
@Test
public void indexExample1 () throws IOException {
@@ -149,26 +189,7 @@
"<span class=\"context-left\"><span class=\"more\"></span>a</span><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark><span class=\"context-right\"><span class=\"more\"></span></span>",
kr.getMatch(0).getSnippetHTML());
- sq = new SpanFocusQuery(new SpanClassQuery(new SpanNextQuery(
- new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
- (byte) 1), new SpanClassQuery(new SpanTermQuery(
- new Term("base", "s:c")), (byte) 2)), (byte) 3),
- (byte) 3);
- kr = ki.search(sq, (short) 10);
-
- assertEquals("totalResults", kr.getTotalResults(), 2);
- assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
- assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr
- .getMatch(0).getSnippetBrackets());
- assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
- assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr
- .getMatch(1).getSnippetBrackets());
-
- assertEquals("Document count", 1, ki.numberOf("base", "documents"));
- assertEquals("Token count", 10, ki.numberOf("base", "t"));
// Don't match the expected class!
sq = new SpanFocusQuery(new SpanNextQuery(new SpanClassQuery(
@@ -375,15 +396,15 @@
SpanQuery sq;
Result kr;
- sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
- "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
- new Term("base", "s:b")), (byte) 3));
-
- kr = ki.search(sq, (short) 10);
- assertEquals(kr.getSerialQuery(),
- "spanContain({2: <base:s />}, {3: base:s:b})");
- assertEquals(kr.getMatch(0).getSnippetBrackets(),
- "a[{2:{3:b}cab}]cabac");
+ // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
+ // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
+ // new Term("base", "s:b")), (byte) 3));
+ //
+ // kr = ki.search(sq, (short) 10);
+ // assertEquals(kr.getSerialQuery(),
+ // "spanContain({2: <base:s />}, {3: base:s:b})");
+ // assertEquals(kr.getMatch(0).getSnippetBrackets(),
+ // "a[{2:{3:b}cab}]cabac");
sq = new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(
new SpanElementQuery("base", "s"), (byte) 2),
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 838a572..e0d6598 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -3,6 +3,7 @@
import static org.junit.Assert.assertEquals;
import java.io.IOException;
+import java.util.ArrayList;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
@@ -17,7 +18,6 @@
import de.ids_mannheim.korap.query.SpanSegmentQuery;
import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
/*
@@ -116,7 +116,7 @@
}
- private FieldDocument createFieldDoc2 () {
+ public static FieldDocument createFieldDoc2 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-2");
fd.addTV(
@@ -124,59 +124,59 @@
"Ich kaufe die Blümen für meine Mutter.",
"[(0-3)s:Ich|_0#0-3|pos:NN$<s>1|<>:s#0-38$<i>7<s>2|<>:np#0-3$<i>1<s>3|"
+ ">:child-of$<i>0<i>7<s>1<s>3<s>2|"
- + ">:child-of$<i>0<i>1<s>6<s>1<s>3|"
- + "<:child-of$<i>1<b>0<i>1<s>3<s>3<s>1|"
+ + ">:child-of$<i>0<i>1<s>2<s>1<s>3|"
+ + "<:child-of$<i>0<s>3<s>3<s>1|"
+ "<:child-of$<i>7<i>0<i>1<s>4<s>2<s>3|"
+ "<:child-of$<i>7<i>1<i>7<s>5<s>2<s>2|"
- + "<:dep$<i>2<s>2<s>1<s>1|"
+ + "<:dep$<i>1<s>2<s>1<s>1|"
+ "r@:func=sbj$<i>0<i>7<s>1]"
+
"[(1-2)s:kaufe|_1#4-9|pos:V$<s>1|<>:vp#4-38$<i>7<s>2|"
- + ">:child-of$<i>7<i>0<i>7<s>1<s>2<s>2|"
- + ">:child-of$<i>1<i>7<s>2<s>1<s>2|"
- + "<:child-of$<i>7<b>0<i>2<s>5<s>2<s>1|"
- + "<:child-of$<i>7<i>2<i>7<s>6<s>2<s>4|"
- + ">:dep$<i>1<s>3<s>1<s>1|"
- + ">:dep$<i>4<s>4<s>1<s>1]"
+ + ">:child-of$<i>7<i>0<i>7<s>6<s>2<s>2|"
+ + ">:child-of$<i>1<i>7<s>2<s>7<s>2|"
+ + "<:child-of$<i>7<b>0<i>2<s>8<s>2<s>1|"
+ + "<:child-of$<i>7<i>2<i>7<s>9<s>2<s>4|"
+ + ">:dep$<i>0<s>3<s>1<s>1|"
+ + ">:dep$<i>3<s>4<s>1<s>1]"
+
"[(2-3)s:die|_2#10-13|pos:ART$<s>1|tt:DET$<s>2|<>:np#10-20$<i>4<s>3|<>:np#10-38$<i>7<s>4|"
- + ">:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|"
- + ">:child-of$<i>2<i>4<s>2<s>1<s>3|"
- + ">:child-of$<i>7<i>1<i>7<s>2<s>4<s>2|"
- + "<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|"
- + "<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|"
- + "<:child-of$<i>7<i>2<i>4<s>5<s>4<s>3|"
- + "<:child-of$<i>7<i>4<i>7<s>6<s>4<s>2|"
- + "<:dep$<i>4<s>3<s>1<s>1|"
- + "r@:func=obj$<i>2<i>7<s>1]" +
+ + ">:child-of$<i>4<i>2<i>7<s>10<s>3<s>4|"
+ + ">:child-of$<i>2<i>4<s>11<s>1<s>3|"
+ + ">:child-of$<i>7<i>1<i>7<s>12<s>4<s>2|"
+ + "<:child-of$<i>4<b>0<i>2<s>13<s>3<s>1|"
+ + "<:child-of$<i>4<b>0<i>3<s>14<s>3<s>1|"
+ + "<:child-of$<i>7<i>2<i>4<s>15<s>4<s>3|"
+ + "<:child-of$<i>7<i>4<i>7<s>16<s>4<s>2|"
+ + "<:dep$<i>3<s>2<s>1<s>1]" +
"[(3-4)s:Blümen|_3#14-20|pos:NN$<s>1|"
- + ">:child-of$<i>2<i>4<s>1<s>1<s>3|"
- + "<:dep$<i>2<s>2<s>1<s>2|" + ">:dep$<i>3<s>3<s>1<s>1|"
- + ">:dep$<i>5<s>4<s>1<s>1|"
- + "r@:func=head$<i>2<i>4<s>2]" +
+ + ">:child-of$<i>2<i>4<s>17<s>1<s>3|"
+ + "<:dep$<i>1<s>2<s>1<s>1|" + ">:dep$<i>2<s>3<s>1<s>1|"
+ + ">:dep$<i>4<s>4<s>1<s>1|"
+ + "r@:func=head$<i>2<i>4<s>2|"
+ + "r@:func=obj$<i>1<i>4<s>2]" +
"[(4-5)s:für|_4#21-24|pos:PREP$<s>1|<>:pp#21-38$<i>7<s>2|"
- + ">:child-of$<i>4<i>7<s>1<s>1<s>2|"
- + ">:child-of$<i>7<i>2<i>7<s>2<s>2<s>4|"
- + "<:child-of$<i>7<b>0<i>5<s>4<s>2<s>1|"
- + "<:child-of$<i>7<i>5<i>7<s>5<s>2<s>2|"
- + "<:dep$<i>4<s>1<s>1<s>1|" + ">:dep$<i>7<s>3<s>1<s>1]"
+ + ">:child-of$<i>4<i>7<s>18<s>1<s>2|"
+ + ">:child-of$<i>7<i>2<i>7<s>19<s>2<s>4|"
+ + "<:child-of$<i>7<b>0<i>5<s>20<s>2<s>1|"
+ + "<:child-of$<i>7<i>5<i>7<s>21<s>2<s>2|"
+ + "<:dep$<i>3<s>1<s>1<s>1|" + ">:dep$<i>6<s>3<s>1<s>1]"
+
"[(5-6)s:meine|_5#25-30|pos:ART$<s>1|<>:np#25-38$<i>7<s>2|"
- + ">:child-of$<i>5<i>7<s>1<s>1<s>2|"
- + ">:child-of$<i>7<i>4<i>7<s>2<s>2<s>2|"
- + "<:child-of$<i>7<b>0<i>6<s>4<s>2<s>1|"
- + "<:child-of$<i>7<b>0<i>7<s>5<s>2<s>1|"
- + "<:dep$<i>7<s>3<s>1<s>1]" +
+ + ">:child-of$<i>5<i>7<s>22<s>1<s>2|"
+ + ">:child-of$<i>7<i>4<i>7<s>23<s>2<s>2|"
+ + "<:child-of$<i>7<b>0<i>5<s>24<s>2<s>1|"
+ + "<:child-of$<i>7<b>0<i>6<s>25<s>2<s>1|"
+ + "<:dep$<i>6<s>3<s>1<s>1]" +
"[(6-7)s:Mutter.|_6#31-38|pos:NN$<s>1|"
- + ">:child-of$<i>5<i>7<s>1<s>1<s>2|"
- + ">:dep$<i>6<s>2<s>1<s>1|" + "<:dep$<i>5<s>3<s>1<s>1|"
- + "r@:func=head$<i>6<i>7<s>3]");
+ + ">:child-of$<i>5<i>7<s>26<s>1<s>2|"
+ + ">:dep$<i>5<s>2<s>1<s>1|" + "<:dep$<i>4<s>3<s>1<s>1|"
+ + "r@:func=head$<i>5<i>7<s>3]");
return fd;
}
@@ -262,163 +262,173 @@
/**
* Relations with attributes
- * NEED focusMulti on span relation query before
+ * need focusMulti on span relation query before
* SpanWithAttributeQuery
* */
- /* @Test
- public void testCase3() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- // child-of relations
- SpanRelationQuery srq= new SpanRelationQuery(new SpanTermQuery(
- new Term("base", ">:child-of")), true);
- kr = ki.search(srq,(short) 20);
-
- assertEquals((long) 13, kr.getTotalResults());
+ @Test
+ public void testCase3 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
- // child-of with attr func=sbj
- SpanWithAttributeQuery wq =
- new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery(new Term("base", "r@:func=sbj")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 10);
- assertEquals((long) 1, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos()); // token
- assertEquals(1, kr.getMatch(0).getEndPos());
-
- // child-of without attr func=sbj
- wq =
- new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery(new Term("base", "r@:func=sbj")),
- true, true),
- true
- );
- kr = ki.search(wq,(short) 20);
- assertEquals((long) 12, kr.getTotalResults());
+ // child-of relations
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", ">:child-of")), true);
+ kr = ki.search(srq, (short) 20);
- // child-of with attr func-obj
- wq = new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery( new Term("base", "r@:func=obj")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 10);
- assertEquals((long) 1, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos()); // element
- assertEquals(4,kr.getMatch(0).getEndPos());
+ assertEquals((long) 13, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(1, kr.getMatch(1).getEndPos());
+ assertEquals(1, kr.getMatch(2).getStartPos());
+ assertEquals(2, kr.getMatch(2).getEndPos());
+ assertEquals(1, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ assertEquals(2, kr.getMatch(4).getStartPos());
+ assertEquals(3, kr.getMatch(4).getEndPos());
+ assertEquals(2, kr.getMatch(5).getStartPos());
+ assertEquals(4, kr.getMatch(5).getEndPos());
- // target of a dependency relation
- srq = new SpanRelationQuery(
- new SpanTermQuery(new Term("base", "<:dep")), true);
- kr = ki.search(srq,(short) 10);
-
- assertEquals((long) 6, kr.getTotalResults());
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
- // target of a dependency relation, which is also a head
- wq = new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery( new Term("base", "r@:func=head")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 20);
- // for (Match km : kr.getMatches()) {
- // System.out.println(km.getStartPos() + "," + km.getEndPos() + " "
- // + km.getSnippetBrackets());
- // }
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(3, kr.getMatch(0).getStartPos());
- assertEquals(4,kr.getMatch(0).getEndPos());
- assertEquals(6, kr.getMatch(1).getStartPos());
- assertEquals(7, kr.getMatch(1).getEndPos());
-
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 20);
+ /*
+ * for (Match km : kr.getMatches()) {
+ * System.out.println(km.getStartPos() + "," + km.getEndPos()
+ * + " " + km.getSnippetBrackets()); }
+ */
+ assertEquals((long) 13, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+ assertEquals(0, kr.getMatch(2).getStartPos());
+ assertEquals(7, kr.getMatch(2).getEndPos());
+ assertEquals(1, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ assertEquals(1, kr.getMatch(4).getStartPos());
+ assertEquals(7, kr.getMatch(4).getEndPos());
}
-
- // FOCUS has not sorted
- /** Relation with variable
- * match right, return left
- * sort by right, then sort by left
- * @throws IOException
- * */
- /*@Test
- public void testCase4() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- //return all children of np
- SpanQuery rv = new SpanFocusQuery(
- new SpanSegmentQuery(
- new SpanRelationQuery(
- new SpanTermQuery(new Term("base","<:child-of")), true),
- new SpanElementQuery("base","np"),
- true),
- (byte) 2);
-
- kr = ki.search(rv,(short) 10);
- for (Match km : kr.getMatches()) {
- System.out.println(km.getStartPos() + "," + km.getEndPos()
- // + " "+ km.getSnippetBrackets()
- );
- }
- assertEquals((long) 7, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(1,kr.getMatch(0).getEndPos());
- assertEquals(2,kr.getMatch(1).getStartPos());
- assertEquals(3,kr.getMatch(1).getEndPos());
- assertEquals(2,kr.getMatch(2).getStartPos());
- assertEquals(4,kr.getMatch(2).getEndPos());
- assertEquals(3,kr.getMatch(3).getStartPos());
- assertEquals(4,kr.getMatch(3).getEndPos());
- assertEquals(4,kr.getMatch(4).getStartPos());
- assertEquals(7,kr.getMatch(4).getEndPos());
- assertEquals(5,kr.getMatch(5).getStartPos());
- assertEquals(6,kr.getMatch(5).getEndPos());
- assertEquals(6,kr.getMatch(6).getStartPos());
- assertEquals(7,kr.getMatch(6).getEndPos());
- // sorting left problem (solved)
-
- //return all children of np that are articles
- SpanSegmentQuery rv2 = new SpanSegmentQuery(rv, new SpanTermQuery(new Term("base","pos:ART")));
- kr = ki.search(rv2,(short) 10);
-
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos());
- assertEquals(3,kr.getMatch(0).getEndPos());
- assertEquals(5,kr.getMatch(1).getStartPos());
- assertEquals(6,kr.getMatch(1).getEndPos());
-
- // return all nps whose children are articles
- /*SpanRelationPartQuery rv3 =
- new SpanRelationPartQuery(rv,
- new SpanTermWithIdQuery(new Term("base","pos:ART"), true),
- false, true, true);
- kr = ki.search(rv3,(short) 10);
-
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos());
- assertEquals(4,kr.getMatch(0).getEndPos());
- assertEquals(5,kr.getMatch(1).getStartPos());
- assertEquals(7,kr.getMatch(1).getEndPos());
-
- */
- //}
+
+ @Test
+ public void testCase4 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", ">:child-of")), true);
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=sbj")),
+ true), true);
+
+ kr = ki.search(wq, (short) 20);
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos()); // token
+ assertEquals(7, kr.getMatch(0).getEndPos());
+
+ // child-of without attr func=sbj
+ wq = new SpanWithAttributeQuery(new SpanFocusQuery(srq, classNumbers),
+ new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "r@:func=sbj")), true, true), true);
+ kr = ki.search(wq, (short) 20);
+ assertEquals((long) 12, kr.getTotalResults());
+ }
+
+
+ @Test
+ public void testCase5 () throws IOException {
+
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", "<:dep")), true);
+ kr = ki.search(srq, (short) 10);
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 10);
+ // for (Match km : kr.getMatches()) {
+ // System.out.println(km.getStartPos() + "," + km.getEndPos()
+ // + " "
+ // + km.getSnippetBrackets());
+ // }
+
+ SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "r@:func=obj")), true);
+ kr = ki.search(saq, (short) 10);
+
+ // dep relation with attr func=obj
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=obj")),
+ true), true);
+
+ kr = ki.search(wq, (short) 10);
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(1, kr.getMatch(0).getStartPos()); // element
+ assertEquals(4, kr.getMatch(0).getEndPos());
+ }
+
+
+ @Test
+ public void testCase10 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+ // target of a dependency relation
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", "<:dep")), true);
+ kr = ki.search(srq, (short) 10);
+ assertEquals((long) 6, kr.getTotalResults());
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 10);
+ assertEquals((long) 6, kr.getTotalResults());
+
+ SpanAttributeQuery aq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "r@:func=head")), true);
+ kr = ki.search(aq, (short) 10);
+
+ // dependency relation, which is also a head
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=head")),
+ true), true);
+
+ kr = ki.search(wq, (short) 20);
+
+ assertEquals((long) 2, kr.getTotalResults());
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(4, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+
+ }
+
/**
* Match left return left
* Match right return right
* */
@Test
- public void testCase5 () throws IOException {
+ public void testCase6 () throws IOException {
ki.addDoc(createFieldDoc2());
ki.commit();
@@ -464,48 +474,44 @@
}
- // FOCUS has not sorted
/**
* Match left, return right
* sort by left, then sort by right
* */
- /*@Test
- public void testCase7() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- // return all children that are NP
- SpanQuery rv = new SpanSegmentQuery(
- new SpanRelationQuery(
- new SpanTermQuery(new Term("base",">:child-of")), true),
- new SpanElementQuery("base","np"),
- true);
-
- //return all parents of np
- SpanQuery rv2 = new SpanFocusQuery(rv, (byte) 2);
- kr = ki.search(rv2, (short) 10);
- for (Match km : kr.getMatches()) {
- System.out.println(km.getStartPos() + "," + km.getEndPos()
- // + " "+ km.getSnippetBrackets()
- );
- }
- assertEquals((long) 4, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(7,kr.getMatch(0).getEndPos());
- assertEquals(1,kr.getMatch(1).getStartPos());
- assertEquals(7,kr.getMatch(1).getEndPos());
- assertEquals(2,kr.getMatch(2).getStartPos());
- assertEquals(7,kr.getMatch(2).getEndPos());
- assertEquals(4,kr.getMatch(3).getStartPos());
- assertEquals(7,kr.getMatch(3).getEndPos());
- // id problem (solved)
+ @Test
+ public void testCase7 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
- // return all parents of np that are PP
-
+ // return all children that are NP
+ SpanQuery rv = new SpanSegmentQuery(new SpanRelationQuery(
+ new SpanTermQuery(new Term("base", ">:child-of")), true),
+ new SpanElementQuery("base", "np"), true);
+
+ //return all parents of np
+ SpanFocusQuery rv2 = new SpanFocusQuery(rv, (byte) 2);
+ rv2.setSorted(false);
+ kr = ki.search(rv2, (short) 10);
+
+ assertEquals((long) 4, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(7, kr.getMatch(0).getEndPos());
+ assertEquals(1, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+ assertEquals(2, kr.getMatch(2).getStartPos());
+ assertEquals(7, kr.getMatch(2).getEndPos());
+ assertEquals(4, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ // id problem (solved)
+
+ // TODO: return all parents of np that are PP (no query or assertion for this yet)
+
}
-
- /** Relations whose source/target do not embed
- * its counterparts.
+
+
+ /**
+ * Relations whose source/target do not embed
+ * its counterparts.
* */
@Test
public void testCase8 () throws IOException {
@@ -516,7 +522,7 @@
SpanQuery rv = new SpanFocusQuery(new SpanSegmentQuery(
new SpanRelationQuery(new SpanTermQuery(new Term("base",
"<:dep")), true), new SpanTermWithIdQuery(new Term(
- "base", "pos:NN"), true), true), (byte) 2);
+ "base", "pos:NN"), true), true), (byte) 1);
kr = ki.search(rv, (short) 10);
assertEquals((long) 3, kr.getTotalResults());
@@ -544,4 +550,70 @@
}
+
+ /**
+ * Relation with variable: match right, return left; sort by right,
+ * then sort by left.
+ *
+ * @throws IOException
+ * */
+ @Test
+ public void testCase9 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ // return all children of np
+ SpanFocusQuery rv = new SpanFocusQuery(new SpanSegmentQuery(
+ new SpanRelationQuery(new SpanTermQuery(new Term("base",
+ "<:child-of")), true), new SpanElementQuery("base",
+ "np"), true), (byte) 1);
+ rv.setSorted(false);
+
+ kr = ki.search(rv, (short) 10);
+
+ assertEquals((long) 7, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(2, kr.getMatch(1).getStartPos());
+ assertEquals(3, kr.getMatch(1).getEndPos());
+ assertEquals(2, kr.getMatch(2).getStartPos());
+ assertEquals(4, kr.getMatch(2).getEndPos());
+ assertEquals(3, kr.getMatch(3).getStartPos());
+ assertEquals(4, kr.getMatch(3).getEndPos());
+ assertEquals(4, kr.getMatch(4).getStartPos());
+ assertEquals(7, kr.getMatch(4).getEndPos());
+ assertEquals(5, kr.getMatch(5).getStartPos());
+ assertEquals(6, kr.getMatch(5).getEndPos());
+ assertEquals(6, kr.getMatch(6).getStartPos());
+ assertEquals(7, kr.getMatch(6).getEndPos());
+ // sorting left problem (solved)
+
+ // return all children of np that are articles
+ SpanSegmentQuery rv2 = new SpanSegmentQuery(rv, new SpanTermQuery(
+ new Term("base", "pos:ART")));
+ kr = ki.search(rv2, (short) 10);
+
+ assertEquals((long) 2, kr.getTotalResults());
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(6, kr.getMatch(1).getEndPos());
+
+ // return all nps whose children are articles
+ SpanSegmentQuery rv3 = new SpanSegmentQuery(rv,
+ new SpanTermWithIdQuery(new Term("base", "pos:ART"), true));
+
+
+ SpanFocusQuery sf = new SpanFocusQuery(rv3, (byte) 1);
+ kr = ki.search(sf, (short) 10);
+
+ assertEquals((long) 2, kr.getTotalResults());
+
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(6, kr.getMatch(1).getEndPos());
+
+ }
+
}
\ No newline at end of file
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java
new file mode 100644
index 0000000..2512d1d
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java
@@ -0,0 +1,113 @@
+package de.ids_mannheim.korap.query;
+
+import static de.ids_mannheim.korap.TestSimple.getJSONQuery;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
+public class TestSpanRelationQueryJSON {
+
+ @Test
+ public void testMatchRelationSource () throws QueryException {
+ // match-source.json: first operand is the span c:s, second is a bare token
+ String filepath = getClass().getResource(
+ "/queries/relation/match-source.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus([1,2]spanSegment(tokens:>:mate/d:HEAD, <tokens:c:s />))",
+ sq.toString());
+ }
+
+
+ @Test
+ public void testMatchRelationTarget () throws QueryException {
+ // match-target.json: first operand is a bare token, second is the span c:vp
+ String filepath = getClass().getResource(
+ "/queries/relation/match-target.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus([1,2]spanSegment(tokens:<:mate/d:HEAD, <tokens:c:vp />))",
+ sq.toString());
+ }
+
+
+ @Test
+ public void testMatchRelationSourceAndTarget () throws QueryException {
+ // match-source-and-target.json: both operands are spans (c:s and c:vp)
+ String filepath = getClass().getResource(
+ "/queries/relation/match-source-and-target.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus([1,2]spanSegment(focus(2: spanSegment(tokens:>:mate/d:HEAD, <tokens:c:s />)), <tokens:c:vp />))",
+ sq.toString());
+ }
+
+
+ @Test
+ public void testMatchOperandWithProperty () throws QueryException {
+ // operand-with-property.json: first operand c:s carries the attribute term "root": true
+ String filepath = getClass().getResource(
+ "/queries/relation/operand-with-property.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus([1,2]spanSegment(focus(2: spanSegment(tokens:>:mate/d:HEAD, "
+ + "spanElementWithAttribute(<tokens:c:s />, spanAttribute(tokens:@root)))), <tokens:c:vp />))",
+ sq.toString());
+ }
+
+
+ @Test
+ public void testMatchOperandWithAttribute () throws QueryException {
+ // operand-with-attribute.json: first operand c:s carries the attribute term type:top
+ String filepath = getClass().getResource(
+ "/queries/relation/operand-with-attribute.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus([1,2]spanSegment(focus(2: spanSegment(tokens:>:mate/d:HEAD, "
+ + "spanElementWithAttribute(<tokens:c:s />, spanAttribute(tokens:type:top)))), <tokens:c:vp />))",
+ sq.toString());
+ }
+
+
+ @Test
+ public void testMatchRelationOnly () throws QueryException {
+ // relation-only.json: both operands are bare tokens, so only mate/d:HEAD appears in the query
+ String filepath = getClass().getResource(
+ "/queries/relation/relation-only.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals("focus([1,2]tokens:<:mate/d:HEAD)", sq.toString());
+ }
+
+ @Test
+ public void testFocusSource () throws QueryException {
+ // focus-source.json: focus on class 1 of a relation between a bare token and the span np
+ String filepath = getClass().getResource(
+ "/queries/relation/focus-source.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus(1: spanSegment(tokens:<:mate/d:HEAD, <tokens:np />))",
+ sq.toString());
+ }
+
+ @Test
+ public void testFocusTarget () throws QueryException {
+ String filepath = getClass().getResource(
+ "/queries/relation/focus-target.json").getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "focus(2: spanSegment(tokens:>:mate/d:HEAD, <tokens:s />))",
+ sq.toString());
+ }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/server/TestResource.java b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
index b69fb31..ce44a19 100644
--- a/src/test/java/de/ids_mannheim/korap/server/TestResource.java
+++ b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
@@ -10,13 +10,11 @@
import javax.ws.rs.client.Entity;
import org.glassfish.grizzly.http.server.HttpServer;
-import com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures;
import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import org.junit.Ignore;
import java.io.FileInputStream;
@@ -25,6 +23,9 @@
import de.ids_mannheim.korap.response.Response;
import static de.ids_mannheim.korap.util.KrillString.*;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
/**
* @author diewald
@@ -34,6 +35,8 @@
private HttpServer server;
private WebTarget target;
+ ObjectMapper mapper = new ObjectMapper();
+
@Before
public void setUp () throws Exception {
@@ -49,7 +52,7 @@
// c.configuration().enable(com.sun.jersey.api.json.POJOMappingFeature());
// c.configuration().enable(new org.glassfish.jersey.media.json.JsonJaxbFeature());
- // c.register(JacksonFeature.class);
+ // c.register(JacksonFeatures.class);
// c.register(com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures.class);
/*
@@ -64,6 +67,7 @@
@After
public void tearDown () throws Exception {
server.stop();
+ Node.closeDBPool();
};
@@ -78,9 +82,19 @@
};
- @Ignore
+ @Test
+ public void testInfo () throws IOException {
+ String responseMsg = target.path("/").request().get(String.class);
+ JsonNode res = mapper.readTree(responseMsg);
+ assertEquals("milena", res.at("/node").asText());
+ assertEquals(680, res.at("/messages/0/0").asInt());
+ };
+
+
+ @Test
public void testResource () throws IOException {
- Response kresp;
+ String resp;
+ JsonNode res;
for (String i : new String[] { "00001", "00002", "00003", "00004",
"00005", "00006", "02439" }) {
@@ -91,13 +105,13 @@
Entity jsonE = Entity.json(json);
try {
- kresp = target.path("/index/" + i).request("application/json")
- .put(jsonE, Response.class);
+ // Put new documents to the index
+ resp = target.path("/index/" + i).request("application/json")
+ .put(jsonE, String.class);
- assertEquals(kresp.getNode(), "milena");
- assertFalse(kresp.hasErrors());
- assertFalse(kresp.hasWarnings());
- assertFalse(kresp.hasMessages());
+ res = mapper.readTree(resp);
+ assertEquals("milena", res.at("/node").asText());
+ assertEquals(681, res.at("/messages/0/0").asInt());
}
catch (Exception e) {
fail("Server response failed " + e.getMessage()
@@ -105,30 +119,26 @@
}
};
- kresp = target.path("/index").request("application/json")
- .post(Entity.text(""), Response.class);
- assertEquals(kresp.getNode(), "milena");
- assertFalse(kresp.hasErrors());
- assertFalse(kresp.hasWarnings());
- assertFalse(kresp.hasMessages());
+ resp = target.path("/index").request("application/json")
+ .post(Entity.text(""), String.class);
+ res = mapper.readTree(resp);
+ assertEquals("milena", res.at("/node").asText());
+ assertEquals(683, res.at("/messages/0/0").asInt());
};
- @Ignore
+ @Test
public void testCollection () throws IOException {
String json = getString(getClass().getResource(
"/queries/bsp-uid-example.jsonld").getFile());
try {
- Response kresp = target.path("/").queryParam("uid", "1")
+ String resp = target.path("/").queryParam("uid", "1")
.queryParam("uid", "4").request("application/json")
- .post(Entity.json(json), Response.class);
-
- assertEquals(2, kresp.getTotalResults());
- assertFalse(kresp.hasErrors());
- assertFalse(kresp.hasWarnings());
- assertFalse(kresp.hasMessages());
+ .post(Entity.json(json), String.class);
+ JsonNode res = mapper.readTree(resp);
+ assertEquals(2, res.at("/totalResults").asInt());
}
catch (Exception e) {
fail("Server response failed: " + e.getMessage() + " (Known issue)");
diff --git a/src/test/resources/queries/relation/focus-source.json b/src/test/resources/queries/relation/focus-source.json
new file mode 100644
index 0000000..d934a3b
--- /dev/null
+++ b/src/test/resources/queries/relation/focus-source.json
@@ -0,0 +1,30 @@
+{
+ "query": {
+ "@type": "koral:reference",
+ "operation": "operation:focus",
+ "classRef": [1],
+ "operands": [{
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:token"
+ },
+ {
+ "@type": "koral:span",
+ "key": "np"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ }]
+ },
+ "meta": {}
+}
diff --git a/src/test/resources/queries/relation/focus-target.json b/src/test/resources/queries/relation/focus-target.json
new file mode 100644
index 0000000..daec972
--- /dev/null
+++ b/src/test/resources/queries/relation/focus-target.json
@@ -0,0 +1,30 @@
+{
+ "query": {
+ "@type": "koral:reference",
+ "operation": "operation:focus",
+ "classRef": [2],
+ "operands": [{
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:span",
+ "key": "s"
+ },
+ {
+ "@type": "koral:token"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ }]
+ },
+ "meta": {}
+}
diff --git a/src/test/resources/queries/relation/match-source-and-target.json b/src/test/resources/queries/relation/match-source-and-target.json
new file mode 100644
index 0000000..860528f
--- /dev/null
+++ b/src/test/resources/queries/relation/match-source-and-target.json
@@ -0,0 +1,28 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "s"
+ },
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "vp"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/relation/match-source.json b/src/test/resources/queries/relation/match-source.json
new file mode 100644
index 0000000..3ca5686
--- /dev/null
+++ b/src/test/resources/queries/relation/match-source.json
@@ -0,0 +1,26 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "s"
+ },
+ {
+ "@type": "koral:token"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/relation/match-target.json b/src/test/resources/queries/relation/match-target.json
new file mode 100644
index 0000000..7fe4ef0
--- /dev/null
+++ b/src/test/resources/queries/relation/match-target.json
@@ -0,0 +1,24 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {"@type": "koral:token"},
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "vp"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/relation/operand-with-attribute.json b/src/test/resources/queries/relation/operand-with-attribute.json
new file mode 100644
index 0000000..570031c
--- /dev/null
+++ b/src/test/resources/queries/relation/operand-with-attribute.json
@@ -0,0 +1,35 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "s",
+ "match": "match:eq",
+ "attr": {
+ "@type": "koral:term",
+ "layer": "type",
+ "key": "top",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "vp"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/relation/operand-with-property.json b/src/test/resources/queries/relation/operand-with-property.json
new file mode 100644
index 0000000..407c900
--- /dev/null
+++ b/src/test/resources/queries/relation/operand-with-property.json
@@ -0,0 +1,33 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "s",
+ "match": "match:eq",
+ "attr": {
+ "@type": "koral:term",
+ "root": true
+ }
+ },
+ {
+ "@type": "koral:span",
+ "layer": "c",
+ "key": "vp"
+ }
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/relation/relation-only.json b/src/test/resources/queries/relation/relation-only.json
new file mode 100644
index 0000000..8801184
--- /dev/null
+++ b/src/test/resources/queries/relation/relation-only.json
@@ -0,0 +1,20 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:relation",
+ "operands": [
+ {"@type": "koral:token"},
+ {"@type": "koral:token"}
+ ],
+ "relation": {
+ "@type": "koral:relation",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer": "d",
+ "key": "HEAD"
+ }
+ }
+ },
+ "meta": {}
+}
\ No newline at end of file