Merge "Removed some copies." into focus
diff --git a/Readme.md b/Readme.md
index 11f9a6e..0a9feba 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,4 +1,4 @@
-![Krill](http://nils-diewald.de/temp/krill-logo.png)
+![Krill](http://nils-diewald.de/temp/krill.png)
A Corpusdata Retrieval Index using Lucene for Look-Ups
@@ -7,7 +7,7 @@
Krill is a [Lucene](https://lucene.apache.org/) based search
engine for large annotated corpora,
-used as a backend component of the [KorAP Corpus Analysis](http://korap.ids-mannheim.de/) at the [IDS Mannheim](http://ids-mannheim.de/).
+used as a backend component of the [KorAP Corpus Analysis Platform](http://korap.ids-mannheim.de/) at the [IDS Mannheim](http://ids-mannheim.de/).
**! This software is in its early stages and not stable yet! Use it on your own risk!**
@@ -136,6 +136,8 @@
Annotation tools and models used in preparation of the test corpora are based on the following work:
+Belica, Cyril (1994): *A German Lemmatizer*. MECOLB Final Report MLAP93-21/WP2. Luxemburg.
+
Bohnet, Bernd (2010): *Top accuracy and fast dependency parsing is not a contradiction*. In *Proceedings of COLING*, pp 89–97, Beijing, China.
Bohnet, Bernd, Joakim Nivre, Igor Boguslavsky, Richard Farkas, Filip Ginter, and Jan Hajic (2013): *Joint Morphological and Syntactic Analysis for Richly Inflected Languages*. Transactions of the Association for Computational Linguistics, 1, pp. 415-428.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
index 2bef713..a4194b1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -43,7 +43,7 @@
*
* @author margaretha
* */
-public class SpanAttributeQuery extends SpanWithIdQuery {
+public class SpanAttributeQuery extends SimpleSpanQuery {
boolean negation;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
index aa9fc7a..8de7faf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
@@ -1,60 +1,35 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
-
-import java.util.Set;
import java.util.Map;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.Query;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.ClassSpans;
-
/**
* Marks spans with a special class payload.
*/
-public class SpanClassQuery extends SpanQuery {
- public String field;
- protected byte number;
- protected SpanQuery operand;
-
-
- public SpanClassQuery (SpanQuery operand, byte number) {
- this.field = operand.getField();
- this.operand = operand;
- this.number = number;
- };
+public class SpanClassQuery extends SimpleSpanQuery {
+ protected byte number = 1;
public SpanClassQuery (SpanQuery operand) {
- this.field = operand.getField();
- this.operand = operand;
- this.number = (byte) 1;
+ super(operand, false);
};
- public byte number () {
- return this.number;
- };
-
-
- @Override
- public String getField () {
- return field;
- }
-
-
- @Override
- public void extractTerms (Set<Term> terms) {
- this.operand.extractTerms(terms);
+ public SpanClassQuery (SpanQuery operand, byte number) {
+ super(operand, false);
+ this.number = number;
};
@@ -63,7 +38,7 @@
StringBuffer buffer = new StringBuffer("{");
short classNr = (short) this.number;
buffer.append(classNr & 0xFF).append(": ");
- buffer.append(this.operand.toString()).append('}');
+ buffer.append(this.firstClause.toString()).append('}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
};
@@ -72,7 +47,7 @@
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- return (Spans) new ClassSpans(this.operand, context, acceptDocs,
+ return (Spans) new ClassSpans(this.firstClause, context, acceptDocs,
termContexts, number);
};
@@ -80,12 +55,12 @@
@Override
public Query rewrite (IndexReader reader) throws IOException {
SpanClassQuery clone = null;
- SpanQuery query = (SpanQuery) this.operand.rewrite(reader);
+ SpanQuery query = (SpanQuery) this.firstClause.rewrite(reader);
- if (query != this.operand) {
+ if (query != this.firstClause) {
if (clone == null)
clone = this.clone();
- clone.operand = query;
+ clone.firstClause = query;
};
if (clone != null)
@@ -98,7 +73,7 @@
@Override
public SpanClassQuery clone () {
SpanClassQuery spanClassQuery = new SpanClassQuery(
- (SpanQuery) this.operand.clone(), this.number);
+ (SpanQuery) this.firstClause.clone(), this.number);
spanClassQuery.setBoost(getBoost());
return spanClassQuery;
};
@@ -114,7 +89,7 @@
final SpanClassQuery spanClassQuery = (SpanClassQuery) o;
- if (!this.operand.equals(spanClassQuery.operand))
+ if (!this.firstClause.equals(spanClassQuery.firstClause))
return false;
if (this.number != spanClassQuery.number)
@@ -128,10 +103,20 @@
@Override
public int hashCode () {
int result = 1;
- result = operand.hashCode();
+ result = firstClause.hashCode();
result += (int) number;
result ^= (result << 15) | (result >>> 18);
result += Float.floatToRawIntBits(getBoost());
return result;
+ }
+
+
+ public byte getNumber () {
+ return number;
+ }
+
+
+ public void setNumber (byte number) {
+ this.number = number;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 9aa684a..6c2981b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -47,7 +47,7 @@
* @author diewald
* @author margaretha
*/
-public class SpanElementQuery extends SpanWithIdQuery {
+public class SpanElementQuery extends SimpleSpanQuery {
private static Term elementTerm;
private String elementStr;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
index 30cc536..305e42c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
@@ -1,65 +1,81 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
-
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.Query;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.FocusSpans;
-import de.ids_mannheim.korap.query.SpanClassQuery;
/**
* Modify the span of a match to the boundaries of a certain class.
*
* In case multiple classes are found with the very same number, the
- * span
- * is maximized to start on the first occurrence from the left and end
- * on
- * the last occurrence on the right.
+ * span is
+ * maximized to start on the first occurrence from the left and end on
+ * the last
+ * occurrence on the right.
*
- * In case the class to modify on is not found in the subquery,
- * the match is ignored.
+ * In case the class to modify on is not found in the subquery, the
+ * match is
+ * ignored.
*
* @author diewald
*
* @see FocusSpans
*/
-public class SpanFocusQuery extends SpanClassQuery {
+public class SpanFocusQuery extends SimpleSpanQuery {
+
+ private List<Byte> classNumbers = new ArrayList<Byte>();
+ private boolean isSorted = true;
+
/**
* Construct a new SpanFocusQuery.
*
- * @param operand
+ * @param sq
* The nested {@link SpanQuery}, that contains one or
- * more classed spans.
+ * more
+ * classed spans.
* @param number
* The class number to focus on.
*/
- public SpanFocusQuery (SpanQuery operand, byte number) {
- super(operand, number);
+ public SpanFocusQuery (SpanQuery sq, byte classNumber) {
+ super(sq, true);
+ classNumbers.add(classNumber);
+ };
+
+
+ public SpanFocusQuery (SpanQuery sq, List<Byte> classNumbers) {
+ super(sq, true);
+ this.classNumbers = classNumbers;
+ isSorted = false;
};
/**
- * Construct a new SpanFocusQuery.
- * The class to focus on defaults to <tt>1</tt>.
+ * Construct a new SpanFocusQuery. The class to focus on defaults
+ * to
+ * <tt>1</tt>.
*
- * @param operand
+ * @param sq
* The nested {@link SpanQuery}, that contains one or
- * more classed spans.
+ * more
+ * classed spans.
*/
- public SpanFocusQuery (SpanQuery operand) {
- this(operand, (byte) 1);
+ public SpanFocusQuery (SpanQuery sq) {
+ super(sq, true);
+ classNumbers.add((byte) 1);
};
@@ -67,9 +83,20 @@
public String toString (String field) {
StringBuffer buffer = new StringBuffer();
buffer.append("focus(");
- short classNr = (short) this.number;
- buffer.append(classNr & 0xFF).append(": ");
- buffer.append(this.operand.toString());
+ if (classNumbers.size() > 1) {
+ buffer.append("[");
+ for (int i = 0; i < classNumbers.size(); i++) {
+ buffer.append((short) classNumbers.get(i) & 0xFF);
+ if (i != classNumbers.size() - 1) {
+ buffer.append(",");
+ }
+ }
+ buffer.append("]");
+ }
+ else {
+ buffer.append((short) classNumbers.get(0) & 0xFF).append(": ");
+ }
+ buffer.append(this.firstClause.toString());
buffer.append(')');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@@ -79,20 +106,19 @@
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- return (Spans) new FocusSpans(this.operand, context, acceptDocs,
- termContexts, number);
+ return new FocusSpans(this, context, acceptDocs, termContexts);
};
@Override
public Query rewrite (IndexReader reader) throws IOException {
SpanFocusQuery clone = null;
- SpanQuery query = (SpanQuery) this.operand.rewrite(reader);
+ SpanQuery query = (SpanQuery) this.firstClause.rewrite(reader);
- if (query != this.operand) {
+ if (query != this.firstClause) {
if (clone == null)
clone = this.clone();
- clone.operand = query;
+ clone.firstClause = query;
};
if (clone != null)
@@ -105,7 +131,7 @@
@Override
public SpanFocusQuery clone () {
SpanFocusQuery spanFocusQuery = new SpanFocusQuery(
- (SpanQuery) this.operand.clone(), this.number);
+ (SpanQuery) this.firstClause.clone(), this.getClassNumbers());
spanFocusQuery.setBoost(getBoost());
return spanFocusQuery;
};
@@ -120,9 +146,9 @@
final SpanFocusQuery spanFocusQuery = (SpanFocusQuery) o;
- if (!this.operand.equals(spanFocusQuery.operand))
+ if (!this.firstClause.equals(spanFocusQuery.firstClause))
return false;
- if (this.number != spanFocusQuery.number)
+ if (this.getClassNumbers() != spanFocusQuery.getClassNumbers())
return false;
// Probably not necessary
@@ -132,9 +158,31 @@
@Override
public int hashCode () {
- int result = operand.hashCode();
- result = 31 * result + number;
+ int result = firstClause.hashCode();
+ for (byte number : classNumbers)
+ result = 31 * result + number;
result += Float.floatToRawIntBits(getBoost());
return result;
- };
+ }
+
+
+ public List<Byte> getClassNumbers () {
+ return classNumbers;
+ }
+
+
+ public void setClassNumbers (List<Byte> classNumbers) {
+ this.classNumbers = classNumbers;
+ }
+
+
+ public boolean isSorted () {
+ return isSorted;
+ }
+
+
+ public void setSorted (boolean isSorted) {
+ this.isSorted = isSorted;
+ }
+
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index f19a1dc..cf1c154 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -51,7 +51,7 @@
*
* @author margaretha
* */
-public class SpanRelationQuery extends SpanWithIdQuery {
+public class SpanRelationQuery extends SimpleSpanQuery {
private String type;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 512adc0..bb69d4c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -67,7 +67,7 @@
public SpanSegmentQuery (SpanRelationQuery firstClause,
- SpanWithIdQuery secondClause,
+ SimpleSpanQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, true);
isRelation = true;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
index 84e30b2..87de370 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
@@ -10,7 +10,6 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.spans.SpansWithId;
import de.ids_mannheim.korap.query.spans.TermSpansWithId;
/**
@@ -32,7 +31,7 @@
*
* @author margaretha
* */
-public class SpanTermWithIdQuery extends SpanWithIdQuery {
+public class SpanTermWithIdQuery extends SimpleSpanQuery {
/**
* Constructs a SpanTermWithIdQuery for the given term.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
index 12d6876..2ea519a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanWithAttributeQuery.java
@@ -12,10 +12,8 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.spans.ElementSpans;
-import de.ids_mannheim.korap.query.spans.RelationSpans;
+import de.ids_mannheim.korap.query.spans.SimpleSpans;
import de.ids_mannheim.korap.query.spans.SpansWithAttribute;
-import de.ids_mannheim.korap.query.spans.TermSpansWithId;
/**
* Enumeration of spans (e.g. element or relation spans) having some
@@ -40,7 +38,7 @@
*
* @author margaretha
*/
-public class SpanWithAttributeQuery extends SpanWithIdQuery {
+public class SpanWithAttributeQuery extends SimpleSpanQuery {
public boolean isMultipleAttributes;
private String type;
@@ -91,7 +89,7 @@
* <code>true</code> if payloads are to be collected,
* otherwise <code>false</code>.
*/
- public SpanWithAttributeQuery (SpanWithIdQuery firstClause,
+ public SpanWithAttributeQuery (SimpleSpanQuery firstClause,
SpanAttributeQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
@@ -109,7 +107,7 @@
* <code>true</code> if payloads are to be collected,
* otherwise <code>false</code>.
*/
- public SpanWithAttributeQuery (SpanWithIdQuery firstClause,
+ public SpanWithAttributeQuery (SimpleSpanQuery firstClause,
List<SpanQuery> secondClauses,
boolean collectPayloads) {
super(firstClause, secondClauses, collectPayloads);
@@ -137,7 +135,7 @@
if (SpanElementQuery.class.isInstance(firstClause)) {
type = "spanElementWithAttribute";
}
- else if (SpanRelationQuery.class.isInstance(firstClause)) {
+ else if (SpanFocusQuery.class.isInstance(firstClause)) {
type = "spanRelationWithAttribute";
}
else if (SpanTermWithIdQuery.class.isInstance(firstClause)) {
@@ -147,16 +145,16 @@
@Override
- public SimpleSpanQuery clone () {
+ public SpanWithAttributeQuery clone () {
if (secondClause != null) {
if (isMultipleAttributes) {
return new SpanWithAttributeQuery(
- (SpanWithIdQuery) firstClause.clone(),
+ (SimpleSpanQuery) firstClause.clone(),
cloneClauseList(), collectPayloads);
}
else {
return new SpanWithAttributeQuery(
- (SpanWithIdQuery) firstClause.clone(),
+ (SimpleSpanQuery) firstClause.clone(),
(SpanAttributeQuery) secondClause.clone(),
collectPayloads);
}
@@ -195,25 +193,10 @@
termContexts);
}
- Spans spans = this.getFirstClause().getSpans(context, acceptDocs,
+ SimpleSpans spans = (SimpleSpans) this.getFirstClause().getSpans(
+ context, acceptDocs, termContexts);
+ return new SpansWithAttribute(this, spans, context, acceptDocs,
termContexts);
-
- if (type.equals("spanElementWithAttribute")) {
- return new SpansWithAttribute(this, (ElementSpans) spans, context,
- acceptDocs, termContexts);
- }
- else if (type.equals("spanRelationWithAttribute")) {
- return new SpansWithAttribute(this, (RelationSpans) spans, context,
- acceptDocs, termContexts);
- }
- else if (type.equals("spanTermWithAttribute")) {
- return new SpansWithAttribute(this, (TermSpansWithId) spans,
- context, acceptDocs, termContexts);
- }
- else {
- throw new IllegalArgumentException("Span query type: " + type
- + "is unknown.");
- }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
deleted file mode 100644
index 3545808..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package de.ids_mannheim.korap.query;
-
-import java.util.List;
-
-import org.apache.lucene.search.spans.SpanQuery;
-
-/**
- * Base query for span queries whose resulting spans requires an id,
- * for
- * instance {@link SpanElementQuery} and {@link SpanRelationQuery}.
- *
- * @author margaretha
- *
- */
-public abstract class SpanWithIdQuery extends SimpleSpanQuery {
-
- /**
- * Constructs SpanWithIdQuery based on the given {@link SpanQuery}
- * and the
- * collectPayloads flag, for example, {@link SpanElementQuery}.
- *
- * @param firstClause
- * a SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause, boolean collectPayloads) {
- super(firstClause, collectPayloads);
- }
-
-
- /**
- * Constructs SpanWithIdQuery based on two span queries and the
- * collectPayloads flag, for instance, query a relation having a
- * specific
- * attribute.
- *
- * @param firstClause
- * a SpanQuery
- * @param secondClause
- * a SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause, SpanQuery secondClause,
- boolean collectPayloads) {
- super(firstClause, secondClause, collectPayloads);
- }
-
-
- /**
- * Constructs SpanWithIdQuery based on a span query and a list of
- * span
- * queries, for instance, query an element having two specific
- * attributes.
- *
- * @param firstClause
- * a SpanQuery
- * @param secondClauses
- * a list of SpanQuery
- * @param collectPayloads
- * a boolean flag representing the value
- * <code>true</code> if payloads are to be collected,
- * otherwise
- * <code>false</code>.
- */
- public SpanWithIdQuery (SpanQuery firstClause,
- List<SpanQuery> secondClauses,
- boolean collectPayloads) {
- super(firstClause, secondClauses, collectPayloads);
- }
-
-
- public SpanWithIdQuery (List<SpanQuery> clauses, boolean collectPayloads) {
- super(clauses, collectPayloads);
- }
-}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 2816c96..cf70cb9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -44,7 +44,7 @@
*
* @author margaretha
* */
-public class AttributeSpans extends SpansWithId {
+public class AttributeSpans extends SimpleSpans {
private List<CandidateAttributeSpan> candidateList;
private int currentDoc, currentPosition;
@@ -69,6 +69,8 @@
Map<Term, TermContext> termContexts)
throws IOException {
super(spanAttributeQuery, context, acceptDocs, termContexts);
+ this.hasSpanId = true;
+
candidateList = new ArrayList<>();
hasMoreSpans = firstSpans.next();
if (hasMoreSpans) {
@@ -163,6 +165,7 @@
return new CandidateAttributeSpan(firstSpans, spanId, end);
}
else if (payload.get(0).length == 10) {
+ start = wrapper.getInt(0);
end = wrapper.getInt(4);
spanId = wrapper.getShort(8);
return new CandidateAttributeSpan(firstSpans, spanId, start, end);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index c78eee4..8198195 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -19,7 +19,7 @@
public class CandidateSpan implements Comparable<CandidateSpan>, Cloneable {
protected int doc, start, end;
private long cost;
- private Collection<byte[]> payloads = new ArrayList<>();
+ private Collection<byte[]> payloads;
private int position;
private CandidateSpan childSpan; // used for example for multiple distance
// with unordered constraint
@@ -41,8 +41,18 @@
this.start = span.start();
this.end = span.end();
this.cost = span.cost();
- if (span.isPayloadAvailable())
+
+ this.payloads = new ArrayList<>();
+ if (span.isPayloadAvailable()) {
setPayloads(span.getPayload());
+ }
+ if (span instanceof SimpleSpans) {
+ SimpleSpans temp = (SimpleSpans) span;
+ this.spanId = temp.getSpanId();
+ }
+ else if (span instanceof ClassSpans) {
+ this.spanId = ((ClassSpans) span).getNumber();
+ }
}
@@ -180,6 +190,7 @@
*/
public void setPayloads (Collection<byte[]> payloads) {
+ this.payloads = new ArrayList<>();
for (byte[] b : payloads) {
if (b == null)
this.payloads.add(null);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 35fe53f..4e97dbc 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -10,6 +10,7 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
@@ -84,6 +85,16 @@
};
+ public byte getNumber () {
+ return number;
+ }
+
+
+ public void setNumber (byte number) {
+ this.number = number;
+ }
+
+
@Override
public int doc () {
return spans.doc();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 21212c7..d2ceffe 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -11,10 +11,9 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,7 +27,7 @@
* @author margaretha
* @author diewald
*/
-public class ElementSpans extends SpansWithId {
+public class ElementSpans extends SimpleSpans {
private TermSpans termSpans;
private boolean lazyLoaded = false;
@@ -58,6 +57,7 @@
super(spanElementQuery, context, acceptDocs, termContexts);
termSpans = (TermSpans) this.firstSpans;
hasMoreSpans = true;
+ // hasSpanId = true;
};
@@ -120,7 +120,13 @@
this.matchEndPosition = bb.getInt(8);
// Copy element id
- this.setSpanId(this.hasSpanId ? bb.getShort(12) : (short) -1);
+ if (length >= 14) {
+ this.setSpanId(bb.getShort(12));
+ this.hasSpanId = true;
+ }
+ else {
+ this.setSpanId((short) -1);
+ }
// Copy the start and end character offsets
byte[] b = new byte[8];
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index eabb8c0..2da1558 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -1,66 +1,69 @@
package de.ids_mannheim.korap.query.spans;
-import static de.ids_mannheim.korap.util.KrillByte.*;
+import static de.ids_mannheim.korap.util.KrillByte.byte2int;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
-
-import java.io.IOException;
-
-import java.util.Map;
-import java.util.ArrayList;
-import java.util.*;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+
/**
- * Spans, that can focus on the span boundaries of classed subqueries.
+ * Spans, that can focus on the span boundaries of classed
+ * subqueries.
* The boundaries of the classed subquery may exceed the boundaries of
* the
* nested query.
*
* In case multiple classes are found with the very same number, the
- * span
- * is maximized to start on the first occurrence from the left and end
- * on
- * the last occurrence on the right.
+ * span is
+ * maximized to start on the first occurrence from the left and end on
+ * the last
+ * occurrence on the right.
*
- * In case the class to focus on is not found in the payloads,
- * the match is ignored.
+ * In case the class to focus on is not found in the payloads, the
+ * match is
+ * ignored.
*
* <strong>Warning</strong>: Payloads other than class payloads won't
- * bubble up currently. That behaviour may change in the future
+ * bubble up
+ * currently. That behaviour may change in the future
*
* @author diewald
*/
-public class FocusSpans extends Spans {
- private List<byte[]> wrappedPayload;
- private Collection<byte[]> payload;
- private final Spans spans;
- private byte number;
-
- private SpanQuery wrapQuery;
+public class FocusSpans extends SimpleSpans {
+ private List<Byte> classNumbers;
+ private SpanQuery query;
private final Logger log = LoggerFactory.getLogger(FocusSpans.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- private int start = -1, end;
- private int tempStart = 0, tempEnd = 0;
+ // private SimpleSpans originalSpans;
+ private boolean isSorted;
+ private List<CandidateSpan> candidateSpans;
+ private int windowSize = 10;
+ private int currentDoc;
+ private byte number;
/**
* Construct a FocusSpan for the given {@link SpanQuery}.
*
- * @param wrapQuery
+ * @param query
* A {@link SpanQuery}.
* @param context
* The {@link AtomicReaderContext}.
@@ -73,139 +76,141 @@
* The class number to focus on.
* @throws IOException
*/
- public FocusSpans (SpanQuery wrapQuery, AtomicReaderContext context,
- Bits acceptDocs, Map<Term, TermContext> termContexts,
- byte number) throws IOException {
- this.spans = wrapQuery.getSpans(context, acceptDocs, termContexts);
- this.number = number;
- this.wrapQuery = wrapQuery;
- this.wrappedPayload = new ArrayList<byte[]>(6);
- };
+ public FocusSpans (SpanFocusQuery query, AtomicReaderContext context,
+ Bits acceptDocs, Map<Term, TermContext> termContexts)
+ throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ if (query.getClassNumbers() == null) {
+ throw new IllegalArgumentException(
+ "At least one class number must be specified.");
+ }
+ classNumbers = query.getClassNumbers();
+ isSorted = query.isSorted();
+ candidateSpans = new ArrayList<CandidateSpan>();
+ hasMoreSpans = firstSpans.next();
+ currentDoc = firstSpans.doc();
-
- @Override
- public Collection<byte[]> getPayload () throws IOException {
- return wrappedPayload;
- };
-
-
- @Override
- public boolean isPayloadAvailable () {
- return wrappedPayload.isEmpty() == false;
- };
-
-
- @Override
- public int doc () {
- return spans.doc();
- };
-
-
- @Override
- public int start () {
- return start;
- };
-
-
- @Override
- public int end () {
- return end;
- };
+ // matchPayload = new ArrayList<byte[]>(6);
+ this.query = query;
+ hasSpanId = true;
+ }
@Override
public boolean next () throws IOException {
- if (DEBUG)
- log.trace("Forward next match in {}", this.doc());
+ matchPayload.clear();
+ CandidateSpan cs;
+ while (hasMoreSpans || candidateSpans.size() > 0) {
+ if (isSorted) {
- // Next span
- while (spans.next()) {
- if (DEBUG)
- log.trace("Forward next inner span");
+ if (firstSpans.isPayloadAvailable()
+ && updateSpanPositions(cs = new CandidateSpan(
+ firstSpans))) {
+ setMatch(cs);
+ hasMoreSpans = firstSpans.next();
+ return true;
+ }
+ hasMoreSpans = firstSpans.next();
+ }
+ else if (candidateSpans.isEmpty()) {
+ currentDoc = firstSpans.doc();
+ collectCandidates();
+ Collections.sort(candidateSpans);
+ }
+ else {
+ setMatch(candidateSpans.get(0));
+ candidateSpans.remove(0);
+ return true;
+ }
+ }
- // No classes stored
- wrappedPayload.clear();
-
- start = -1;
- if (spans.isPayloadAvailable()) {
- end = 0;
-
- // Iterate over all payloads and find the maximum span per class
- for (byte[] payload : spans.getPayload()) {
-
- // No class payload - ignore
- // this may be problematic for other calculated payloads!
- if (payload.length != 9) {
- if (DEBUG)
- log.trace("Ignore old payload {}", payload);
- continue;
- };
-
- // Found class payload of structure <i>start<i>end<b>class
- // and classes are matches!
- if (payload[8] == this.number) {
- tempStart = byte2int(payload, 0);
- tempEnd = byte2int(payload, 4);
-
- if (DEBUG) {
- log.trace("Found matching class {}-{}", tempStart,
- tempEnd);
- };
-
- // Set start position
- if (start == -1 || tempStart < start)
- start = tempStart;
-
- // Set end position
- if (tempEnd > end)
- end = tempEnd;
- };
-
- // Definately keep class information
- // Even if it is already used for shrinking
- wrappedPayload.add(payload);
- };
- };
-
- // Class not found
- if (start == -1)
- continue;
-
- if (DEBUG) {
- log.trace("Start to focus on class {} from {} to {}", number,
- start, end);
- };
- return true;
- };
-
- // No more spans
- this.wrappedPayload.clear();
return false;
- };
+ }
+
+
+ private void collectCandidates () throws IOException {
+ CandidateSpan cs = null;
+ while (hasMoreSpans && candidateSpans.size() < windowSize
+ && firstSpans.doc() == currentDoc) {
+
+ if (firstSpans.isPayloadAvailable()
+ && updateSpanPositions(cs = new CandidateSpan(firstSpans))) {
+ candidateSpans.add(cs);
+ }
+ hasMoreSpans = firstSpans.next();
+ }
+ }
+
+
+ private void setMatch (CandidateSpan cs) {
+ matchStartPosition = cs.getStart();
+ matchEndPosition = cs.getEnd();
+ matchDocNumber = cs.getDoc();
+ matchPayload.addAll(cs.getPayloads());
+ setSpanId(cs.getSpanId());
+ }
+
+
+ private boolean updateSpanPositions (CandidateSpan candidateSpan)
+ throws IOException {
+ int minPos = 0, maxPos = 0;
+ int classStart, classEnd;
+ boolean isStart = true;
+ boolean isClassFound = false;
+
+ candidateSpan.getPayloads().clear();
+
+ // Iterate over all payloads and find the maximum span per class
+ for (byte[] payload : firstSpans.getPayload()) {
+ // No class payload - ignore
+ // this may be problematic for other calculated payloads!
+ if (payload.length == 9) {
+ if (classNumbers.contains(payload[8])) {
+ isClassFound = true;
+ classStart = byte2int(payload, 0);
+ classEnd = byte2int(payload, 4);
+
+ if (isStart || classStart < minPos) {
+ minPos = classStart;
+ isStart = false;
+ }
+ if (classEnd > maxPos) {
+ maxPos = classEnd;
+ }
+ }
+ candidateSpan.getPayloads().add(payload.clone());
+ }
+
+ }
+
+ if (isClassFound) {
+ candidateSpan.start = minPos;
+ candidateSpan.end = maxPos;
+ }
+
+ return isClassFound;
+ }
// Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
- if (DEBUG)
- log.trace("Skip MatchSpans {} -> {}", this.doc(), target);
-
- if (this.doc() < target && spans.skipTo(target)) {
-
- };
+ if (this.doc() < target && firstSpans.skipTo(target)) {
+ return next();
+ }
return false;
};
@Override
public String toString () {
- return getClass().getName() + "(" + this.wrapQuery.toString() + ")@"
+ return getClass().getName() + "(" + this.query.toString() + ")@"
+ (doc() + ":" + start() + "-" + end());
};
@Override
public long cost () {
- return spans.cost();
+ return firstSpans.cost();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
index 8f97f35..d8299ef 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
@@ -8,9 +8,9 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanRelationQuery;
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
/**
* RelationBaseSpans is a base class for relation spans containing
@@ -24,14 +24,16 @@
* @author margaretha
*
*/
-public abstract class RelationBaseSpans extends SpansWithId {
+public abstract class RelationBaseSpans extends SimpleSpans {
protected short leftId, rightId;
protected int leftStart, leftEnd;
protected int rightStart, rightEnd;
- public RelationBaseSpans () {};
+ public RelationBaseSpans () {
+ this.hasSpanId = true;
+ };
/**
@@ -47,11 +49,12 @@
* @param termContexts
* @throws IOException
*/
- public RelationBaseSpans (SpanWithIdQuery spanWithIdQuery,
+ public RelationBaseSpans (SimpleSpanQuery spanWithIdQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
throws IOException {
super(spanWithIdQuery, context, acceptDocs, termContexts);
+ this.hasSpanId = true;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
index 38eb5fe..853ce2f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -169,28 +169,34 @@
ByteBuffer bb = ByteBuffer.allocate(length);
bb.put(payload.get(0));
+ cs.setLeftStart(cs.start);
+
int i;
switch (length) {
- case 10: // Token to token
+ case 10: // Token to token
i = bb.getInt(0);
- cs.setRightStart(i - 1);
- cs.setRightEnd(i);
+ cs.setLeftEnd(cs.start + 1);
+ cs.setRightStart(i);
+ cs.setRightEnd(i + 1);
break;
case 14: // Token to span
+ cs.setLeftEnd(cs.start + 1);
cs.setRightStart(bb.getInt(0));
cs.setRightEnd(bb.getInt(4));
break;
case 15: // Span to token
cs.setEnd(bb.getInt(0));
+ cs.setLeftEnd(cs.end);
i = bb.getInt(5);
- cs.setRightStart(i - 1);
- cs.setRightEnd(i);
+ cs.setRightStart(i);
+ cs.setRightEnd(i + 1);
break;
case 18: // Span to span
cs.setEnd(bb.getInt(0));
+ cs.setLeftEnd(cs.end);
cs.setRightStart(bb.getInt(4));
cs.setRightEnd(bb.getInt(8));
break;
@@ -199,7 +205,7 @@
cs.setRightId(bb.getShort(length - 2)); //right id
cs.setLeftId(bb.getShort(length - 4)); //left id
cs.setSpanId(bb.getShort(length - 6)); //relation id
- // Payload is cleared.
+ // Payload is cleared.
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
index 79ef116..8ac53b8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.util.Collection;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
@@ -17,7 +18,7 @@
*
* @author margaretha
* */
-public class SegmentSpans extends NonPartialOverlappingSpans {
+public class SegmentSpans extends SimpleSpans {
private boolean isRelation;
@@ -38,11 +39,49 @@
throws IOException {
super(spanSegmentQuery, context, acceptDocs, termContexts);
if (spanSegmentQuery.isRelation()) {
- SpansWithId s2 = (SpansWithId) secondSpans;
- // hacking for element query
- s2.hasSpanId = true;
isRelation = true;
}
+
+ collectPayloads = true;
+ hasMoreSpans = secondSpans.next();
+ }
+
+
+ @Override
+ public boolean next () throws IOException {
+ // Warning: this does not work for overlapping spans
+ // e.g. get multiple second spans in a firstspan
+ hasMoreSpans &= firstSpans.next();
+ isStartEnumeration = false;
+ matchPayload.clear();
+ return advance();
+ }
+
+
+ /**
+ * Advances to the next match.
+ *
+ * @return <code>true</code> if a match is found,
+ * <code>false</code> otherwise.
+ * @throws IOException
+ */
+ protected boolean advance () throws IOException {
+ // The complexity is linear for searching in a document.
+ // It's better if we can skip to >= position in a document.
+ while (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) {
+ int matchCase = findMatch();
+ if (matchCase == 0) {
+ doCollectPayloads();
+ return true;
+ }
+ else if (matchCase == 1) {
+ hasMoreSpans = secondSpans.next();
+ }
+ else {
+ hasMoreSpans = firstSpans.next();
+ }
+ }
+ return false;
}
@@ -52,21 +91,21 @@
* secondspan are identical.
*
* */
- @Override
protected int findMatch () {
RelationSpans s1;
- SpansWithId s2;
+ SimpleSpans s2;
if (firstSpans.start() == secondSpans.start()
&& firstSpans.end() == secondSpans.end()) {
if (isRelation) {
s1 = (RelationSpans) firstSpans;
- s2 = (SpansWithId) secondSpans;
+ s2 = (SimpleSpans) secondSpans;
//System.out.println("segment: " + s1.getRightStart() + " "
// + s1.getRightEnd());
if (s1.getLeftId() == s2.getSpanId()) {
setMatch();
+ setSpanId(s2.getSpanId());
return 0;
}
}
@@ -89,4 +128,44 @@
matchStartPosition = firstSpans.start();
matchEndPosition = firstSpans.end();
}
+
+
+ /**
+ * Collects available payloads from the current first and second
+ * spans.
+ *
+ * @throws IOException
+ */
+ private void doCollectPayloads () throws IOException {
+ Collection<byte[]> payload;
+ if (collectPayloads) {
+ if (firstSpans.isPayloadAvailable()) {
+ payload = firstSpans.getPayload();
+ matchPayload.addAll(payload);
+ }
+ if (secondSpans.isPayloadAvailable()) {
+ payload = secondSpans.getPayload();
+ matchPayload.addAll(payload);
+ }
+ }
+ }
+
+
+ @Override
+ public boolean skipTo (int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchPayload.clear();
+ return advance();
+ }
+
+
+ @Override
+ public long cost () {
+ return firstSpans.cost() + secondSpans.cost();
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index f1ec996..d82b8fa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -33,6 +33,9 @@
protected int matchDocNumber, matchStartPosition, matchEndPosition;
protected Collection<byte[]> matchPayload;
+ protected short spanId;
+ protected boolean hasSpanId = false;
+
public SimpleSpans () {
collectPayloads = true;
@@ -151,4 +154,26 @@
+ start() + "-" + end()) : "END"));
}
+
+
+ /**
+ * Returns the span id of the current span
+ *
+ * @return the span id of the current span
+ */
+ public short getSpanId () {
+ return spanId;
+ }
+
+
+ /**
+ * Sets the span id of the current span
+ *
+ * @param spanId
+ * span id
+ */
+ public void setSpanId (short spanId) {
+ this.spanId = spanId;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
index d7fa03f..8f0e0b9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
@@ -32,9 +32,9 @@
*
* @author margaretha
* */
-public class SpansWithAttribute extends SpansWithId {
+public class SpansWithAttribute extends SimpleSpans {
- private SpansWithId referentSpans;
+ private SimpleSpans referentSpans;
private List<AttributeSpans> attributeList;
private List<AttributeSpans> notAttributeList;
@@ -56,13 +56,16 @@
* @throws IOException
*/
public SpansWithAttribute (SpanWithAttributeQuery spanWithAttributeQuery,
- SpansWithId spansWithId,
+ SimpleSpans referentSpans,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
throws IOException {
super(spanWithAttributeQuery, context, acceptDocs, termContexts);
- referentSpans = spansWithId;
- referentSpans.hasSpanId = true; // dummy setting enabling reading elementRef
+ // if (!referentSpans.hasSpanId) {
+ // throw new
+ // IllegalArgumentException("Referent spans must have ids.");
+ // }
+ this.referentSpans = referentSpans;
hasMoreSpans = referentSpans.next();
setAttributeList(spanWithAttributeQuery, context, acceptDocs,
termContexts);
@@ -168,7 +171,7 @@
private boolean advanceAttribute () throws IOException {
while (hasMoreSpans) {
- SpansWithId referentSpans = attributeList.get(0);
+ SimpleSpans referentSpans = attributeList.get(0);
advanceNotAttributes(referentSpans);
if (checkNotReferentId(referentSpans)) {
this.matchDocNumber = referentSpans.doc();
@@ -199,8 +202,9 @@
private boolean advance () throws IOException {
while (hasMoreSpans && searchSpanPosition()) {
- // System.out.println("element: " + referentSpans.start() + ","
- // + referentSpans.end() + " ref:"+ referentSpans.getSpanId());
+ // System.out.println(referentSpans.start() + ","
+ // + referentSpans.end() + " " +
+ // referentSpans.getSpanId());
if (checkReferentId() && checkNotReferentId(referentSpans)) {
this.matchDocNumber = referentSpans.doc();
@@ -287,7 +291,7 @@
* document and
* start position.
* */
- private boolean ensureSamePosition (SpansWithId spans,
+ private boolean ensureSamePosition (SimpleSpans spans,
AttributeSpans attributes) throws IOException {
while (hasMoreSpans && ensureSameDoc(spans, attributes)) {
@@ -373,7 +377,7 @@
* <code>false</code> otherwise.
* @throws IOException
*/
- private boolean checkNotReferentId (SpansWithId referentSpans)
+ private boolean checkNotReferentId (SimpleSpans referentSpans)
throws IOException {
for (AttributeSpans notAttribute : notAttributeList) {
if (!notAttribute.isFinish()
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
deleted file mode 100644
index 9b54565..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package de.ids_mannheim.korap.query.spans;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.Bits;
-
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
-
-/**
- * Base class for enumeration of span requiring an id, such as
- * elements and
- * relations.
- *
- * @author margaretha
- * */
-public abstract class SpansWithId extends SimpleSpans {
-
- protected short spanId;
- protected boolean hasSpanId = false; // A dummy flag
-
-
- /**
- * Constructs SpansWithId for the given {@link SpanWithIdQuery}.
- *
- * @param spanWithIdQuery
- * a SpanWithIdQuery
- * @param context
- * @param acceptDocs
- * @param termContexts
- * @throws IOException
- */
- public SpansWithId (SpanWithIdQuery spanWithIdQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanWithIdQuery, context, acceptDocs, termContexts);
- }
-
-
- public SpansWithId () {}
-
-
- /**
- * Returns the span id of the current span
- *
- * @return the span id of the current span
- */
- public short getSpanId () {
- return spanId;
- }
-
-
- /**
- * Sets the span id of the current span
- *
- * @param spanId
- * span id
- */
- public void setSpanId (short spanId) {
- this.spanId = spanId;
- }
-}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
index cf12aa0..431b839 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
@@ -24,7 +24,7 @@
*
* @author margaretha
* */
-public class TermSpansWithId extends SpansWithId {
+public class TermSpansWithId extends SimpleSpans {
private TermSpans termSpans;
@@ -46,6 +46,7 @@
super(spanTermWithIdQuery, context, acceptDocs, termContexts);
termSpans = (TermSpans) firstSpans;
hasMoreSpans = termSpans.next();
+ hasSpanId = true;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
index 8916bda..ebbc219 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
@@ -6,9 +6,9 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-import de.ids_mannheim.korap.query.SpanWithIdQuery;
import de.ids_mannheim.korap.util.QueryException;
/**
@@ -133,7 +133,7 @@
private SpanQuery createSpecificSpanWithAttributeQuery ()
throws QueryException {
- SpanWithIdQuery withIdQuery = (SpanWithIdQuery) withIdQueryWrapper
+ SimpleSpanQuery withIdQuery = (SimpleSpanQuery) withIdQueryWrapper
.toQuery();
if (withIdQuery == null) {
isNull = true;
@@ -152,7 +152,7 @@
private SpanWithAttributeQuery createSpanWithSingleAttributeQuery (
- SpanWithIdQuery withIdQuery) throws QueryException {
+ SimpleSpanQuery withIdQuery) throws QueryException {
SpanAttributeQuery attrQuery = createSpanAttributeQuery(this.attrQueryWrapper);
if (attrQuery != null) {
if (withIdQuery != null) {
@@ -185,7 +185,7 @@
private SpanWithAttributeQuery createSpanWithAttributeListQuery (
- SpanWithIdQuery withIdQuery) throws QueryException {
+ SimpleSpanQuery withIdQuery) throws QueryException {
List<SpanQuery> attrQueries = new ArrayList<SpanQuery>();
SpanQuery attrQuery = null;
for (SpanQueryWrapper sqw : queryWrapperList) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 17f6cae..ff90f1d 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -1,39 +1,42 @@
package de.ids_mannheim.korap.response;
-import java.util.*;
-import java.io.*;
-
+import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
-import com.fasterxml.jackson.annotation.*;
-import com.fasterxml.jackson.annotation.JsonInclude.Include;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.*;
-
-import de.ids_mannheim.korap.index.PositionsToOffset;
-
-// Todo:
-import de.ids_mannheim.korap.response.SearchContext;
-
-import de.ids_mannheim.korap.index.AbstractDocument;
-import de.ids_mannheim.korap.response.match.HighlightCombinator;
-import de.ids_mannheim.korap.response.match.HighlightCombinatorElement;
-import de.ids_mannheim.korap.response.match.Relation;
-import de.ids_mannheim.korap.response.match.MatchIdentifier;
-import de.ids_mannheim.korap.response.match.PosIdentifier;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.document.Document;
import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import de.ids_mannheim.korap.index.AbstractDocument;
+import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.response.match.HighlightCombinator;
+import de.ids_mannheim.korap.response.match.HighlightCombinatorElement;
+import de.ids_mannheim.korap.response.match.MatchIdentifier;
+import de.ids_mannheim.korap.response.match.PosIdentifier;
+import de.ids_mannheim.korap.response.match.Relation;
/*
Todo: The implemented classes and private names are horrible!
@@ -55,7 +58,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
diff --git a/src/main/java/de/ids_mannheim/korap/server/Node.java b/src/main/java/de/ids_mannheim/korap/server/Node.java
index 6267bb9..8b4de0f 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Node.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Node.java
@@ -2,6 +2,7 @@
import java.util.*;
import java.io.*;
+import java.net.URL;
import org.glassfish.grizzly.http.server.HttpServer;
import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory;
@@ -57,9 +58,14 @@
public static HttpServer startServer () {
// Load configuration
+ URL resUrl = Node.class.getClassLoader().getResource("krill.properties");
+ if (resUrl == null) {
+ log.error("Cannot find \"krill.properties\". Please create it "
+ +"using \"krill.properties.info\" as template. Terminating.");
+ System.exit(1);
+ }
try {
- InputStream file = new FileInputStream(Node.class.getClassLoader()
- .getResource("krill.properties").getFile());
+ InputStream file = new FileInputStream(resUrl.getFile());
Properties prop = new Properties();
prop.load(file);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index 5995ded..82724b0 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -1,31 +1,29 @@
package de.ids_mannheim.korap.index;
-import java.util.*;
-import java.io.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Bits;
+import java.io.IOException;
-import static org.junit.Assert.*;
-import org.junit.Test;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Ignore;
+import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.KrillQuery;
-import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.query.QueryBuilder;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
import de.ids_mannheim.korap.response.Match;
-import de.ids_mannheim.korap.query.*;
-import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.index.MultiTermTokenStream;
-
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.index.Term;
+import de.ids_mannheim.korap.response.Result;
// mvn -Dtest=TestWithinIndex#indexExample1 test
@@ -33,6 +31,48 @@
@RunWith(JUnit4.class)
public class TestMatchIndex {
+ @Test
+ public void testEmbeddedClassQuery () throws IOException {
+ KrillIndex ki = new KrillIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
+ + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
+ + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
+ + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
+ + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
+ + "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+
+ ki.commit();
+
+ SpanQuery sq;
+ Result kr;
+
+ sq = new SpanFocusQuery(new SpanClassQuery(new SpanNextQuery(
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
+ (byte) 1), new SpanClassQuery(new SpanTermQuery(
+ new Term("base", "s:c")), (byte) 2)), (byte) 3),
+ (byte) 3);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", kr.getTotalResults(), 2);
+ assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
+ assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
+ assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr
+ .getMatch(0).getSnippetBrackets());
+ assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
+ assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr
+ .getMatch(1).getSnippetBrackets());
+
+ assertEquals("Document count", 1, ki.numberOf("base", "documents"));
+ assertEquals("Token count", 10, ki.numberOf("base", "t"));
+
+ }
+
@Test
public void indexExample1 () throws IOException {
@@ -149,26 +189,7 @@
"<span class=\"context-left\"><span class=\"more\"></span>a</span><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark><span class=\"context-right\"><span class=\"more\"></span></span>",
kr.getMatch(0).getSnippetHTML());
- sq = new SpanFocusQuery(new SpanClassQuery(new SpanNextQuery(
- new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
- (byte) 1), new SpanClassQuery(new SpanTermQuery(
- new Term("base", "s:c")), (byte) 2)), (byte) 3),
- (byte) 3);
- kr = ki.search(sq, (short) 10);
-
- assertEquals("totalResults", kr.getTotalResults(), 2);
- assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
- assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr
- .getMatch(0).getSnippetBrackets());
- assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
- assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr
- .getMatch(1).getSnippetBrackets());
-
- assertEquals("Document count", 1, ki.numberOf("base", "documents"));
- assertEquals("Token count", 10, ki.numberOf("base", "t"));
// Don't match the expected class!
sq = new SpanFocusQuery(new SpanNextQuery(new SpanClassQuery(
@@ -375,15 +396,15 @@
SpanQuery sq;
Result kr;
- sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
- "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
- new Term("base", "s:b")), (byte) 3));
-
- kr = ki.search(sq, (short) 10);
- assertEquals(kr.getSerialQuery(),
- "spanContain({2: <base:s />}, {3: base:s:b})");
- assertEquals(kr.getMatch(0).getSnippetBrackets(),
- "a[{2:{3:b}cab}]cabac");
+ // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
+ // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
+ // new Term("base", "s:b")), (byte) 3));
+ //
+ // kr = ki.search(sq, (short) 10);
+ // assertEquals(kr.getSerialQuery(),
+ // "spanContain({2: <base:s />}, {3: base:s:b})");
+ // assertEquals(kr.getMatch(0).getSnippetBrackets(),
+ // "a[{2:{3:b}cab}]cabac");
sq = new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(
new SpanElementQuery("base", "s"), (byte) 2),
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 838a572..02787b1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -3,6 +3,7 @@
import static org.junit.Assert.assertEquals;
import java.io.IOException;
+import java.util.ArrayList;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
@@ -17,7 +18,6 @@
import de.ids_mannheim.korap.query.SpanSegmentQuery;
import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
/*
@@ -124,59 +124,59 @@
"Ich kaufe die Blümen für meine Mutter.",
"[(0-3)s:Ich|_0#0-3|pos:NN$<s>1|<>:s#0-38$<i>7<s>2|<>:np#0-3$<i>1<s>3|"
+ ">:child-of$<i>0<i>7<s>1<s>3<s>2|"
- + ">:child-of$<i>0<i>1<s>6<s>1<s>3|"
- + "<:child-of$<i>1<b>0<i>1<s>3<s>3<s>1|"
+ + ">:child-of$<i>0<i>1<s>2<s>1<s>3|"
+ + "<:child-of$<i>0<s>3<s>3<s>1|"
+ "<:child-of$<i>7<i>0<i>1<s>4<s>2<s>3|"
+ "<:child-of$<i>7<i>1<i>7<s>5<s>2<s>2|"
- + "<:dep$<i>2<s>2<s>1<s>1|"
+ + "<:dep$<i>1<s>2<s>1<s>1|"
+ "r@:func=sbj$<i>0<i>7<s>1]"
+
"[(1-2)s:kaufe|_1#4-9|pos:V$<s>1|<>:vp#4-38$<i>7<s>2|"
- + ">:child-of$<i>7<i>0<i>7<s>1<s>2<s>2|"
- + ">:child-of$<i>1<i>7<s>2<s>1<s>2|"
- + "<:child-of$<i>7<b>0<i>2<s>5<s>2<s>1|"
- + "<:child-of$<i>7<i>2<i>7<s>6<s>2<s>4|"
- + ">:dep$<i>1<s>3<s>1<s>1|"
- + ">:dep$<i>4<s>4<s>1<s>1]"
+ + ">:child-of$<i>7<i>0<i>7<s>6<s>2<s>2|"
+ + ">:child-of$<i>1<i>7<s>2<s>7<s>2|"
+ + "<:child-of$<i>7<b>0<i>2<s>8<s>2<s>1|"
+ + "<:child-of$<i>7<i>2<i>7<s>9<s>2<s>4|"
+ + ">:dep$<i>0<s>3<s>1<s>1|"
+ + ">:dep$<i>3<s>4<s>1<s>1]"
+
"[(2-3)s:die|_2#10-13|pos:ART$<s>1|tt:DET$<s>2|<>:np#10-20$<i>4<s>3|<>:np#10-38$<i>7<s>4|"
- + ">:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|"
- + ">:child-of$<i>2<i>4<s>2<s>1<s>3|"
- + ">:child-of$<i>7<i>1<i>7<s>2<s>4<s>2|"
- + "<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|"
- + "<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|"
- + "<:child-of$<i>7<i>2<i>4<s>5<s>4<s>3|"
- + "<:child-of$<i>7<i>4<i>7<s>6<s>4<s>2|"
- + "<:dep$<i>4<s>3<s>1<s>1|"
- + "r@:func=obj$<i>2<i>7<s>1]" +
+ + ">:child-of$<i>4<i>2<i>7<s>10<s>3<s>4|"
+ + ">:child-of$<i>2<i>4<s>11<s>1<s>3|"
+ + ">:child-of$<i>7<i>1<i>7<s>12<s>4<s>2|"
+ + "<:child-of$<i>4<b>0<i>2<s>13<s>3<s>1|"
+ + "<:child-of$<i>4<b>0<i>3<s>14<s>3<s>1|"
+ + "<:child-of$<i>7<i>2<i>4<s>15<s>4<s>3|"
+ + "<:child-of$<i>7<i>4<i>7<s>16<s>4<s>2|"
+ + "<:dep$<i>3<s>2<s>1<s>1]" +
"[(3-4)s:Blümen|_3#14-20|pos:NN$<s>1|"
- + ">:child-of$<i>2<i>4<s>1<s>1<s>3|"
- + "<:dep$<i>2<s>2<s>1<s>2|" + ">:dep$<i>3<s>3<s>1<s>1|"
- + ">:dep$<i>5<s>4<s>1<s>1|"
- + "r@:func=head$<i>2<i>4<s>2]" +
+ + ">:child-of$<i>2<i>4<s>17<s>1<s>3|"
+ + "<:dep$<i>1<s>2<s>1<s>1|" + ">:dep$<i>2<s>3<s>1<s>1|"
+ + ">:dep$<i>4<s>4<s>1<s>1|"
+ + "r@:func=head$<i>2<i>4<s>2|"
+ + "r@:func=obj$<i>1<i>4<s>2]" +
"[(4-5)s:für|_4#21-24|pos:PREP$<s>1|<>:pp#21-38$<i>7<s>2|"
- + ">:child-of$<i>4<i>7<s>1<s>1<s>2|"
- + ">:child-of$<i>7<i>2<i>7<s>2<s>2<s>4|"
- + "<:child-of$<i>7<b>0<i>5<s>4<s>2<s>1|"
- + "<:child-of$<i>7<i>5<i>7<s>5<s>2<s>2|"
- + "<:dep$<i>4<s>1<s>1<s>1|" + ">:dep$<i>7<s>3<s>1<s>1]"
+ + ">:child-of$<i>4<i>7<s>18<s>1<s>2|"
+ + ">:child-of$<i>7<i>2<i>7<s>19<s>2<s>4|"
+ + "<:child-of$<i>7<b>0<i>5<s>20<s>2<s>1|"
+ + "<:child-of$<i>7<i>5<i>7<s>21<s>2<s>2|"
+ + "<:dep$<i>3<s>1<s>1<s>1|" + ">:dep$<i>6<s>3<s>1<s>1]"
+
"[(5-6)s:meine|_5#25-30|pos:ART$<s>1|<>:np#25-38$<i>7<s>2|"
- + ">:child-of$<i>5<i>7<s>1<s>1<s>2|"
- + ">:child-of$<i>7<i>4<i>7<s>2<s>2<s>2|"
- + "<:child-of$<i>7<b>0<i>6<s>4<s>2<s>1|"
- + "<:child-of$<i>7<b>0<i>7<s>5<s>2<s>1|"
- + "<:dep$<i>7<s>3<s>1<s>1]" +
+ + ">:child-of$<i>5<i>7<s>22<s>1<s>2|"
+ + ">:child-of$<i>7<i>4<i>7<s>23<s>2<s>2|"
+ + "<:child-of$<i>7<b>0<i>5<s>24<s>2<s>1|"
+ + "<:child-of$<i>7<b>0<i>6<s>25<s>2<s>1|"
+ + "<:dep$<i>6<s>3<s>1<s>1]" +
"[(6-7)s:Mutter.|_6#31-38|pos:NN$<s>1|"
- + ">:child-of$<i>5<i>7<s>1<s>1<s>2|"
- + ">:dep$<i>6<s>2<s>1<s>1|" + "<:dep$<i>5<s>3<s>1<s>1|"
- + "r@:func=head$<i>6<i>7<s>3]");
+ + ">:child-of$<i>5<i>7<s>26<s>1<s>2|"
+ + ">:dep$<i>5<s>2<s>1<s>1|" + "<:dep$<i>4<s>3<s>1<s>1|"
+ + "r@:func=head$<i>5<i>7<s>3]");
return fd;
}
@@ -262,163 +262,173 @@
/**
* Relations with attributes
- * NEED focusMulti on span relation query before
+ * need focusMulti on span relation query before
* SpanWithAttributeQuery
* */
- /* @Test
- public void testCase3() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- // child-of relations
- SpanRelationQuery srq= new SpanRelationQuery(new SpanTermQuery(
- new Term("base", ">:child-of")), true);
- kr = ki.search(srq,(short) 20);
-
- assertEquals((long) 13, kr.getTotalResults());
+ @Test
+ public void testCase3 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
- // child-of with attr func=sbj
- SpanWithAttributeQuery wq =
- new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery(new Term("base", "r@:func=sbj")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 10);
- assertEquals((long) 1, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos()); // token
- assertEquals(1, kr.getMatch(0).getEndPos());
-
- // child-of without attr func=sbj
- wq =
- new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery(new Term("base", "r@:func=sbj")),
- true, true),
- true
- );
- kr = ki.search(wq,(short) 20);
- assertEquals((long) 12, kr.getTotalResults());
+ // child-of relations
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", ">:child-of")), true);
+ kr = ki.search(srq, (short) 20);
- // child-of with attr func-obj
- wq = new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery( new Term("base", "r@:func=obj")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 10);
- assertEquals((long) 1, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos()); // element
- assertEquals(4,kr.getMatch(0).getEndPos());
+ assertEquals((long) 13, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(1, kr.getMatch(1).getEndPos());
+ assertEquals(1, kr.getMatch(2).getStartPos());
+ assertEquals(2, kr.getMatch(2).getEndPos());
+ assertEquals(1, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ assertEquals(2, kr.getMatch(4).getStartPos());
+ assertEquals(3, kr.getMatch(4).getEndPos());
+ assertEquals(2, kr.getMatch(5).getStartPos());
+ assertEquals(4, kr.getMatch(5).getEndPos());
- // target of a dependency relation
- srq = new SpanRelationQuery(
- new SpanTermQuery(new Term("base", "<:dep")), true);
- kr = ki.search(srq,(short) 10);
-
- assertEquals((long) 6, kr.getTotalResults());
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
- // target of a dependency relation, which is also a head
- wq = new SpanWithAttributeQuery(srq,
- new SpanAttributeQuery(
- new SpanTermQuery( new Term("base", "r@:func=head")),
- true),
- true
- );
-
- kr = ki.search(wq,(short) 20);
- // for (Match km : kr.getMatches()) {
- // System.out.println(km.getStartPos() + "," + km.getEndPos() + " "
- // + km.getSnippetBrackets());
- // }
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(3, kr.getMatch(0).getStartPos());
- assertEquals(4,kr.getMatch(0).getEndPos());
- assertEquals(6, kr.getMatch(1).getStartPos());
- assertEquals(7, kr.getMatch(1).getEndPos());
-
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 20);
+ /*
+ * for (Match km : kr.getMatches()) {
+ * System.out.println(km.getStartPos() + "," + km.getEndPos()
+ * + " " + km.getSnippetBrackets()); }
+ */
+ assertEquals((long) 13, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+ assertEquals(0, kr.getMatch(2).getStartPos());
+ assertEquals(7, kr.getMatch(2).getEndPos());
+ assertEquals(1, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ assertEquals(1, kr.getMatch(4).getStartPos());
+ assertEquals(7, kr.getMatch(4).getEndPos());
}
-
- // FOCUS has not sorted
- /** Relation with variable
- * match right, return left
- * sort by right, then sort by left
- * @throws IOException
- * */
- /*@Test
- public void testCase4() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- //return all children of np
- SpanQuery rv = new SpanFocusQuery(
- new SpanSegmentQuery(
- new SpanRelationQuery(
- new SpanTermQuery(new Term("base","<:child-of")), true),
- new SpanElementQuery("base","np"),
- true),
- (byte) 2);
-
- kr = ki.search(rv,(short) 10);
- for (Match km : kr.getMatches()) {
- System.out.println(km.getStartPos() + "," + km.getEndPos()
- // + " "+ km.getSnippetBrackets()
- );
- }
- assertEquals((long) 7, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(1,kr.getMatch(0).getEndPos());
- assertEquals(2,kr.getMatch(1).getStartPos());
- assertEquals(3,kr.getMatch(1).getEndPos());
- assertEquals(2,kr.getMatch(2).getStartPos());
- assertEquals(4,kr.getMatch(2).getEndPos());
- assertEquals(3,kr.getMatch(3).getStartPos());
- assertEquals(4,kr.getMatch(3).getEndPos());
- assertEquals(4,kr.getMatch(4).getStartPos());
- assertEquals(7,kr.getMatch(4).getEndPos());
- assertEquals(5,kr.getMatch(5).getStartPos());
- assertEquals(6,kr.getMatch(5).getEndPos());
- assertEquals(6,kr.getMatch(6).getStartPos());
- assertEquals(7,kr.getMatch(6).getEndPos());
- // sorting left problem (solved)
-
- //return all children of np that are articles
- SpanSegmentQuery rv2 = new SpanSegmentQuery(rv, new SpanTermQuery(new Term("base","pos:ART")));
- kr = ki.search(rv2,(short) 10);
-
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos());
- assertEquals(3,kr.getMatch(0).getEndPos());
- assertEquals(5,kr.getMatch(1).getStartPos());
- assertEquals(6,kr.getMatch(1).getEndPos());
-
- // return all nps whose children are articles
- /*SpanRelationPartQuery rv3 =
- new SpanRelationPartQuery(rv,
- new SpanTermWithIdQuery(new Term("base","pos:ART"), true),
- false, true, true);
- kr = ki.search(rv3,(short) 10);
-
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getStartPos());
- assertEquals(4,kr.getMatch(0).getEndPos());
- assertEquals(5,kr.getMatch(1).getStartPos());
- assertEquals(7,kr.getMatch(1).getEndPos());
-
- */
- //}
+
+ @Test
+ public void testCase4 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", ">:child-of")), true);
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=sbj")),
+ true), true);
+
+ kr = ki.search(wq, (short) 20);
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos()); // token
+ assertEquals(7, kr.getMatch(0).getEndPos());
+
+ // child-of without attr func=sbj
+ wq = new SpanWithAttributeQuery(new SpanFocusQuery(srq, classNumbers),
+ new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "r@:func=sbj")), true, true), true);
+ kr = ki.search(wq, (short) 20);
+ assertEquals((long) 12, kr.getTotalResults());
+ }
+
+
+ @Test
+ public void testCase5 () throws IOException {
+
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", "<:dep")), true);
+ kr = ki.search(srq, (short) 10);
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 10);
+ // for (Match km : kr.getMatches()) {
+ // System.out.println(km.getStartPos() + "," + km.getEndPos()
+ // + " "
+ // + km.getSnippetBrackets());
+ // }
+
+ SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "r@:func=obj")), true);
+ kr = ki.search(saq, (short) 10);
+
+ // dep relation with attr func=obj
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=obj")),
+ true), true);
+
+ kr = ki.search(wq, (short) 10);
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(1, kr.getMatch(0).getStartPos()); // element
+ assertEquals(4, kr.getMatch(0).getEndPos());
+ }
+
+
+ @Test
+ public void testCase10 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+ // target of a dependency relation
+ SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+ new Term("base", "<:dep")), true);
+ kr = ki.search(srq, (short) 10);
+ assertEquals((long) 6, kr.getTotalResults());
+
+ ArrayList<Byte> classNumbers = new ArrayList<Byte>();
+ classNumbers.add((byte) 1);
+ classNumbers.add((byte) 2);
+
+ SpanFocusQuery fq = new SpanFocusQuery(srq, classNumbers);
+ kr = ki.search(fq, (short) 10);
+ assertEquals((long) 6, kr.getTotalResults());
+
+ SpanAttributeQuery aq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "r@:func=head")), true);
+ kr = ki.search(aq, (short) 10);
+
+ // dependency relation, which is also a head
+ SpanWithAttributeQuery wq = new SpanWithAttributeQuery(
+ new SpanFocusQuery(srq, classNumbers), new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base", "r@:func=head")),
+ true), true);
+
+ kr = ki.search(wq, (short) 20);
+
+ assertEquals((long) 2, kr.getTotalResults());
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(4, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+
+ }
+
/**
* Match left return left
* Match right return right
* */
@Test
- public void testCase5 () throws IOException {
+ public void testCase6 () throws IOException {
ki.addDoc(createFieldDoc2());
ki.commit();
@@ -464,48 +474,44 @@
}
- // FOCUS has not sorted
/**
* Match left, return right
* sort by left, then sort by right
* */
- /*@Test
- public void testCase7() throws IOException {
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- // return all children that are NP
- SpanQuery rv = new SpanSegmentQuery(
- new SpanRelationQuery(
- new SpanTermQuery(new Term("base",">:child-of")), true),
- new SpanElementQuery("base","np"),
- true);
-
- //return all parents of np
- SpanQuery rv2 = new SpanFocusQuery(rv, (byte) 2);
- kr = ki.search(rv2, (short) 10);
- for (Match km : kr.getMatches()) {
- System.out.println(km.getStartPos() + "," + km.getEndPos()
- // + " "+ km.getSnippetBrackets()
- );
- }
- assertEquals((long) 4, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(7,kr.getMatch(0).getEndPos());
- assertEquals(1,kr.getMatch(1).getStartPos());
- assertEquals(7,kr.getMatch(1).getEndPos());
- assertEquals(2,kr.getMatch(2).getStartPos());
- assertEquals(7,kr.getMatch(2).getEndPos());
- assertEquals(4,kr.getMatch(3).getStartPos());
- assertEquals(7,kr.getMatch(3).getEndPos());
- // id problem (solved)
+ @Test
+ public void testCase7 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
- // return all parents of np that are PP
-
+ // return all children that are NP
+ SpanQuery rv = new SpanSegmentQuery(new SpanRelationQuery(
+ new SpanTermQuery(new Term("base", ">:child-of")), true),
+ new SpanElementQuery("base", "np"), true);
+
+ //return all parents of np
+ SpanFocusQuery rv2 = new SpanFocusQuery(rv, (byte) 2);
+ rv2.setSorted(false);
+ kr = ki.search(rv2, (short) 10);
+
+ assertEquals((long) 4, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(7, kr.getMatch(0).getEndPos());
+ assertEquals(1, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+ assertEquals(2, kr.getMatch(2).getStartPos());
+ assertEquals(7, kr.getMatch(2).getEndPos());
+ assertEquals(4, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ // id problem (solved)
+
+ // TODO: return all parents of np that are PP (not covered by this test yet)
+
}
-
- /** Relations whose source/target do not embed
- * its counterparts.
+
+
+ /**
+ * Relations whose source/target do not embed
+ * its counterparts.
* */
@Test
public void testCase8 () throws IOException {
@@ -544,4 +550,67 @@
}
+
+ /**
+ * Relation with variable: match right, return left;
+ * sort by right, then sort by left.
+ *
+ * @throws IOException
+ * */
+ @Test
+ public void testCase9 () throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ // return all children of np
+ SpanFocusQuery rv = new SpanFocusQuery(new SpanSegmentQuery(
+ new SpanRelationQuery(new SpanTermQuery(new Term("base",
+ "<:child-of")), true), new SpanElementQuery("base",
+ "np"), true), (byte) 2);
+ rv.setSorted(false);
+
+ kr = ki.search(rv, (short) 10);
+
+ assertEquals((long) 7, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(1, kr.getMatch(0).getEndPos());
+ assertEquals(2, kr.getMatch(1).getStartPos());
+ assertEquals(3, kr.getMatch(1).getEndPos());
+ assertEquals(2, kr.getMatch(2).getStartPos());
+ assertEquals(4, kr.getMatch(2).getEndPos());
+ assertEquals(3, kr.getMatch(3).getStartPos());
+ assertEquals(4, kr.getMatch(3).getEndPos());
+ assertEquals(4, kr.getMatch(4).getStartPos());
+ assertEquals(7, kr.getMatch(4).getEndPos());
+ assertEquals(5, kr.getMatch(5).getStartPos());
+ assertEquals(6, kr.getMatch(5).getEndPos());
+ assertEquals(6, kr.getMatch(6).getStartPos());
+ assertEquals(7, kr.getMatch(6).getEndPos());
+ // sorting left problem (solved)
+
+ // return all children of np that are articles
+ SpanSegmentQuery rv2 = new SpanSegmentQuery(rv, new SpanTermQuery(
+ new Term("base", "pos:ART")));
+ kr = ki.search(rv2, (short) 10);
+
+ assertEquals((long) 2, kr.getTotalResults());
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(6, kr.getMatch(1).getEndPos());
+
+ // children of np that are articles, matched via SpanTermWithIdQuery (expected spans are the articles, not the nps)
+ SpanSegmentQuery rv3 = new SpanSegmentQuery(rv,
+ new SpanTermWithIdQuery(new Term("base", "pos:ART"), true));
+ kr = ki.search(rv3, (short) 10);
+
+ assertEquals((long) 2, kr.getTotalResults());
+
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(6, kr.getMatch(1).getEndPos());
+
+ }
+
}
\ No newline at end of file