Added parameter checking,
changed SpanElementQuery and ElementSpans,
added comments.
diff --git a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
index 9a92307..11e330b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
+++ b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
@@ -1,6 +1,6 @@
package de.ids_mannheim.korap.query;
-/** Specify constraints of distance in SpanDistanceQueries or
+/** Specify distance constraints in SpanDistanceQueries or
* SpanMultipleDistanceQueries
*
* @author margaretha
@@ -23,6 +23,10 @@
public DistanceConstraint(SpanElementQuery elementQuery, int min, int max, boolean
isOrdered, boolean exclusion) {
+ if (elementQuery == null){
+ throw new IllegalArgumentException("Element query cannot be null.");
+ }
+
this.unit = elementQuery.getElementStr();
this.minDistance = min;
this.maxDistance = max;
@@ -31,23 +35,6 @@
this.elementQuery = elementQuery;
}
-
-// public DistanceConstraint(int min, int max, boolean exclusion) {
-// this.unit = "w";
-// this.minDistance = min;
-// this.maxDistance = max;
-// this.exclusion = exclusion;
-// }
-//
-// public DistanceConstraint(SpanElementQuery elementQuery, int min, int max,
-// boolean exclusion) {
-// this.unit = elementQuery.getElementStr();
-// this.minDistance = min;
-// this.maxDistance = max;
-// this.exclusion = exclusion;
-// this.elementQuery = elementQuery;
-// }
-
public int getMinDistance() {
return minDistance;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
index 3582ca0..6bc3370 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
@@ -20,8 +20,11 @@
protected List<SpanQuery> clauseList;
private String field;
protected boolean collectPayloads;
-
+
public SimpleSpanQuery(SpanQuery firstClause, boolean collectPayloads) {
+ if (firstClause == null){
+ throw new IllegalArgumentException("The first clause cannot be null.");
+ }
this.field = firstClause.getField();
this.setFirstClause(firstClause);
this.collectPayloads = collectPayloads;
@@ -30,6 +33,9 @@
public SimpleSpanQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
this(firstClause,collectPayloads);
+ if (secondClause == null){
+ throw new IllegalArgumentException("The second clause cannot be null.");
+ }
checkField(secondClause);
this.setSecondClause(secondClause);
}
@@ -37,7 +43,18 @@
public SimpleSpanQuery(SpanQuery firstClause, List<SpanQuery>
secondClauses, boolean collectPayloads) {
this(firstClause,collectPayloads);
+
+ if (secondClauses == null){
+ throw new IllegalArgumentException("The list of second clauses cannot be null.");
+ }
+ if (secondClauses.size() < 1){
+ throw new IllegalArgumentException("The list of second clauses cannot be empty.");
+ }
+
for (SpanQuery secondClause : secondClauses){
+ if (secondClause == null){
+ throw new IllegalArgumentException("A second clause cannot be null.");
+ }
checkField(secondClause);
}
this.setClauseList(secondClauses);
@@ -90,7 +107,15 @@
// For rewriting fuzzy searches like wildcard and regex
@Override
public void extractTerms(Set<Term> terms) {
- firstClause.extractTerms(terms);
+
+ if (terms == null){
+ throw new IllegalArgumentException("The term set cannot be null.");
+ }
+
+ if (firstClause != null){
+ firstClause.extractTerms(terms);
+ }
+
if (secondClause != null){
secondClause.extractTerms(terms);
}
@@ -98,8 +123,7 @@
for (SpanQuery clause : clauseList){
clause.extractTerms(terms);
}
- }
-
+ }
};
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
index 6b01fdb..b8b7bd3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
@@ -18,10 +18,10 @@
import de.ids_mannheim.korap.query.spans.UnorderedElementDistanceSpans;
import de.ids_mannheim.korap.query.spans.UnorderedTokenDistanceSpans;
-/** Match two ordered or unordered Spans with minimum and maximum
+/** Match two ordered or unordered spans with some minimum and maximum
* distance constraints. The distance unit can be word (token),
* sentence or paragraph. The distance can also be specified to match
- * some Spans which do NOT co-occur with some other Spans within a min
+ * some spans which do <em>not</em> co-occur with some other spans within a min
* and max distance.
*
* @author margaretha
@@ -31,7 +31,7 @@
private boolean exclusion;
private boolean isOrdered;
private int minDistance, maxDistance;
- private SpanElementQuery elementQuery; // element distance unit
+ private SpanElementQuery elementQuery; // element distance unit (sentence or paragraph)
private String distanceUnit;
private String spanName;
private DistanceConstraint constraint;
@@ -39,6 +39,11 @@
public SpanDistanceQuery(SpanQuery firstClause, SpanQuery secondClause,
DistanceConstraint constraint, boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
+
+ if (constraint == null){
+ throw new IllegalArgumentException("Distance constraint cannot be null.");
+ }
+
this.constraint = constraint;
this.minDistance = constraint.getMinDistance();
this.maxDistance = constraint.getMaxDistance();
@@ -53,32 +58,6 @@
else { spanName = "spanDistance"; }
}
-// public SpanDistanceQuery(SpanQuery firstClause, SpanQuery secondClause,
-// int minDistance, int maxDistance, boolean isOrdered,
-// boolean collectPayloads) {
-// super(firstClause, secondClause, collectPayloads);
-// init(minDistance, maxDistance, isOrdered);
-// distanceUnit = "w";
-// spanName = "spanDistance";
-// }
-//
-// public SpanDistanceQuery(SpanElementQuery elementQuery, SpanQuery firstClause,
-// SpanQuery secondClause, int minDistance, int maxDistance,
-// boolean isOrdered, boolean collectPayloads) {
-// super(firstClause, secondClause, collectPayloads);
-// init(minDistance, maxDistance, isOrdered);
-// this.elementQuery = elementQuery;
-// distanceUnit = elementQuery.getElementStr();
-// spanName = "spanElementDistance";
-// }
-//
-// private void init(int minDistance, int maxDistance,boolean isOrdered){
-// this.minDistance = minDistance;
-// this.maxDistance = maxDistance;
-// this.isOrdered = isOrdered;
-// this.exclusion = false;
-// }
-
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
index a67fd9f..1c30699 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
@@ -14,8 +14,8 @@
import de.ids_mannheim.korap.query.spans.ElementAttributeSpans;
-/** Span enumerations of elements having some specific attribute(s) or <em>not</em>
- * having some attribute(s).
+/** Span enumerations of elements having some specific attribute(s) or
+ * <em>not</em> having some attribute(s).
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 692d7b0..1dab433 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -1,139 +1,95 @@
package de.ids_mannheim.korap.query;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.search.spans.Spans;
-
-import de.ids_mannheim.korap.query.spans.ElementSpans;
-
import java.io.IOException;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.ElementSpans;
+
/**
- * @author Nils Diewald
+ * @author Nils Diewald, Margaretha
*/
/** Matches spans wrapped by an element. */
-public class SpanElementQuery extends SpanQuery {
- protected Term element;
+public class SpanElementQuery extends SimpleSpanQuery {
+ protected Term element;
private String elementStr;
- private String field;
/** Constructor. */
- public SpanElementQuery (String field, String term) {
- StringBuilder sb = new StringBuilder("<>:");
- this.field = field;
- this.elementStr = term;
- this.element = new Term(field, sb.append(term).toString());
+ public SpanElementQuery (String field, String term) {
+ super(new SpanTermQuery(new Term(field, "<>:" + term)), true);
+ // Keep the element term per instance: a static field would be
+ // overwritten by each newly constructed query and thus shared by
+ // all instances (breaking extractTerms, equals and hashCode).
+ this.element = new Term(field, "<>:" + term);
+ this.elementStr = term;
};
-
- /** Return the element whose spans are matched. */
- public Term getElement() { return element; };
-
- @Override
- public String getField() { return element.field(); };
-
- @Override
- public void extractTerms(Set<Term> terms) {
- terms.add(element);
- };
-
- @Override
- public String toString(String field) {
- StringBuilder buffer = new StringBuilder("<");
- buffer.append(this.field).append(':').append(elementStr);
- buffer.append(ToStringUtils.boost(getBoost()));
- return buffer.append(" />").toString();
- };
-
- @Override
- public int hashCode() {
- final int prime = 37; // Instead of 31
- int result = super.hashCode();
- result = prime * result + ((element == null) ? 0 : element.hashCode());
- return result;
- };
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (!super.equals(obj))
- return false;
- if (getClass() != obj.getClass())
- return false;
- SpanElementQuery other = (SpanElementQuery) obj;
- if (element == null) {
- if (other.element != null)
- return false;
- } else if (!element.equals(other.element))
- return false;
- return true;
- };
-
+
@Override
public Spans getSpans(final AtomicReaderContext context,
Bits acceptDocs,
Map<Term,TermContext> termContexts) throws IOException {
- TermContext termContext = termContexts.get(element);
- final TermState state;
- if (termContext == null) {
- // this happens with span-not query,
- // as it doesn't include the NOT side in extractTerms()
- // so we seek to the term now in this segment...,
- // this sucks because its ugly mostly!
- final Fields fields = context.reader().fields();
- if (fields != null) {
- final Terms terms = fields.terms(element.field());
- if (terms != null) {
- final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(element.bytes(), true))
- state = termsEnum.termState();
- else
- state = null;
- }
- else
- state = null;
- }
- else
- state = null;
- }
- else
- state = termContext.get(context.ord);
-
-
- if (state == null) // term is not present in that reader
- return ElementSpans.EMPTY_ELEMENT_SPANS;
-
- final TermsEnum termsEnum = context.reader().terms(element.field()).iterator(null);
- termsEnum.seekExact(element.bytes(), state);
-
- final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
-
- if (postings != null){
- return new ElementSpans(postings, element);
- }
- // element does exist, but has no positions
- throw new IllegalStateException("field \"" + element.field() + "\" was indexed " +
- "without position data; cannot run " +
- "SpanElementQuery (element=" + element.text() + ")");
+ return new ElementSpans(this, context, acceptDocs, termContexts);
};
public String getElementStr () {
- return elementStr;
+ return elementStr;
};
public void setElementStr (String elementStr) {
- this.elementStr = elementStr;
+ this.elementStr = elementStr;
+ }
+ /** Returns a new query for the same field and element, keeping the boost. */
+ @Override
+ public SimpleSpanQuery clone() {
+ SpanElementQuery copy = new SpanElementQuery(getField(), elementStr);
+ copy.setBoost(getBoost());
+ return copy;
+ };
+ @Override
+ public void extractTerms(Set<Term> terms) {
+ terms.add(element);
};
+
+ @Override
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder("<");
+ buffer.append(getField()).append(':').append(elementStr);
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.append(" />").toString();
+ };
+
+ @Override
+ public int hashCode() {
+ final int prime = 37; // Instead of 31
+ int result = super.hashCode();
+ result = prime * result + ((element == null) ? 0 : element.hashCode());
+ return result;
+ };
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ SpanElementQuery other = (SpanElementQuery) obj;
+ if (element == null) {
+ if (other.element != null)
+ return false;
+ } else if (!element.equals(other.element))
+ return false;
+ return true;
+ };
+
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 9e0741e..0c25081 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -18,8 +18,6 @@
* */
public class SpanSegmentQuery extends SimpleSpanQuery{
- private String spanName;
-
public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause) {
this(firstClause,secondClause,true);
}
@@ -27,7 +25,6 @@
public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
super(firstClause,secondClause,collectPayloads);
- spanName = "spanSegment";
}
@Override
@@ -40,10 +37,10 @@
@Override
public SpanSegmentQuery clone() {
SpanSegmentQuery spanSegmentQuery = new SpanSegmentQuery(
- (SpanQuery) firstClause.clone(),
- (SpanQuery) secondClause.clone(),
- collectPayloads
- );
+ (SpanQuery) firstClause.clone(),
+ (SpanQuery) secondClause.clone(),
+ collectPayloads
+ );
spanSegmentQuery.setBoost(getBoost());
return spanSegmentQuery;
}
@@ -51,18 +48,15 @@
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
- sb.append(this.spanName);
- sb.append("(");
+ sb.append("spanSegment(");
sb.append(firstClause.toString(field));
- sb.append(", ");
+ sb.append(", ");
sb.append(secondClause.toString(field));
sb.append(")");
sb.append(ToStringUtils.boost(getBoost()));
return sb.toString();
}
- //TODO: Where is the hashmap?
-
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 4f468ef..bdd084d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -47,7 +47,7 @@
if (hasMoreSpans) {
currentDoc = firstSpans.doc();
currentPosition = firstSpans.start();
- }
+ }
}
@Override
@@ -56,8 +56,11 @@
return advance();
}
+
+ /** Get the next match by first checking the candidate match list
+ * and setting the list when it is empty.
+ * */
private boolean advance() throws IOException {
-
while(hasMoreSpans || !candidateList.isEmpty()){
if (!candidateList.isEmpty()){
// set AttributeSpan from
@@ -72,16 +75,20 @@
else{
logger.info("Setting candidate list");
setCandidateList();
- for (CandidateAttributeSpan cs: candidateList){
- logger.info("cs ref "+cs.getElementRef());
- }
+// for (CandidateAttributeSpan cs: candidateList){
+// logger.info("cs ref "+cs.getElementRef());
+// }
currentDoc = firstSpans.doc();
currentPosition = firstSpans.start();
}
}
return false;
}
-
+
+ /** Collects all the attributes at the same start position and sorts
+ * them by elementRef in reverse order (the ones with the bigger
+ * elementRef first).
+ * */
private void setCandidateList() throws IOException {
while (hasMoreSpans && firstSpans.doc() == currentDoc &&
@@ -96,7 +103,9 @@
Collections.sort(candidateList);
Collections.reverse(candidateList);
}
-
+
+ /** Get the elementRef from payload
+ * */
private short retrieveElementRef(Spans firstSpans) throws IOException {
List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
long s = System.nanoTime();
@@ -142,7 +151,8 @@
return firstSpans.cost();
}
-
+ /** Match candidate for attribute spans.
+ * */
class CandidateAttributeSpan extends CandidateSpan
implements Comparable<CandidateAttributeSpan>{
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index 1c9eed8..d963772 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -64,11 +64,17 @@
isStartEnumeration=false;
return advance();
}
-
+
+ /** Searches for the next match by first identifying a possible
+ * element position, and then ensuring that the element contains
+ * all the attributes and <em>does not</em> contain any of the
+ * not attributes.
+ * */
private boolean advance() throws IOException {
while (hasMoreSpans && computeElementPosition()){
- logger.info("element: " + elements.start() + ","+ elements.end() +" ref:"+elements.getElementRef());
+ logger.info("element: " + elements.start() + ","+ elements.end() +
+ " ref:"+elements.getElementRef());
if (checkElementRef() && checkNotElementRef()){
this.matchDocNumber = elements.doc();
@@ -85,6 +91,9 @@
return false;
}
+ /** Ensures that all the attribute spans have the same elementRef as
+ * the actual element's elementRef.
+ * */
private boolean checkElementRef() throws IOException{
for (AttributeSpans attribute: attributeList){
@@ -102,7 +111,9 @@
return true;
}
-
+ /** Ensures that the element does not contain any of the not attributes, i.e.
+ * its elementRef differs from the elementRefs of the not attributes.
+ * */
private boolean checkNotElementRef() throws IOException{
for (AttributeSpans notAttribute: notAttributeList){
if (elements.start() == notAttribute.start() &&
@@ -115,7 +126,9 @@
return true;
}
-
+ /** Search for a possible element having the same doc and start position as
+ * the attributes.
+ * */
private boolean computeElementPosition() throws IOException {
while (hasMoreSpans){
@@ -137,6 +150,11 @@
return false;
}
+ /** Advances the not attributes to the same or a greater doc# than the
+ * element's doc#. If a not attribute is in the same doc, it is advanced
+ * to the same or a greater start position than the element's.
+ *
+ * */
private boolean checkNotAttributeListPosition() throws IOException{
for (AttributeSpans a : notAttributeList){
@@ -155,6 +173,9 @@
return true;
}
+ /** Advances the attributes to the same doc and start position
+ * as the element.
+ * */
private boolean checkAttributeListPosition() throws IOException{
int currentPosition = elements.start();
boolean isSame = true;
@@ -178,7 +199,9 @@
return isSame;
}
-
+ /** Advance the element or attribute spans to be in the same doc
+ * and start position.
+ * */
private boolean ensureSamePosition(ElementSpans elements,
AttributeSpans attributes) throws IOException {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index c2a6da1..b5a2f97 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -1,470 +1,223 @@
package de.ids_mannheim.korap.query.spans;
-import de.ids_mannheim.korap.query.spans.KorapTermSpan;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.BytesRef;
-
+import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.ArrayList;
-import java.util.List;
-
-// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
-// Todo: Use copyFrom() instead of clone()
+import de.ids_mannheim.korap.query.SpanElementQuery;
/**
* @author Nils Diewald, margaretha
*
* Use copyFrom instead of clone
*/
-public class ElementSpans extends Spans {
+public class ElementSpans extends SimpleSpans {
- private byte[] payloadByte;
- private ByteBuffer bb = ByteBuffer.allocate(4);
-
- protected final DocsAndPositionsEnum postings;
- protected final Term term;
- private int freq = 0, count = 0;
-
- private LinkedList<KorapTermSpan> memory;
- private KorapTermSpan overflow, current, temp;
-
- public boolean isElementRef = false; // A dummy flag for
-
- public static final ElementSpans EMPTY_ELEMENT_SPANS
- = new EmptyElementSpans();
-
- private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
-
- /**
- * The constructor.
- */
- public ElementSpans(DocsAndPositionsEnum postings, Term term) {
- this.postings = postings;
- this.term = term;
-
- // storedPayload = null;
- this.memory = new LinkedList<KorapTermSpan>();
-
- // Overflow span
- this.overflow = new KorapTermSpan();
-
- // Current span
- this.current = new KorapTermSpan();
-
- // Temporary span
- this.temp = new KorapTermSpan();
- };
-
- // only for EmptyElementSpans (below)
- public ElementSpans() {
- this.term = null;
- this.postings = null;
- };
-
- @Override
- public boolean next() throws IOException {
+ private List<CandidateElementSpans> candidateList;
+ private int currentDoc, currentPosition;
+ private short elementRef;
+ private TermSpans termSpans;
- // There is a memory
- if (this.memory.size() > 0) {
- this.setToCurrent(memory.removeFirst(), 1);
-
- if (DEBUG)
- log.trace(" --- MATCH --- Fetch from memory {}",
- this.current.toString());
-
- return true;
- };
-
- // Last element in document is reached
- if (this.count == this.freq) {
-
- if (this.postings == null)
- return false;
-
-
- // There is an overflow
- if (this.overflow.doc != -1) {
- if (DEBUG)
- log.trace("Fetch from overflow");
-
- this.setToCurrent(this.overflow, 2);
-
- // Reset overflow
- this.overflow.reset();
-
- if (DEBUG)
- log.trace(" --- MATCH --- Fetch from memory {}",
- this.current.toString());
-
- return true;
- };
-
- // There is no next document
- if (!this.nextDoc())
- return false;
- };
-
- // overflow is not empty - let's treat this as current
- if (this.overflow.doc != -1) {
-
- if (DEBUG)
- log.trace("Overflow is not empty");
-
- this.setToCurrent(this.overflow, 3);
-
- // TODO: newOverflow() ???
- this.overflow.reset();
- }
- else {
- if (DEBUG)
- log.trace("Overflow is empty");
-
- // Get next posting - count is still < freq
- this.setToCurrent(4);
-
- if (this.count == this.freq) {
- if (DEBUG)
- log.trace(" --- MATCH --- Direct {}",
- this.current.toString());
- return true;
- };
- };
-
- while (this.count < this.freq) {
-
- // Temp is now the old current
- this.setCurrentToTemp();
-
- // Get new current
- this.setToCurrent(5);
-
- if (DEBUG)
- log.trace("Compare {} with {}",
- this.current.toString(),
- this.temp.toString());
-
- // The next span is not at the same position
- if (this.current.start != this.temp.start) {
-
- // Add this to memory
- if (this.memory.size() > 0) {
- if (DEBUG)
- log.trace("[1] Add to memory {}", this.temp.toString());
- this.memory.add((KorapTermSpan) this.temp.clone());
- this.overflow = this.current;
- break;
- };
-
- // There is no reason to start a memory
- this.overflow = this.current;
- this.current = this.temp;
-
- if (DEBUG)
- log.trace(" --- MATCH --- Fetch from memory {}",
- this.current.toString());
-
- return true;
- }
-
- // The positions are equal
- else {
- if (DEBUG)
- log.trace("[2] Add to memory {}", this.temp.toString());
- this.memory.add((KorapTermSpan) this.temp.clone());
- };
- };
-
- if (this.temp.doc == this.current.doc &&
- this.temp.start == this.current.start) {
- if (DEBUG)
- log.trace("[3] Add to memory {}", this.current.toString());
- this.memory.add((KorapTermSpan) this.current.clone());
- };
-
- // Sort the memory
- Collections.sort(memory);
-
- // There is now a memory
- return this.next();
- };
-
-
- // get next doc
- private boolean nextDoc () throws IOException {
-
- // Check if this doc is the last
- if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
- return false;
-
- if (DEBUG)
- log.trace("Go to next document");
-
- this.current.reset();
-
- // Advance to next doc
- this.current.doc = this.postings.nextDoc();
-
- // Check if this doc is the last
- if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
- return false;
+ public boolean isElementRef = false; // A dummy flag
- // check frequencies
- this.freq = this.postings.freq();
-
- if (DEBUG)
- log.trace("Document <{}> has {} occurrences",
- this.current.doc,
- this.freq);
-
-
- this.count = 0;
- return true;
- };
-
-
- @Override
- public boolean skipTo(int target) throws IOException {
-
- assert target > this.current.doc;
-
- // Get this doc
- this.current.doc = postings.advance(target);
-
- if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
- return false;
-
- if (this.memory != null)
- this.memory.clear();
-
- this.overflow.reset();
+ protected Logger logger = LoggerFactory.getLogger(ElementSpans.class);
-
- this.freq = this.postings.freq();
-
- if (DEBUG)
- log.trace("Document {} has {} occurrences", this.current.doc, this.freq);
-
-
- this.count = 0;
-
- if (this.next())
- return true;
-
- return false;
- };
-
-
- @Override
- public int doc() {
- return this.current.doc;
- };
-
-
- @Override
- public int start() {
- return this.current.start;
- };
-
-
- @Override
- public int end() {
- if (!this.current.isPayloadRead){
- try {
- readPayload();
- } catch (IOException e) {
- e.printStackTrace();
- }
+ public ElementSpans(SpanElementQuery spanElementQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanElementQuery, context, acceptDocs, termContexts);
+ candidateList = new ArrayList<>();
+ termSpans = (TermSpans) firstSpans;
+ hasMoreSpans = termSpans.next();
+ if (hasMoreSpans) {
+ currentDoc = termSpans.doc();
+ currentPosition = termSpans.start();
}
- return this.current.end;
- };
+ }
- public short getElementRef() throws IOException{
- if (!this.current.isPayloadRead){
- readPayload();
- }
- return this.current.elementRef;
- }
-
- private void readPayload() throws IOException {
-
- this.current.clearPayload();
- BytesRef payload = postings.getPayload();
-
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ /** Get the next match by first checking the candidate match list
+ * and setting the list when it is empty.
+ * */
+ private boolean advance() throws IOException {
+ while(hasMoreSpans || !candidateList.isEmpty()){
+ if (!candidateList.isEmpty()){
+ CandidateElementSpans cs = candidateList.get(0);
+ this.matchDocNumber = cs.getDoc();
+ this.matchStartPosition = cs.getStart();
+ this.matchEndPosition = cs.getEnd();
+ this.matchPayload = cs.getPayloads();
+ this.setElementRef(cs.getElementRef());
+ candidateList.remove(0);
+ return true;
+ }
+ else{
+ logger.info("Setting candidate list");
+ setCandidateList();
+ currentDoc = termSpans.doc();
+ currentPosition = termSpans.start();
+ }
+ }
+ return false;
+ }
+
+ /** Collect all the elements in the same start position and sort them by
+ * end position (smallest first).
+ * */
+ private void setCandidateList() throws IOException {
+ while (hasMoreSpans && termSpans.doc() == currentDoc &&
+ termSpans.start() == currentPosition){
+ CandidateElementSpans cs = new CandidateElementSpans(termSpans,
+ elementRef);
+ readPayload(cs);
+ candidateList.add(cs);
+ hasMoreSpans = termSpans.next();
+ }
+ Collections.sort(candidateList);
+ }
+
+
+ /** This method reads the payload of the termSpan and assigns the end
+ * position and element ref to the candidate match. The character offset
+ * payload is set as the candidate match payload.
+ * <br/><br/>
+ * <em>Note</em>: payloadBuffer should actually collect all other payloads
+ * besides the end position and element ref, but KorapIndex identifies an
+ * element's payload by its length (8), which covers only the character offsets. So
+ * these offsets are directly set as the candidate match payload.
+ *
+ * @author margaretha
+ * */
+ private void readPayload(CandidateElementSpans cs) throws IOException {
+ BytesRef payload = termSpans.getPostings().getPayload();
+ //ByteBuffer payloadBuffer = ByteBuffer.allocate(128);
+
if (payload != null) {
- //System.out.println(payload.bytes.length);
-
// Copy some payloads like start character and end character
- this.current.payload.put(payload.bytes, payload.offset, 8);
-
- this.current.end = readEndPostion(payload);
+ //payloadBuffer.put(payload.bytes, payload.offset, 8);
+
+ cs.setEnd(readEndPostion(payload));
if (isElementRef ){
// Copy rest of payloads after the end position and elementref
- this.current.payload.put(payload.bytes, payload.offset + 14, payload.length - 14);
- this.current.elementRef = readElementRef(payload);
+ //payloadBuffer.put(payload.bytes, payload.offset + 14, payload.length - 14);
+ cs.setElementRef(readElementRef(payload));
}
else{
// Copy rest of payloads after the end position
- this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
- this.current.elementRef = -1;
+ //payloadBuffer.put(payload.bytes, payload.offset + 12, payload.length - 12);
+ cs.setElementRef((short) -1);
}
+
+ //byte[] offsetCharacters = new byte[8];
+ //System.arraycopy(payloadBuffer.array(), 0, offsetCharacters, 0, 8);
+
+ cs.setPayloads(Collections.singletonList(readOffset(payload)));
}
else {
- this.current.end = this.current.start;
- this.current.elementRef = -1;
- };
-
- this.current.isPayloadRead = true;
-
+ cs.setEnd(cs.getStart());
+ cs.setElementRef((short) -1);
+ cs.setPayloads(null);
+ }
}
-
- private short readElementRef(BytesRef payload) {
+
+
+ /** Get the offset bytes from the payload.
+ * */
+ private byte[] readOffset(BytesRef payload){
+ byte[] b = new byte[8];
+ System.arraycopy(payload.bytes, payload.offset, b, 0, 8);
+ return b;
+ }
+
+ /** Get the end position bytes from the payload and cast it to int.
+ * */
+ private int readEndPostion(BytesRef payload) {
+ byte[] b = new byte[4];
+ System.arraycopy(payload.bytes, payload.offset + 8, b, 0, 4);
+ return ByteBuffer.wrap(b).getInt();
+ }
+
+ /** Get the elementRef bytes from the payload and cast it into short.
+ * */
+ private short readElementRef(BytesRef payload) {
byte[] b = new byte[2];
System.arraycopy(payload.bytes, payload.offset + 12, b, 0, 2);
- ByteBuffer wrapper = ByteBuffer.wrap(b);
- return wrapper.getShort();
- }
-
-
-
- private int readEndPostion(BytesRef payload) {
-
- this.payloadByte = new byte[4];
- // Copy end position integer to payloadByte
- System.arraycopy(payload.bytes, payload.offset + 8, this.payloadByte, 0, 4);
-
- bb.clear();
- int t = bb.wrap(payloadByte).getInt();
-
- if (DEBUG)
- log.trace("Get Endposition and payload: {}-{} with end position {} in doc {}",
- this.current.payload.getInt(0),
- this.current.payload.getInt(4),
- t,
- this.current.doc);
-
- return t;
+ return ByteBuffer.wrap(b).getShort();
}
@Override
- public long cost() {
- // ???
- return this.postings.cost();
- };
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)){
+ if (!firstSpans.skipTo(target)){
+ candidateList.clear();
+ return false;
+ }
+ }
+ setCandidateList();
+ matchPayload.clear();
+ isStartEnumeration=false;
+ return advance();
+ }
-
- @Override
- public Collection<byte[]> getPayload() throws IOException {
- byte[] offsetCharacters = new byte[8];
- if (!this.current.isPayloadRead)
- readPayload();
-
- System.arraycopy(this.current.payload.array(), 0, offsetCharacters, 0, 8);
-
- return Collections.singletonList(offsetCharacters);
- };
-
-
- /**
- * Sets KorapTermSpan to current element
- */
- private void setToCurrent (KorapTermSpan act, int debugNumber) {
-
- if (DEBUG)
- log.trace(
- "[{}] Set to current with {}",
- debugNumber,
- act.toString()
- );
-
- this.current = (KorapTermSpan) act.clone();
- };
-
- /**
- * Sets KorapTermSpan to current element
- */
- private void setToCurrent (int debugNumber) throws IOException {
+ @Override
+ public long cost() {
+ return termSpans.cost();
+ }
- this.current.start = this.postings.nextPosition();
- // This will directly save stored payloads
- //this.current.end = this.getPayloadEndPosition();
- readPayload();
+ public short getElementRef() {
+ return elementRef;
+ }
- if (DEBUG)
- log.trace(
- "[{}] Set new to current with {}",
- debugNumber,
- this.current.toString()
- );
-
- this.count++;
- };
-
- private void setCurrentToTemp () {
- this.temp = (KorapTermSpan) this.current.clone();
- // this.temp.copyFrom(this.current);
- };
-
-
- @Override
- public boolean isPayloadAvailable() throws IOException {
-
- if (current.payload != null)
- return true;
+ public void setElementRef(short elementRef) {
+ this.elementRef = elementRef;
+ }
- return false;
- };
-
-
- @Override
- public String toString() {
- return "spans(" + this.term.toString() + ")@" +
- (this.current.doc == -1 ? "START" : (this.current.doc == Integer.MAX_VALUE) ? "END" : this.current.doc + "-" + this.current.start);
- };
-
- public DocsAndPositionsEnum getPostings() {
- return postings;
- };
-
- private static final class EmptyElementSpans extends ElementSpans {
-
- @Override
- public boolean next() { return false; };
-
- @Override
- public boolean skipTo(int target) { return false; };
-
- @Override
- public int doc() { return DocIdSetIterator.NO_MORE_DOCS; };
-
- @Override
- public int start() { return -1; };
-
- @Override
- public int end() { return -1; };
-
- @Override
- public Collection<byte[]> getPayload() { return null; };
-
- @Override
- public boolean isPayloadAvailable() { return false; };
-
- @Override
- public long cost() { return 0; };
- };
+ /** Match candidate for element spans.
+ * */
+ class CandidateElementSpans extends CandidateSpan
+ implements Comparable<CandidateElementSpans>{
+
+ private short elementRef;
+
+ public CandidateElementSpans(Spans span, short elementRef)
+ throws IOException {
+ super(span);
+ setElementRef(elementRef);
+ }
+
+ public void setElementRef(short elementRef) {
+ this.elementRef = elementRef;
+ }
+ public short getElementRef() {
+ return elementRef;
+ }
+
+ @Override
+ public int compareTo(CandidateElementSpans o) {
+ if (this.getEnd() == o.getEnd())
+ return 0;
+ else if (this.getEnd() > o.getEnd() )
+ return 1;
+ return -1;
+ }
+ }
};