Added parameter checking, changed SpanElementQuery and ElementSpans, added comments.

commit: c7fb73173e3570140d2d7149b29c546ac9b7111d [log] [tgz]
author: Eliza Margaretha <margaretha@ids-mannheim.de> Fri Jul 25 14:11:36 2014 +0000
committer: Eliza Margaretha <margaretha@ids-mannheim.de> Fri Jul 25 14:11:36 2014 +0000
tree: 9c285e9be6e035e6479ad03793f794c5f6af03c9
parent: dc8dc34f33018dd7bb2d038a798ced88ab2b0208 [diff]
diff --git a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
index 9a92307..11e330b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
+++ b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java

@@ -1,6 +1,6 @@
 package de.ids_mannheim.korap.query;
 
-/**	Specify constraints of distance in SpanDistanceQueries or 
+/**	Specify distance constraints in SpanDistanceQueries or 
  * 	SpanMultipleDistanceQueries
  * 	 
  * 	@author margaretha
@@ -23,6 +23,10 @@
 	
 	public DistanceConstraint(SpanElementQuery elementQuery, int min, int max, boolean 
 			isOrdered, boolean exclusion) {
+		if (elementQuery == null){
+			throw new IllegalArgumentException("Element query cannot be null.");
+		}
+		
 		this.unit = elementQuery.getElementStr();
 		this.minDistance = min;
 		this.maxDistance = max;
@@ -31,23 +35,6 @@
 		this.elementQuery = elementQuery;
 	}
 	
-	
-//	public DistanceConstraint(int min, int max, boolean exclusion) {
-//		this.unit = "w";
-//		this.minDistance = min;
-//		this.maxDistance = max;
-//		this.exclusion = exclusion;
-//	}
-//	
-//	public DistanceConstraint(SpanElementQuery elementQuery, int min, int max, 
-//			boolean exclusion) {
-//		this.unit = elementQuery.getElementStr();
-//		this.minDistance = min;
-//		this.maxDistance = max;
-//		this.exclusion = exclusion;		
-//		this.elementQuery = elementQuery;		
-//	}
-	
 	public int getMinDistance() {
 		return minDistance;
 	}

diff --git a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
index 3582ca0..6bc3370 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java

@@ -20,8 +20,11 @@
 	protected List<SpanQuery> clauseList;
 	private String field;
 	protected boolean collectPayloads;
-    
+    	
 	public SimpleSpanQuery(SpanQuery firstClause, boolean collectPayloads) {
+		if (firstClause == null){
+			throw new IllegalArgumentException("The first clause cannot be null.");
+		}
     	this.field = firstClause.getField();
     	this.setFirstClause(firstClause);
     	this.collectPayloads = collectPayloads;
@@ -30,6 +33,9 @@
     public SimpleSpanQuery(SpanQuery firstClause, SpanQuery secondClause, 
     		boolean collectPayloads) {
     	this(firstClause,collectPayloads);
+    	if (secondClause == null){
+			throw new IllegalArgumentException("The second clause cannot be null.");
+		}
     	checkField(secondClause);
     	this.setSecondClause(secondClause);  	
 	}
@@ -37,7 +43,18 @@
     public SimpleSpanQuery(SpanQuery firstClause, List<SpanQuery> 
     		secondClauses, boolean collectPayloads) {
     	this(firstClause,collectPayloads);
+    	
+		if (secondClauses == null){
+			throw new IllegalArgumentException("The list of second clauses cannot be null.");
+		}
+		if (secondClauses.size() < 1){
+			throw new IllegalArgumentException("The list of second clauses cannot be empty.");
+		}
+    	
     	for (SpanQuery secondClause : secondClauses){
+    		if (secondClause == null){
+    			throw new IllegalArgumentException("A second clause cannot be null.");
+    		}
 	    	checkField(secondClause);
 		}
     	this.setClauseList(secondClauses);
@@ -90,7 +107,15 @@
 	// For rewriting fuzzy searches like wildcard and regex
 	@Override
     public void extractTerms(Set<Term> terms) {
-		firstClause.extractTerms(terms);
+		
+		if (terms == null){
+			throw new IllegalArgumentException("The term set cannot be null.");
+		}
+		
+		if (firstClause != null){
+			firstClause.extractTerms(terms);
+		}
+		
 		if (secondClause != null){
 			secondClause.extractTerms(terms);
 		}
@@ -98,8 +123,7 @@
 			for (SpanQuery clause : clauseList){
 				clause.extractTerms(terms);
 			}
-		}
-			
+		}			
     };
     
 	@Override

diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
index 6b01fdb..b8b7bd3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java

@@ -18,10 +18,10 @@
 import de.ids_mannheim.korap.query.spans.UnorderedElementDistanceSpans;
 import de.ids_mannheim.korap.query.spans.UnorderedTokenDistanceSpans;
 
-/** Match two ordered or unordered Spans with minimum and maximum 
+/** Match two ordered or unordered spans with some minimum and maximum 
  * 	distance constraints. The distance unit can be word (token), 
  * 	sentence or paragraph. The distance can also be specified to match 
- * 	some Spans which do NOT co-occur with some other Spans within a min 
+ * 	some spans which do <em>not</em> co-occur with some other Spans within a min 
  * 	and max distance. 
  * 
  * 	@author margaretha
@@ -31,7 +31,7 @@
 	private boolean exclusion;
 	private boolean isOrdered;	
 	private int minDistance, maxDistance;	
-	private SpanElementQuery elementQuery; // element distance unit
+	private SpanElementQuery elementQuery; // element distance unit (sentence or paragraph)
 	private String distanceUnit;
 	private String spanName;
 	private DistanceConstraint constraint;
@@ -39,6 +39,11 @@
 	public SpanDistanceQuery(SpanQuery firstClause, SpanQuery secondClause, 
 			DistanceConstraint constraint, boolean collectPayloads) {
 		super(firstClause, secondClause, collectPayloads);
+		
+		if (constraint == null){
+			throw new IllegalArgumentException("Distance constraint cannot be null.");
+		}
+		
 		this.constraint = constraint;
 		this.minDistance = constraint.getMinDistance();
 		this.maxDistance = constraint.getMaxDistance();
@@ -53,32 +58,6 @@
 		else { spanName = "spanDistance"; }
 	}
 	
-//	public SpanDistanceQuery(SpanQuery firstClause, SpanQuery secondClause, 
-//			int minDistance, int maxDistance, boolean isOrdered, 
-//			boolean collectPayloads) {		
-//		super(firstClause, secondClause, collectPayloads); 
-//		init(minDistance, maxDistance, isOrdered);
-//		distanceUnit = "w";
-//		spanName = "spanDistance";
-//	}
-//	
-//	public SpanDistanceQuery(SpanElementQuery elementQuery, SpanQuery firstClause, 
-//			SpanQuery secondClause, int minDistance, int maxDistance, 
-//			boolean isOrdered, boolean collectPayloads) {
-//		super(firstClause, secondClause, collectPayloads);
-//    	init(minDistance, maxDistance, isOrdered);
-//		this.elementQuery = elementQuery;
-//		distanceUnit = elementQuery.getElementStr();
-//		spanName = "spanElementDistance";
-//	}
-//	
-//	private void init(int minDistance, int maxDistance,boolean isOrdered){
-//		this.minDistance = minDistance;
-//		this.maxDistance = maxDistance;
-//		this.isOrdered = isOrdered;
-//		this.exclusion = false;
-//	}
-	
 	@Override
 	public String toString(String field) {
 		StringBuilder sb = new StringBuilder();

diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
index a67fd9f..1c30699 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java

@@ -14,8 +14,8 @@
 
 import de.ids_mannheim.korap.query.spans.ElementAttributeSpans;
 
-/** Span enumerations of elements having some specific attribute(s) or <em>not</em>
- * 	having some attribute(s).
+/** Span enumerations of elements having some specific attribute(s) or 
+ * 	<em>not</em> having some attribute(s).
  * 
  * 	@author margaretha
  * */

diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 692d7b0..1dab433 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java

@@ -1,139 +1,95 @@
 package de.ids_mannheim.korap.query;
 
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.search.spans.Spans;
-
-import de.ids_mannheim.korap.query.spans.ElementSpans;
-
 import java.io.IOException;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.ElementSpans;
+
 /** 
- * 	@author Nils Diewald
+ * 	@author Nils Diewald, Margaretha
  */
 
 /** Matches spans wrapped by an element. */
-public class SpanElementQuery extends SpanQuery {
-    protected Term element;
+public class SpanElementQuery extends SimpleSpanQuery {
+    protected static Term element;
     private String elementStr;
-    private String field;
     
     /** Constructor. */
-    public SpanElementQuery (String field, String term) {
-	StringBuilder sb = new StringBuilder("<>:");
-	this.field = field;
-	this.elementStr = term;
-	this.element = new Term(field, sb.append(term).toString());
+    public SpanElementQuery (String field, String term) {   
+    	super(new SpanTermQuery(
+    			(element = new Term(field,"<>:"+term))
+    		  ),
+    		true
+		);
+    	this.elementStr = term;
     };
-        
-    /** Return the element whose spans are matched. */
-    public Term getElement() { return element; };
-
-    @Override
-    public String getField() { return element.field(); };
-  
-    @Override
-    public void extractTerms(Set<Term> terms) {
-	terms.add(element);
-    };
-
-    @Override
-    public String toString(String field) {
-	StringBuilder buffer = new StringBuilder("<");
-	buffer.append(this.field).append(':').append(elementStr);
-	buffer.append(ToStringUtils.boost(getBoost()));
-	return buffer.append(" />").toString();
-    };
-
-    @Override
-    public int hashCode() {
-	final int prime = 37; // Instead of 31
-	int result = super.hashCode();
-	result = prime * result + ((element == null) ? 0 : element.hashCode());
-	return result;
-    };
-
-    @Override
-    public boolean equals(Object obj) {
-	if (this == obj)
-	    return true;
-	if (!super.equals(obj))
-	    return false;
-	if (getClass() != obj.getClass())
-	    return false;
-	SpanElementQuery other = (SpanElementQuery) obj;
-	if (element == null) {
-	    if (other.element != null)
-		return false;
-	} else if (!element.equals(other.element))
-	    return false;
-	return true;
-    };
-
+    
     @Override
     public Spans getSpans(final AtomicReaderContext context,
 			  Bits acceptDocs,
 			  Map<Term,TermContext> termContexts) throws IOException {
-	TermContext termContext = termContexts.get(element);
-	final TermState state;
-	if (termContext == null) {
-	    // this happens with span-not query,
-	    // as it doesn't include the NOT side in extractTerms()
-	    // so we seek to the term now in this segment...,
-	    // this sucks because its ugly mostly!
-	    final Fields fields = context.reader().fields();
-	    if (fields != null) {
-		final Terms terms = fields.terms(element.field());
-		if (terms != null) {
-		    final TermsEnum termsEnum = terms.iterator(null);
-		    if (termsEnum.seekExact(element.bytes(), true))
-			state = termsEnum.termState();
-		    else
-			state = null;
-		}
-		else
-		    state = null;
-	    }
-	    else
-		state = null;
-	}
-	else
-	    state = termContext.get(context.ord);
-
-	
-	if (state == null) // term is not present in that reader
-	    return ElementSpans.EMPTY_ELEMENT_SPANS;
-    
-	final TermsEnum termsEnum = context.reader().terms(element.field()).iterator(null);
-	termsEnum.seekExact(element.bytes(), state);
-    
-	final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
-	
-	if (postings != null){		
-	    return new ElementSpans(postings, element);
-	}
-	// element does exist, but has no positions
-	throw new IllegalStateException("field \"" + element.field() + "\" was indexed " +
-					"without position data; cannot run " +
-					"SpanElementQuery (element=" + element.text() + ")");
+    	return new ElementSpans(this, context, acceptDocs, termContexts);
     };
 
 	public String getElementStr () {
-	return elementStr;
+		return elementStr;
     };
 
     public void setElementStr (String elementStr) {
-	this.elementStr = elementStr;
+    	this.elementStr = elementStr;
+    }
+
+	@Override
+	public SimpleSpanQuery clone() {
+		// TODO Auto-generated method stub
+		return null;
+	};
+	
+    @Override
+    public void extractTerms(Set<Term> terms) {
+    	terms.add(element);
     };
+
+    @Override
+    public String toString(String field) {
+    	StringBuilder buffer = new StringBuilder("<");
+    	buffer.append(getField()).append(':').append(elementStr);
+    	buffer.append(ToStringUtils.boost(getBoost()));
+    	return buffer.append(" />").toString();
+    };
+    
+    @Override
+    public int hashCode() {
+    	final int prime = 37; // Instead of 31
+    	int result = super.hashCode();
+    	result = prime * result + ((element == null) ? 0 : element.hashCode());
+    	return result;
+    };
+
+    @Override
+    public boolean equals(Object obj) {
+		if (this == obj)
+		    return true;
+		if (!super.equals(obj))
+		    return false;
+		if (getClass() != obj.getClass())
+		    return false;
+		SpanElementQuery other = (SpanElementQuery) obj;
+		if (element == null) {
+		    if (other.element != null)
+			return false;
+		} else if (!element.equals(other.element))
+		    return false;
+		return true;
+    };
+ 
 };

diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 9e0741e..0c25081 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java

@@ -18,8 +18,6 @@
  * */
 public class SpanSegmentQuery extends SimpleSpanQuery{
 	
-	private String spanName;
-	
 	public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause) {
 		this(firstClause,secondClause,true);
 	}
@@ -27,7 +25,6 @@
 	public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause, 
 			boolean collectPayloads) { 
     	super(firstClause,secondClause,collectPayloads);
-    	spanName = "spanSegment";
 	}
 	
 	@Override
@@ -40,10 +37,10 @@
 	@Override
 	public SpanSegmentQuery clone() {
 		SpanSegmentQuery spanSegmentQuery = new SpanSegmentQuery(
-			    (SpanQuery) firstClause.clone(),
-			    (SpanQuery) secondClause.clone(),
-			    collectPayloads
-		        );
+		    (SpanQuery) firstClause.clone(),
+		    (SpanQuery) secondClause.clone(),
+		    collectPayloads
+        );
 		spanSegmentQuery.setBoost(getBoost());
 		return spanSegmentQuery;		
 	}
@@ -51,18 +48,15 @@
     @Override
 	public String toString(String field) {
 		StringBuilder sb = new StringBuilder();
-		sb.append(this.spanName);
-		sb.append("(");
+		sb.append("spanSegment(");
 		sb.append(firstClause.toString(field));
-	        sb.append(", ");
+        sb.append(", ");
 		sb.append(secondClause.toString(field));
 		sb.append(")");
 		sb.append(ToStringUtils.boost(getBoost()));
 		return sb.toString();	
     }
 	
-	//TODO: Where is the hashmap?
-		
     @Override
     public boolean equals(Object o) {
 		if (this == o) return true;

diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 4f468ef..bdd084d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java

@@ -47,7 +47,7 @@
 		if (hasMoreSpans) {
 			currentDoc = firstSpans.doc();
 			currentPosition = firstSpans.start();
-		}
+		}		
 	}
 
 	@Override
@@ -56,8 +56,11 @@
 		return advance();
 	}
 
+
+	/**	Get the next match by first checking the candidate match list
+	 * 	and setting the list when it is empty.
+	 * */
 	private boolean advance() throws IOException {		
-		
 		while(hasMoreSpans || !candidateList.isEmpty()){
 			if (!candidateList.isEmpty()){
 				// set AttributeSpan from 
@@ -72,16 +75,20 @@
 			else{
 				logger.info("Setting candidate list");
 				setCandidateList();
-				for (CandidateAttributeSpan cs: candidateList){
-					logger.info("cs ref "+cs.getElementRef());
-				}
+//				for (CandidateAttributeSpan cs: candidateList){
+//					logger.info("cs ref "+cs.getElementRef());
+//				}
 				currentDoc = firstSpans.doc();
 				currentPosition = firstSpans.start();
 			}
 		}
 		return false;
 	}
-
+	
+	/**	Collects all the attributes at the same start position and sorts
+	 * 	them by elementRef in reverse order (the ones with the bigger 
+	 * 	elementRef first). 
+	 * */
 	private void setCandidateList() throws IOException {
 		
 		while (hasMoreSpans &&	firstSpans.doc() == currentDoc && 
@@ -96,7 +103,9 @@
 		Collections.sort(candidateList);
 		Collections.reverse(candidateList);
 	}
-
+	
+	/**	Get the elementRef from payload
+	 * */
 	private short retrieveElementRef(Spans firstSpans) throws IOException {		
 		List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
 		long s = System.nanoTime();
@@ -142,7 +151,8 @@
 		return firstSpans.cost();
 	}
 	
-	
+	/** Match candidate for attribute spans.
+	 * */
 	class CandidateAttributeSpan extends CandidateSpan 
 			implements Comparable<CandidateAttributeSpan>{
 

diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index 1c9eed8..d963772 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java

@@ -64,11 +64,17 @@
 		isStartEnumeration=false;
 		return advance();
 	}
-		
+	
+	/** Searches for the next match by first identifying a possible 
+	 * 	element position, and then ensuring that the element contains
+	 * 	all the attributes and <em>does not</em> contain any of the 
+	 *  negated ("not") attributes.
+	 * */
 	private boolean advance() throws IOException {
 		
 		while (hasMoreSpans && computeElementPosition()){			
-			logger.info("element: " + elements.start() + ","+ elements.end() +" ref:"+elements.getElementRef());
+			logger.info("element: " + elements.start() + ","+ elements.end() +
+					" ref:"+elements.getElementRef());
 			
 			if (checkElementRef() && checkNotElementRef()){			
 				this.matchDocNumber = elements.doc();
@@ -85,6 +91,9 @@
 		return false;
 	}
 	
+	/** Ensures that all the attribute spans have the same elementRef as 
+	 * 	the actual element's elementRef.
+	 * */
 	private boolean checkElementRef() throws IOException{
 		
 		for (AttributeSpans attribute: attributeList){			
@@ -102,7 +111,9 @@
 		return true;
 	}
 	
-	
+	/** Ensures that the element does not contain the not attributes. In other 
+	 * 	words, its elementRef differs from the not attributes' elementRefs. 
+	 * */
 	private boolean checkNotElementRef() throws IOException{
 		for (AttributeSpans notAttribute: notAttributeList){
 			if (elements.start() == notAttribute.start() &&
@@ -115,7 +126,9 @@
 		return true;
 	}
 	
-	
+	/**	Search for a possible element having the same doc and start position as
+	 * 	the attributes.
+	 * */
 	private boolean computeElementPosition() throws IOException {		
 
 		while (hasMoreSpans){
@@ -137,6 +150,11 @@
 		return false;
 	}
 	
+	/**	Advances the not attributes to the same or a greater doc# than the 
+	 * 	element's doc#. If a not attribute is in the same doc, advances it to
+	 * 	the same or a greater start position than the element's.
+	 * 
+	 * */
 	private boolean checkNotAttributeListPosition() throws IOException{
 		
 		for (AttributeSpans a : notAttributeList){
@@ -155,6 +173,9 @@
 		return true;
 	}
 	
+	/** Advancing the attributes to be in the same doc and start position 
+	 * 	as the element.
+	 * */
 	private boolean checkAttributeListPosition() throws IOException{
 		int currentPosition = elements.start();
 		boolean isSame = true;
@@ -178,7 +199,9 @@
 		return isSame;
 	}
 	
-	
+	/** Advance the element or attribute spans to be in the same doc 
+	 * 	and start position.
+	 * */
 	private boolean ensureSamePosition(ElementSpans elements,
 			AttributeSpans attributes) throws IOException {
 		

diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index c2a6da1..b5a2f97 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java

@@ -1,470 +1,223 @@
 package de.ids_mannheim.korap.query.spans;
 
-import de.ids_mannheim.korap.query.spans.KorapTermSpan;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.BytesRef;
-
+import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
 
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.ArrayList;
-import java.util.List;
-
-// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
-// Todo: Use copyFrom() instead of clone()
+import de.ids_mannheim.korap.query.SpanElementQuery;
 
 /**  
  * @author Nils Diewald, margaretha
  *
  * Use copyFrom instead of clone
  */
-public class ElementSpans extends Spans {
+public class ElementSpans extends SimpleSpans {
 
-    private byte[] payloadByte;
-    private ByteBuffer bb = ByteBuffer.allocate(4);
-
-    protected final DocsAndPositionsEnum postings;
-    protected final Term term;
-    private int freq = 0, count = 0;
-    
-    private LinkedList<KorapTermSpan> memory;
-    private KorapTermSpan overflow, current, temp;
-    
-	public boolean isElementRef = false; // A dummy flag for 
-    
-    public static final ElementSpans EMPTY_ELEMENT_SPANS
-	= new EmptyElementSpans();
-
-    private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
-    // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = false;
-
-    
-    /**
-     * The constructor.
-     */
-    public ElementSpans(DocsAndPositionsEnum postings, Term term) {
-	this.postings = postings;
-	this.term = term;
-
-	// storedPayload = null;
-	this.memory   = new LinkedList<KorapTermSpan>();
-
-	// Overflow span
-	this.overflow = new KorapTermSpan();
-
-	// Current span
-	this.current = new KorapTermSpan();
-
-    	// Temporary span
-	this.temp = new KorapTermSpan();
-    };
-    
-    // only for EmptyElementSpans (below)
-    public ElementSpans() {
-	this.term = null;
-	this.postings = null;
-    };
-
-    @Override
-    public boolean next() throws IOException {
+	private List<CandidateElementSpans> candidateList;
+	private int currentDoc, currentPosition;
+	private short elementRef;
+	private TermSpans termSpans;
 	
-	// There is a memory
-	if (this.memory.size() > 0) {
-	    this.setToCurrent(memory.removeFirst(), 1);
-
-	    if (DEBUG)
-		log.trace(" --- MATCH --- Fetch from memory {}",
-			  this.current.toString());
-	    
-	    return true;
-	};
-
-	// Last element in document is reached
-	if (this.count == this.freq) {
-
-	    if (this.postings == null)
-		return false;
-
-
-	    // There is an overflow
-	    if (this.overflow.doc != -1) {
-		if (DEBUG)
-		    log.trace("Fetch from overflow");
-
-		this.setToCurrent(this.overflow, 2);
-
-		// Reset overflow
-		this.overflow.reset();
-
-		if (DEBUG)
-		    log.trace(" --- MATCH --- Fetch from memory {}",
-			      this.current.toString());
-	       
-		return true;
-	    };
-
-	    // There is no next document
-	    if (!this.nextDoc())
-		return false;
-	};
-
-	// overflow is not empty - let's treat this as current
-	if (this.overflow.doc != -1) {
-
-	    if (DEBUG)
-		log.trace("Overflow is not empty");
-	    
-	    this.setToCurrent(this.overflow, 3);
-
-	    // TODO: newOverflow() ???
-	    this.overflow.reset();
-	}
-	else {
-	    if (DEBUG)
-		log.trace("Overflow is empty");
-
-	    // Get next posting - count is still < freq
-	    this.setToCurrent(4);
-
-	    if (this.count == this.freq) {
-		if (DEBUG)
-		    log.trace(" --- MATCH --- Direct {}",
-			      this.current.toString());
-		return true;
-	    };
-	};
-
-	while (this.count < this.freq) {
-
-	    // Temp is now the old current
-	    this.setCurrentToTemp();
-
-	    // Get new current
-	    this.setToCurrent(5);
-
-	    if (DEBUG)
-		log.trace("Compare {} with {}",
-			  this.current.toString(),
-			  this.temp.toString());
-
-	    // The next span is not at the same position
-	    if (this.current.start != this.temp.start) {
-
-		// Add this to memory
-		if (this.memory.size() > 0) {
-		    if (DEBUG)
-			log.trace("[1] Add to memory {}", this.temp.toString());
-		    this.memory.add((KorapTermSpan) this.temp.clone());
-		    this.overflow = this.current;
-		    break;
-		};
-
-		// There is no reason to start a memory
-		this.overflow = this.current;
-		this.current = this.temp;
-
-		if (DEBUG)
-		    log.trace(" --- MATCH --- Fetch from memory {}",
-			      this.current.toString());
-
-		return true;
-	    }
-
-	    // The positions are equal
-	    else {
-		if (DEBUG)
-		    log.trace("[2] Add to memory {}", this.temp.toString());
-		this.memory.add((KorapTermSpan) this.temp.clone());
-	    };
-	};
-
-	if (this.temp.doc == this.current.doc &&
-	    this.temp.start == this.current.start) {
-	    if (DEBUG)
-		log.trace("[3] Add to memory {}", this.current.toString());
-	    this.memory.add((KorapTermSpan) this.current.clone());
-	};
-
-	// Sort the memory
-	Collections.sort(memory);
-
-	// There is now a memory
-	return this.next();
-    };
-    
-
-    // get next doc
-    private boolean nextDoc () throws IOException {
-
-	// Check if this doc is the last
-	if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
-	    return false;
-
-	if (DEBUG)
-	    log.trace("Go to next document");
-
-	this.current.reset();
-
-	// Advance to next doc
-	this.current.doc = this.postings.nextDoc();
-
-	// Check if this doc is the last
-	if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
-	    return false;
+	public boolean isElementRef = false; // A dummy flag
 	
-	// check frequencies
-	this.freq = this.postings.freq();
-
-	if (DEBUG)
-	    log.trace("Document <{}> has {} occurrences",
-		      this.current.doc,
-		      this.freq);
-
-
-	this.count = 0;
-	return true;
-    };
-
-    
-    @Override
-    public boolean skipTo(int target) throws IOException {
-
-	assert target > this.current.doc;
-
-	// Get this doc
-	this.current.doc = postings.advance(target);
-
-	if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
-	    return false;
-
-	if (this.memory != null)
-	    this.memory.clear();
-
-	this.overflow.reset();
+	protected Logger logger = LoggerFactory.getLogger(ElementSpans.class); // logger named after this class
 	
-
-	this.freq = this.postings.freq();
-
-	if (DEBUG)
-	    log.trace("Document {} has {} occurrences", this.current.doc, this.freq);
-
-	
-	this.count = 0;
-
-	if (this.next())
-	    return true;
-
-	return false;
-    };
-
-    
-    @Override
-    public int doc() {
-	return this.current.doc;
-    };
-
-    
-    @Override
-    public int start() {
-	return this.current.start;
-    };
-
-    
-    @Override
-    public int end() {
-		if (!this.current.isPayloadRead){		    
-			try {
-				readPayload();
-			} catch (IOException e) {
-				e.printStackTrace();
-			}			
+	public ElementSpans(SpanElementQuery spanElementQuery,
+			AtomicReaderContext context, Bits acceptDocs,
+			Map<Term, TermContext> termContexts) throws IOException {
+		super(spanElementQuery, context, acceptDocs, termContexts);
+		candidateList = new ArrayList<>();
+		termSpans = (TermSpans) firstSpans;
+		hasMoreSpans = termSpans.next();
+		if (hasMoreSpans) {
+			currentDoc = termSpans.doc();
+			currentPosition = termSpans.start();
 		}
-		return this.current.end;
-    };
+	}
 
-    public short getElementRef() throws IOException{
-    	if (!this.current.isPayloadRead){
-    		readPayload();
-    	}
-    	return this.current.elementRef;
-    }
-    
-    private void readPayload() throws IOException {   	
-    	
-    	this.current.clearPayload();
-	    BytesRef payload = postings.getPayload();
-	    	    
+	@Override
+	public boolean next() throws IOException {
+		isStartEnumeration=false;
+		return advance();
+	}
+	
+	/**	Get the next match by first checking the candidate match list
+	 * 	and setting the list when it is empty.
+	 * */
+	private boolean advance() throws IOException {
+		while(hasMoreSpans || !candidateList.isEmpty()){
+			if (!candidateList.isEmpty()){
+				CandidateElementSpans cs = candidateList.get(0);
+				this.matchDocNumber = cs.getDoc();
+				this.matchStartPosition = cs.getStart();
+				this.matchEndPosition = cs.getEnd();
+				this.matchPayload = cs.getPayloads();				
+				this.setElementRef(cs.getElementRef());				
+				candidateList.remove(0);
+				return true;
+			}
+			else{
+				logger.info("Setting candidate list");
+				setCandidateList();				
+				currentDoc = termSpans.doc();
+				currentPosition = termSpans.start();
+			}
+		}
+		return false;
+	}
+
+	/**	Collect all the elements in the same start position and sort them by
+	 * 	end position (smallest first).
+	 * */
+	private void setCandidateList() throws IOException {
+		while (hasMoreSpans &&	termSpans.doc() == currentDoc && 
+				termSpans.start() == currentPosition){
+			CandidateElementSpans cs = new CandidateElementSpans(termSpans,
+					elementRef);
+			readPayload(cs);
+			candidateList.add(cs);
+			hasMoreSpans = termSpans.next();
+		}
+		Collections.sort(candidateList);
+	}
+	
+	
+	/**	This method reads the payload of the termSpan and assigns the end 
+	 * 	position and element ref to the candidate match. The character offset
+	 *  payload is set as the candidate match payload.
+	 *  <br/><br/>
+	 * 	<em>Note</em>: payloadBuffer should actually collect all other payloads
+	 * 	besides the end position and element ref, but KorapIndex identifies an 
+	 * 	element's payload by its length (8), which covers only the character 
+	 * 	offsets. So these offsets are set directly as the candidate match payload.	
+	 * 
+	 * 	@author margaretha
+	 * */
+	private void readPayload(CandidateElementSpans cs) throws IOException {   	
+	    BytesRef payload = termSpans.getPostings().getPayload();
+	    //ByteBuffer payloadBuffer = ByteBuffer.allocate(128);
+	    
 	    if (payload != null) {
-	    	//System.out.println(payload.bytes.length);
-
 			// Copy some payloads like start character and end character
-			this.current.payload.put(payload.bytes, payload.offset, 8);
-
-			this.current.end = readEndPostion(payload);
+	    	//payloadBuffer.put(payload.bytes, payload.offset, 8);
+			
+			cs.setEnd(readEndPostion(payload));
 			
 			if (isElementRef ){
 				// Copy rest of payloads after the end position and elementref
-				this.current.payload.put(payload.bytes, payload.offset + 14, payload.length - 14);				
-				this.current.elementRef = readElementRef(payload);
+				//payloadBuffer.put(payload.bytes, payload.offset + 14, payload.length - 14);				
+				cs.setElementRef(readElementRef(payload));
 			}
 			else{
 				// Copy rest of payloads after the end position
-				this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
-				this.current.elementRef = -1;
+				//payloadBuffer.put(payload.bytes, payload.offset + 12, payload.length - 12);
+				cs.setElementRef((short) -1);
 			}
+			
+			//byte[] offsetCharacters = new byte[8];
+			//System.arraycopy(payloadBuffer.array(), 0, offsetCharacters, 0, 8);
+			
+			cs.setPayloads(Collections.singletonList(readOffset(payload)));
 	    }
 	    else {	
-			this.current.end = this.current.start;
-			this.current.elementRef = -1;
-    	};
-    	
-    	this.current.isPayloadRead = true;
-    	
+			cs.setEnd(cs.getStart());
+			cs.setElementRef((short) -1);
+			cs.setPayloads(null);
+    	}
 	}
-    
-    private short readElementRef(BytesRef payload) {
+	
+	
+	/**	Get the offset bytes from the payload.
+	 * */
+	private byte[] readOffset(BytesRef payload){
+		byte[] b = new byte[8];
+		System.arraycopy(payload.bytes, payload.offset, b, 0, 8);
+		return b;
+	}
+	
+	/**	Get the end position bytes from the payload and convert them to an int.
+	 * */
+	private int readEndPostion(BytesRef payload) {
+		byte[] b = new byte[4];
+		System.arraycopy(payload.bytes, payload.offset + 8, b, 0, 4);
+		return ByteBuffer.wrap(b).getInt();		
+	}
+	
+	/**	Get the elementRef bytes from the payload and convert them to a short.
+	 * */
+	private short readElementRef(BytesRef payload) {
     	byte[] b = new byte[2];
     	System.arraycopy(payload.bytes, payload.offset + 12, b, 0, 2);
-    	ByteBuffer wrapper = ByteBuffer.wrap(b);
-		return wrapper.getShort();
-	}
-
-    
-
-	private int readEndPostion(BytesRef payload) {
-		
-		this.payloadByte = new byte[4];
-		// Copy end position integer to payloadByte
-		System.arraycopy(payload.bytes, payload.offset + 8, this.payloadByte, 0, 4);
-		
-		bb.clear();
-		int t = bb.wrap(payloadByte).getInt();
-
-		if (DEBUG)
-		    log.trace("Get Endposition and payload: {}-{} with end position {} in doc {}",
-			      this.current.payload.getInt(0),
-			      this.current.payload.getInt(4),
-			      t,
-			      this.current.doc);
-		
-		return t;
+    	return ByteBuffer.wrap(b).getShort();
 	}
 
 	@Override
-    public long cost() {
-	// ???
-	return this.postings.cost();
-    };
+	public boolean skipTo(int target) throws IOException {
+		if (hasMoreSpans && (firstSpans.doc() < target)){
+  			if (!firstSpans.skipTo(target)){
+  				candidateList.clear();
+  				return false;
+  			}
+  		}		
+		setCandidateList();
+		matchPayload.clear();
+		isStartEnumeration=false;
+		return advance();
+	}
 
-    
-    @Override
-    public Collection<byte[]> getPayload() throws IOException {
-	byte[] offsetCharacters = new byte[8];
-	if (!this.current.isPayloadRead)
-	    readPayload();
-
-	System.arraycopy(this.current.payload.array(), 0, offsetCharacters, 0, 8);
-
-	return Collections.singletonList(offsetCharacters);
-    };
-
-
-    /**
-     * Sets KorapTermSpan to current element
-     */
-    private void setToCurrent (KorapTermSpan act, int debugNumber) {
-
-	if (DEBUG)
-	    log.trace(
-		"[{}] Set to current with {}",
-		debugNumber,
-		act.toString()
-	    );
-
-	this.current = (KorapTermSpan) act.clone();
-    };
-
-    /**
-     * Sets KorapTermSpan to current element
-     */
-    private void setToCurrent (int debugNumber) throws IOException {
+	@Override
+	public long cost() {
+		return termSpans.cost();
+	}
 	
-	this.current.start = this.postings.nextPosition();
-	// This will directly save stored payloads
-	//this.current.end = this.getPayloadEndPosition();
-	readPayload();
+	public short getElementRef() {
+		return elementRef;
+	}
 
-	if (DEBUG)
-	    log.trace(
-		"[{}] Set new to current with {}",
-		debugNumber,
-		this.current.toString()
-	    );
-
-	this.count++;
-    };
-
-    private void setCurrentToTemp () {
-	this.temp = (KorapTermSpan) this.current.clone();
-	// this.temp.copyFrom(this.current);
-    };
-
-
-    @Override
-    public boolean isPayloadAvailable() throws IOException {
-
-	if (current.payload != null)
-	    return true;
+	public void setElementRef(short elementRef) {
+		this.elementRef = elementRef;
+	}
 	
-	return false;
-    };
-
-    
-    @Override
-    public String toString() {
-	return "spans(" + this.term.toString() + ")@" +
-            (this.current.doc == -1 ? "START" : (this.current.doc == Integer.MAX_VALUE) ? "END" : this.current.doc + "-" + this.current.start);
-    };
-
-    public DocsAndPositionsEnum getPostings() {
-	return postings;
-    };
-
-    private static final class EmptyElementSpans extends ElementSpans {
-
-	@Override
-	public boolean next() { return false; };
-
-	@Override
-	public boolean skipTo(int target) { return false; };
-
-	@Override
-	public int doc() { return DocIdSetIterator.NO_MORE_DOCS; };
-	
-	@Override
-	public int start() { return -1; };
-
-	@Override
-	public int end() { return -1; };
-
-	@Override
-	public Collection<byte[]> getPayload() { return null; };
-
-	@Override
-	public boolean isPayloadAvailable() { return false; };
-	
-	@Override
-	public long cost() { return 0; };
-    };
+	/** Match candidate for element spans.
+	 * */	
+	class CandidateElementSpans extends CandidateSpan 
+			implements Comparable<CandidateElementSpans>{
+		
+		private short elementRef;
+		
+		public CandidateElementSpans(Spans span, short elementRef) 
+				throws IOException {
+			super(span);
+			setElementRef(elementRef);
+		}
+		
+		public void setElementRef(short elementRef) {
+			this.elementRef = elementRef;
+		}
+		public short getElementRef() {
+			return elementRef;
+		}
+		
+		@Override
+		public int compareTo(CandidateElementSpans o) {
+			if (this.getEnd() == o.getEnd())
+				return 0;
+			else if (this.getEnd() > o.getEnd() )
+				return 1;
+			return -1;			
+		}
+	}
 };
commit	c7fb73173e3570140d2d7149b29c546ac9b7111d	[log] [tgz]
author	Eliza Margaretha <margaretha@ids-mannheim.de>	Fri Jul 25 14:11:36 2014 +0000
committer	Eliza Margaretha <margaretha@ids-mannheim.de>	Fri Jul 25 14:11:36 2014 +0000
tree	9c285e9be6e035e6479ad03793f794c5f6af03c9
parent	dc8dc34f33018dd7bb2d038a798ced88ab2b0208 [diff]