// blob: 86c5d565862c5309904b7fcb4507d690d1b2c034 [file] [log] [blame]
package de.ids_mannheim.korap.query;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.spans.Spans;
import de.ids_mannheim.korap.query.spans.ElementSpans;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/**
 * Matches spans wrapped by an element.
 *
 * <p>The element is addressed in the index through a {@link Term} whose text
 * is the element name prefixed with {@code "<>:"}.
 */
public class SpanElementQuery extends SpanQuery {

    /** Index term ({@code "<>:" + name}) that identifies the element. */
    protected Term element;

    /** Element name as passed to the constructor, without the prefix. */
    private String elementStr;

    /** Field name as passed to the constructor. */
    private String field;

    /**
     * Creates a query for the given element in the given field.
     *
     * @param field name of the field the element term lives in
     * @param term  element name; it is prefixed with {@code "<>:"} to build
     *              the term that is looked up
     */
    public SpanElementQuery(String field, String term) {
        this.field = field;
        this.elementStr = term;
        this.element = new Term(field, "<>:" + term);
    }

    /**
     * Returns the element whose spans are matched.
     *
     * @return the prefixed index term of the element
     */
    public Term getElement() {
        return element;
    }

    /**
     * Returns the field of the element term.
     */
    @Override
    public String getField() {
        return element.field();
    }

    /**
     * Adds the element term to the given set.
     *
     * @param terms set receiving the term of this query
     */
    @Override
    public void extractTerms(Set<Term> terms) {
        terms.add(element);
    }

    /**
     * Renders the query as {@code <field:element />}, with the boost
     * suffix produced by {@link ToStringUtils#boost(float)} placed before
     * the closing {@code " />"}.
     *
     * @param field not consulted; the field stored in this query is
     *              always printed
     */
    @Override
    public String toString(String field) {
        return "<" + this.field + ':' + elementStr
            + ToStringUtils.boost(getBoost()) + " />";
    }

    /**
     * Combines the superclass hash with the hash of the element term.
     */
    @Override
    public int hashCode() {
        // 37 is used as multiplier instead of the more common 31
        final int elementHash = (element == null) ? 0 : element.hashCode();
        return 37 * super.hashCode() + elementHash;
    }

    /**
     * Two queries are equal when the superclass considers them equal, they
     * are of the same class, and their element terms are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!super.equals(obj) || getClass() != obj.getClass()) {
            return false;
        }
        final SpanElementQuery that = (SpanElementQuery) obj;
        return (element == null)
            ? that.element == null
            : element.equals(that.element);
    }

    /**
     * Creates the spans of the element for one index segment.
     *
     * @param context      segment to read from
     * @param acceptDocs   documents that may be returned, handed through to
     *                     the postings enumeration
     * @param termContexts per-term contexts; may lack an entry for the
     *                     element, in which case the term is looked up in
     *                     the segment directly
     * @return {@link ElementSpans} over the element's postings, or
     *         {@link ElementSpans#EMPTY_ELEMENT_SPANS} when the element is
     *         not present in the segment
     * @throws IOException           if reading the index fails
     * @throws IllegalStateException if the element exists but no positions
     *                               enumeration is available for it
     */
    @Override
    public Spans getSpans(final AtomicReaderContext context,
                          Bits acceptDocs,
                          Map<Term,TermContext> termContexts) throws IOException {
        final TermState state = lookupTermState(context, termContexts);

        // Term is not present in that reader
        if (state == null) {
            return ElementSpans.EMPTY_ELEMENT_SPANS;
        }

        final TermsEnum segmentTerms =
            context.reader().terms(element.field()).iterator(null);
        segmentTerms.seekExact(element.bytes(), state);

        final DocsAndPositionsEnum postings = segmentTerms.docsAndPositions(
            acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS
        );

        // Element does exist, but has no positions
        if (postings == null) {
            throw new IllegalStateException("field \"" + element.field() + "\" was indexed without position data; cannot run SpanElementQuery (element=" + element.text() + ")");
        }

        return new ElementSpans(postings, element);
    }

    /**
     * Finds the term state of the element in the given segment, or
     * {@code null} when the element does not occur there.
     */
    private TermState lookupTermState(final AtomicReaderContext context,
                                      Map<Term,TermContext> termContexts) throws IOException {
        final TermContext known = termContexts.get(element);
        if (known != null) {
            return known.get(context.ord);
        }

        // No context was supplied. This happens with span-not queries,
        // which do not include the NOT side in extractTerms(), so the
        // term has to be sought in this segment now.
        final Fields segmentFields = context.reader().fields();
        if (segmentFields == null) {
            return null;
        }

        final Terms fieldTerms = segmentFields.terms(element.field());
        if (fieldTerms == null) {
            return null;
        }

        final TermsEnum cursor = fieldTerms.iterator(null);
        return cursor.seekExact(element.bytes(), true)
            ? cursor.termState()
            : null;
    }
}