Added javadoc comments
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationPartQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationPartQuery.java
index 6989a81..508aa99 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationPartQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationPartQuery.java
@@ -13,9 +13,40 @@
import de.ids_mannheim.korap.query.spans.RelationPartSpans;
/**
- * This query match a part of a relation (either left or right) to certain
+ * This query matches a part of a relation (either left or right) to certain
* elements or terms. If inversed, the start and end positions of the right part
- * of the relation are set as positions of the match.
+ * of the relation are set as the positions of the match.
+ *
+ * Examples:
+ * <ul>
+ * <li>retrieve all dependency relations "<:xip/syntax-dep_rel" whose sources
+ * (right side) are noun phrases. This query matches the right side of the
+ * relations to NP.
+ *
+ * <pre>
+ * SpanRelationQuery sq = new SpanRelationQuery(new SpanTermQuery(new Term(
+ * "tokens", "<:xip/syntax-dep_rel")), true);
+ *
+ * SpanRelationPartQuery rv = new SpanRelationPartQuery(sq, new SpanElementQuery(
+ * "tokens", "np"), true, false, true);
+ * </pre>
+ *
+ * </li>
+ *
+ * <li>retrieve all the children of NP using "<:child-of" relations where the
+ * left side is the parent and the right side is the child. This query matches
+ * the left side to NP and requires inverse on the relations, because it asks
+ * for the children, which are on the right side of the relations.
+ *
+ * <pre>
+ * SpanRelationQuery sq = new SpanRelationQuery(new SpanTermQuery(new Term(
+ * "tokens", "<:child-of")), true);
+ *
+ * SpanRelationPartQuery rv = new SpanRelationPartQuery(sq, new SpanElementQuery(
+ * "base", "np"), false, true, true);
+ * </pre>
+ * </li>
+ * </ul>
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 3792f7e..201e9df 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -30,13 +30,17 @@
* secondly by the element/relation id descendingly. This order helps to match
* element and attributes faster.
*
+ * AttributeSpans contain information about the elements they belong to, thus
+ * querying them alone is sufficient to get
+ * "any element having a specific attribute".
+ *
* @author margaretha
* */
public class AttributeSpans extends SimpleSpans {
private List<CandidateAttributeSpan> candidateList;
private int currentDoc, currentPosition;
- private short spanId;
+ private short referentId;
private boolean isFinish;
private int elementEnd;
@@ -86,7 +90,7 @@
this.matchDocNumber = cs.getDoc();
this.matchStartPosition = cs.getStart();
this.matchEndPosition = cs.getEnd();
- this.setSpanId(cs.getSpanId());
+ this.setReferentId(cs.getSpanId());
this.setElementEnd(cs.getElementEnd());
candidateList.remove(0);
return true;
@@ -145,19 +149,19 @@
*
* @return a span id, for instance a relation id or an element id
*/
- public short getSpanId() {
- return this.spanId;
+ public short getReferentId() {
+ return this.referentId;
}
/**
* Sets the span id to which an attribute span belongs, for instance a
* relation id or an element id.
*
- * @param spanId the span id to which an attribute span belongs, for
+ * @param refId the span id to which an attribute span belongs, for
* instance a relation id or an element id.
*/
- public void setSpanId(short spanId) {
- this.spanId = spanId;
+ public void setReferentId(short refId) {
+ this.referentId = refId;
}
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
index 528aec1..914bf7f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
@@ -8,49 +8,110 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanRelationQuery;
import de.ids_mannheim.korap.query.SpanWithIdQuery;
-public abstract class RelationBaseSpans extends SpansWithId{
+/**
+ * RelationBaseSpans is a base class for relation spans containing properties
+ * about the start and end positions of the right side of the relation. It can
+ * also store information about the id of the left/right side, for instance,
+ * when it is an element or another relation.
+ *
+ * @author margaretha
+ *
+ */
+public abstract class RelationBaseSpans extends SpansWithId {
- protected short leftId, rightId;
- protected int rightStart, rightEnd;
-
- public RelationBaseSpans(SpanWithIdQuery spanWithIdQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanWithIdQuery, context, acceptDocs, termContexts);
- // TODO Auto-generated constructor stub
- }
-
- public short getLeftId() {
- return leftId;
- }
+ protected short leftId, rightId;
+ protected int rightStart, rightEnd;
- public void setLeftId(short leftId) {
- this.leftId = leftId;
- }
+ /**
+ * Create a RelationBaseSpans based on the given SpanWithIdQuery.
+ *
+ * @param spanWithIdQuery a SpanWithIdQuery, for instance a
+ * {@link SpanElementQuery} or {@link SpanRelationQuery}.
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public RelationBaseSpans(SpanWithIdQuery spanWithIdQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanWithIdQuery, context, acceptDocs, termContexts);
+ }
- public short getRightId() {
- return rightId;
- }
+ /**
+ * Returns the id of the left hand side of the relation.
+ *
+ * @return an id
+ */
+ public short getLeftId() {
+ return leftId;
+ }
- public void setRightId(short rightId) {
- this.rightId = rightId;
- }
+ /**
+ * Sets the id of the left hand side of the relation.
+ *
+ * @param leftId the id of the left hand side of the relation.
+ */
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
- public int getRightStart() {
- return rightStart;
- }
+ /**
+ * Returns the id of the right hand side of the relation.
+ *
+ * @return an id
+ */
+ public short getRightId() {
+ return rightId;
+ }
- public void setRightStart(int rightStart) {
- this.rightStart = rightStart;
- }
+ /**
+ * Sets the id of the right hand side of the relation.
+ *
+ * @param rightId the id of the right hand side of the relation.
+ */
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
- public int getRightEnd() {
- return rightEnd;
- }
+ /**
+ * Returns the start position of the right hand side of the relation.
+ *
+ * @return the start position
+ */
+ public int getRightStart() {
+ return rightStart;
+ }
- public void setRightEnd(int rightEnd) {
- this.rightEnd = rightEnd;
- }
+ /**
+ * Sets the start position of the right hand side of the relation.
+ *
+ * @param rightStart the start position of the right hand side of the
+ * relation.
+ */
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ }
+
+ /**
+ * Returns the end position of the right hand side of the relation.
+ *
+ * @return the end position
+ */
+ public int getRightEnd() {
+ return rightEnd;
+ }
+
+ /**
+ * Sets the end position of the right hand side of the relation.
+ *
+ * @param rightEnd the end position of the right hand side of the relation.
+ */
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationPartSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationPartSpans.java
index 871298e..98ccf18 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationPartSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationPartSpans.java
@@ -13,309 +13,373 @@
import de.ids_mannheim.korap.query.SpanRelationPartQuery;
-/** This span enumeration returns the right part of relation spans
- * whose left part token/element positions matching the second spans,
- * or vice versa.
- *
- * Relations within a certain window, e.g element-based or token-
- * distance-based, are sorted to resolve reference within that window.
- * Resolution is limited only within an window.
+/**
+ * This span enumeration returns the right part of relation spans whose left
+ * part token/element positions match the second spans, or vice versa.
*
- * @author margaretha
+ * All relations within a certain window, e.g. element-based or token-
+ * distance-based, are sorted to resolve references within that window.
+ * Resolution is limited to that window.
+ *
+ * @author margaretha
* */
-public class RelationPartSpans extends RelationBaseSpans{
-
- private RelationBaseSpans relationSpans;
- private SpansWithId matcheeSpans;
- private ElementSpans element;
- private List<CandidateRelationSpan> candidateRelations;
-
- private boolean matchRight;
- private boolean inverse;
- private boolean hasMoreMatchees;
-
-// private short leftId, rightId;
- private int window;
-
- public RelationPartSpans(SpanRelationPartQuery query,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(query, context, acceptDocs, termContexts);
- if (query.getElementQuery() != null){
- element = (ElementSpans) query.getElementQuery().getSpans(context, acceptDocs,
- termContexts);
- }
- else{
- window = query.getWindow();
- }
- relationSpans = (RelationBaseSpans) firstSpans;
- matcheeSpans = (SpansWithId) secondSpans;
- // hack
- matcheeSpans.hasSpanId = true;
-
- hasMoreMatchees = matcheeSpans.next();
- hasMoreSpans = relationSpans.next() && hasMoreMatchees;
- if (element != null){
- hasMoreSpans &= element.next();
- }
- candidateRelations = new ArrayList<CandidateRelationSpan>();
- matchRight = query.isMatchRight();
- inverse = query.isInverseRelation();
- }
+public class RelationPartSpans extends RelationBaseSpans {
- @Override
- public boolean next() throws IOException {
- isStartEnumeration=false;
- matchPayload.clear();
- return advance();
- }
-
- protected boolean advance() throws IOException {
- while (candidateRelations.size() > 0 || hasMoreSpans){
- if (candidateRelations.size() > 0){
- setMatchSpan(candidateRelations.get(0));
- candidateRelations.remove(0);
- return true;
- }
- else if (element != null){
- setCandidateList();
- }
- else { setCandidateListWithWindow(); }
- }
- return false;
- }
-
- private void setMatchSpan(CandidateRelationSpan relationSpan) {
- matchDocNumber = relationSpan.getDoc();
- if (!inverse){
- matchStartPosition = relationSpan.getStart();
- matchEndPosition = relationSpan.getEnd();
- setRightStart(relationSpan.getRightStart());
- setRightEnd(relationSpan.getRightEnd());
- }
- else{ // maybe a bit confusing -- inverse relation
- matchStartPosition = relationSpan.getRightStart();
- matchEndPosition = relationSpan.getRightEnd();
- setRightStart(relationSpan.getStart());
- setRightEnd(relationSpan.getEnd());
- }
-
- setLeftId(relationSpan.getLeftId());
- setRightId(relationSpan.getRightId());
- setSpanId(relationSpan.getSpanId());
- }
-
- /** A window starts at the same token position as a relation span,
- * and ends at the start + window length.
- * */
- private void setCandidateListWithWindow() throws IOException {
- if (hasMoreSpans && ensureSameDoc(relationSpans, matcheeSpans) ){
- int windowEnd = relationSpans.start() + window;
- if (relationSpans.end() > windowEnd){
- throw new IllegalArgumentException("The window length "+window
- +" is too small. The relation span ("+relationSpans.start()+
- ","+relationSpans.end()+") is longer than " +"the window " +
- "length.");
- }
- else {
- collectRelations(relationSpans.doc(), windowEnd);
- // sort results
- Collections.sort(candidateRelations);
- }
- }
- }
+ private RelationBaseSpans relationSpans;
+ private SpansWithId matcheeSpans;
+ private ElementSpans element; // element as the window
+ private List<CandidateRelationSpan> candidateRelations;
- private void setCandidateList() throws IOException {
- while (hasMoreSpans && findSameDoc(element, relationSpans, matcheeSpans) ){
- // if the relation is within a sentence
- if (relationSpans.start() >= element.start() &&
- relationSpans.end() <= element.end()){
- collectRelations(element.doc(),element.end());
- // sort results
- Collections.sort(candidateRelations);
- }
- else if (relationSpans.end() < element.end()){
- hasMoreSpans = relationSpans.next();
- }
- else {
- hasMoreSpans = element.next();
- }
- }
- }
+ private boolean matchRight;
+ private boolean inverse;
+ private boolean hasMoreMatchees;
- /** Collect all relations within an element whose left side matching the secondspans.
- * */
- private void collectRelations(int currentDoc, int windowEnd) throws IOException {
- List<CandidateRelationSpan> temp = new ArrayList<CandidateRelationSpan>();
- boolean sortRight = false;
- if (matchRight) sortRight = true;
- // collect all relations within an element
- while (hasMoreSpans &&
- relationSpans.doc() == currentDoc &&
- relationSpans.end() <= windowEnd){
- temp.add(new CandidateRelationSpan(relationSpans,sortRight));
- hasMoreSpans = relationSpans.next();
- }
-
- if(matchRight) Collections.sort(temp);
-
- // do the matching for each relation
- int i=0;
- CandidateRelationSpan r;
- while (hasMoreMatchees && i < temp.size()){
- r = temp.get(i);
- if (matchRight){
- /*System.out.println(r.getStart()+","+r.getEnd()+" "+
- r.getRightStart()+","+r.getRightEnd()+
- " #"+r.getRightId()+
- " "+matcheeSpans.start()+","+matcheeSpans.end()+
- " #"+matcheeSpans.getSpanId()
- );*/
- i = matchRelation(i, r,r.getRightStart(), r.getRightEnd());
- }
- else{
- /*System.out.println(r.getStart()+","+r.getEnd()+" "+
- r.getRightStart()+","+r.getRightEnd()+" "
- +matcheeSpans.start()+","+matcheeSpans.end()+
- " #"+matcheeSpans.getSpanId());*/
- i = matchRelation(i, r,r.getStart(), r.getEnd());
- }
- }
-
- hasMoreSpans &= hasMoreMatchees;
- }
-
- private int matchRelation(int i, CandidateRelationSpan r, int startPos,
- int endPos) throws IOException {
-
- if(startPos == matcheeSpans.start() ){
- if (endPos == matcheeSpans.end()){
-
- int id;
- if ( matcheeSpans instanceof RelationPartSpans){
- if (matchRight) {
- id = ((RelationPartSpans) matcheeSpans).getRightId();
- }
- else { id = ((RelationPartSpans) matcheeSpans).getLeftId(); }
- }
- else { id = matcheeSpans.getSpanId(); }
-
- if (!inverse && r.getRightId() == id){
- r.sortRight = false;
- candidateRelations.add(r);
- }
- else if (inverse && r.getLeftId() == id) {
- r.sortRight = true;
- candidateRelations.add(r);
- }
- i++;
- }
- else if (endPos <= matcheeSpans.end()){
- i++;
- }
- else { hasMoreMatchees = matcheeSpans.next(); }
- }
- else if (startPos < matcheeSpans.start()){
- i++;
- }
- else { hasMoreMatchees = matcheeSpans.next(); }
- return i;
- }
+ private int window; // number of tokens as the window
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (relationSpans.doc() < target)){
- if (!relationSpans.skipTo(target)){
- candidateRelations.clear();
- return false;
- }
- }
- setCandidateList();
- matchPayload.clear();
- isStartEnumeration=false;
- return advance();
- }
+ /**
+ * Creates a RelationPartSpans from the specified
+ * {@link SpanRelationPartQuery}.
+ *
+ * @param query a SpanRelationPartQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public RelationPartSpans(SpanRelationPartQuery query,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ if (query.getElementQuery() != null) {
+ element = (ElementSpans) query.getElementQuery().getSpans(context,
+ acceptDocs, termContexts);
+ } else {
+ window = query.getWindow();
+ }
+ relationSpans = (RelationBaseSpans) firstSpans;
+ matcheeSpans = (SpansWithId) secondSpans;
+ // hack
+ matcheeSpans.hasSpanId = true;
- @Override
- public long cost() {
- // TODO Auto-generated method stub
- return 0;
- }
+ hasMoreMatchees = matcheeSpans.next();
+ hasMoreSpans = relationSpans.next() && hasMoreMatchees;
+ if (element != null) {
+ hasMoreSpans &= element.next();
+ }
+ candidateRelations = new ArrayList<CandidateRelationSpan>();
+ matchRight = query.isMatchRight();
+ inverse = query.isInverseRelation();
+ }
- class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
-
- private int rightStart, rightEnd;
- private short leftId, rightId;
- private boolean sortRight;
-
-
- public CandidateRelationSpan(RelationBaseSpans span, boolean sortRight)
- throws IOException {
- super(span);
- this.rightStart = span.getRightStart();
- this.rightEnd = span.getRightEnd();
- this.sortRight = sortRight;
- this.leftId = span.getLeftId();
- this.rightId = span.getRightId();
- this.spanId = span.getSpanId();
- }
-
- @Override
- public int compareTo(CandidateSpan o) {
- CandidateRelationSpan cs = (CandidateRelationSpan) o;
- if (sortRight)
- return sortByRight(cs);
-
- return super.compareTo(o);
- }
-
- private int sortByRight(CandidateRelationSpan cs) {
- if (this.getRightStart() == cs.getRightStart()){
- if (this.getRightEnd() == cs.getRightEnd())
- return 0;
- if (this.getRightEnd() > cs.getRightEnd() )
- return 1;
- else return -1;
- }
- else if (this.getRightStart() < cs.getRightStart())
- return -1;
- else return 1;
- }
-
- /*private void sortByLeft(CandidateSpan o) {
- super.compareTo(o);
- }*/
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ matchPayload.clear();
+ return advance();
+ }
- public int getRightStart() {
- return rightStart;
- }
+ /**
+ * Advances to the next match by setting the first candidate relation of the
+ * candidateRelations list as the match, if the list is not empty. Otherwise,
+ * the candidate list is first filled based on the element or token window.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code> otherwise
+ * @throws IOException
+ */
+ protected boolean advance() throws IOException {
+ while (candidateRelations.size() > 0 || hasMoreSpans) {
+ if (candidateRelations.size() > 0) {
+ setMatchSpan(candidateRelations.get(0));
+ candidateRelations.remove(0);
+ return true;
+ } else if (element != null) {
+ setCandidateList();
+ } else {
+ setCandidateListWithWindow();
+ }
+ }
+ return false;
+ }
- public void setRightStart(int rightStart) {
- this.rightStart = rightStart;
- }
+ /**
+ * Sets the specified {@link CandidateRelationSpan} as the current match. If
+ * the match should be sorted by the right side positions of the original
+ * relation, then it should be inverted. In this case, the start and end
+ * positions of the original <em>right</em> side, will be set as the match
+ * <em>left</em> start and end positions, and vice versa.
+ *
+ * @param relationSpan a CandidateRelationSpan
+ */
+ private void setMatchSpan(CandidateRelationSpan relationSpan) {
+ matchDocNumber = relationSpan.getDoc();
+ if (!inverse) {
+ matchStartPosition = relationSpan.getStart();
+ matchEndPosition = relationSpan.getEnd();
+ setRightStart(relationSpan.getRightStart());
+ setRightEnd(relationSpan.getRightEnd());
+ } else { // maybe a bit confusing -- inverse relation
+ matchStartPosition = relationSpan.getRightStart();
+ matchEndPosition = relationSpan.getRightEnd();
+ setRightStart(relationSpan.getStart());
+ setRightEnd(relationSpan.getEnd());
+ }
- public int getRightEnd() {
- return rightEnd;
- }
+ setLeftId(relationSpan.getLeftId());
+ setRightId(relationSpan.getRightId());
+ setSpanId(relationSpan.getSpanId());
+ }
- public void setRightEnd(int rightEnd) {
- this.rightEnd = rightEnd;
- }
+ /**
+ * Sets the candidate relation list based on token window that starts at the
+ * same token position as a relation span, and ends at the start + window
+ * length.
+ *
+ * @throws IOException
+ */
+ private void setCandidateListWithWindow() throws IOException {
+ if (hasMoreSpans && ensureSameDoc(relationSpans, matcheeSpans)) {
+ int windowEnd = relationSpans.start() + window;
+ if (relationSpans.end() > windowEnd) {
+ throw new IllegalArgumentException("The window length "
+ + window + " is too small. The relation span ("
+ + relationSpans.start() + "," + relationSpans.end()
+ + ") is longer than " + "the window " + "length.");
+ } else {
+ collectRelations(relationSpans.doc(), windowEnd);
+ // sort results
+ Collections.sort(candidateRelations);
+ }
+ }
+ }
- public short getLeftId() {
- return leftId;
- }
+ /**
+ * Sets the candidate relation list based on the element window.
+ *
+ * @throws IOException
+ */
+ private void setCandidateList() throws IOException {
+ while (hasMoreSpans
+ && findSameDoc(element, relationSpans, matcheeSpans)) {
+ // if the relation is within a sentence
+ if (relationSpans.start() >= element.start()
+ && relationSpans.end() <= element.end()) {
+ collectRelations(element.doc(), element.end());
+ // sort results
+ Collections.sort(candidateRelations);
+ } else if (relationSpans.end() < element.end()) {
+ hasMoreSpans = relationSpans.next();
+ } else {
+ hasMoreSpans = element.next();
+ }
+ }
+ }
- public void setLeftId(short leftId) {
- this.leftId = leftId;
- }
+ /**
+ * Collects all relations whose end position is before or identical to the
+ * given window end, within the specified document number, and matches either
+ * the left or right side of each relation to the matcheeSpans.
+ *
+ * @param currentDoc the current document number
+ * @param windowEnd the end position of the current window
+ * @throws IOException
+ */
+ private void collectRelations(int currentDoc, int windowEnd)
+ throws IOException {
+ List<CandidateRelationSpan> temp = new ArrayList<CandidateRelationSpan>();
+ boolean sortRight = false;
+ if (matchRight)
+ sortRight = true;
+ // collect all relations within an element
+ while (hasMoreSpans && relationSpans.doc() == currentDoc
+ && relationSpans.end() <= windowEnd) {
+ temp.add(new CandidateRelationSpan(relationSpans, sortRight));
+ hasMoreSpans = relationSpans.next();
+ }
- public short getRightId() {
- return rightId;
- }
+ if (matchRight)
+ Collections.sort(temp);
- public void setRightId(short rightId) {
- this.rightId = rightId;
- }
- }
+ // do the matching for each relation
+ int i = 0;
+ CandidateRelationSpan r;
+ while (hasMoreMatchees && i < temp.size()) {
+ r = temp.get(i);
+ if (matchRight) {
+ /*
+ * System.out.println(r.getStart()+","+r.getEnd()+" "+
+ * r.getRightStart()+","+r.getRightEnd()+ " #"+r.getRightId()+
+ * " "+matcheeSpans.start()+","+matcheeSpans.end()+
+ * " #"+matcheeSpans.getSpanId() );
+ */
+ i = matchRelation(i, r, r.getRightStart(), r.getRightEnd());
+ } else {
+ /*
+ * System.out.println(r.getStart()+","+r.getEnd()+" "+
+ * r.getRightStart()+","+r.getRightEnd()+" "
+ * +matcheeSpans.start()+","+matcheeSpans.end()+
+ * " #"+matcheeSpans.getSpanId());
+ */
+ i = matchRelation(i, r, r.getStart(), r.getEnd());
+ }
+ }
+
+ hasMoreSpans &= hasMoreMatchees;
+ }
+
+ /**
+ * Matches a part of the given candidate relation, delimited by the given
+ * start and end positions, to the current span of the matcheeSpans.
+ *
+ * @param i the position counter for iterating the collected relations
+ * @param r a CandidateRelationSpan
+ * @param startPos the start position of the relation part to match
+ * @param endPos the end position of the relation part to match
+ * @return the next position counter to compute
+ * @throws IOException
+ */
+ private int matchRelation(int i, CandidateRelationSpan r, int startPos,
+ int endPos) throws IOException {
+
+ if (startPos == matcheeSpans.start()) {
+ if (endPos == matcheeSpans.end()) {
+
+ int id;
+ if (matcheeSpans instanceof RelationPartSpans) {
+ if (matchRight) {
+ id = ((RelationPartSpans) matcheeSpans).getRightId();
+ } else {
+ id = ((RelationPartSpans) matcheeSpans).getLeftId();
+ }
+ } else {
+ id = matcheeSpans.getSpanId();
+ }
+
+ if (!inverse && r.getRightId() == id) {
+ r.sortRight = false;
+ candidateRelations.add(r);
+ } else if (inverse && r.getLeftId() == id) {
+ r.sortRight = true;
+ candidateRelations.add(r);
+ }
+ i++;
+ } else if (endPos <= matcheeSpans.end()) {
+ i++;
+ } else {
+ hasMoreMatchees = matcheeSpans.next();
+ }
+ } else if (startPos < matcheeSpans.start()) {
+ i++;
+ } else {
+ hasMoreMatchees = matcheeSpans.next();
+ }
+ return i;
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (relationSpans.doc() < target)) {
+ if (!relationSpans.skipTo(target)) {
+ candidateRelations.clear();
+ return false;
+ }
+ }
+ setCandidateList();
+ matchPayload.clear();
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ /**
+ * CandidateRelationSpan stores a state of RelationSpans and enables sorting
+ * a relation list by the right side positions of the relations. Normally,
+ * such a list is sorted by the left side positions of the relations.
+ *
+ */
+ class CandidateRelationSpan extends CandidateSpan implements
+ Comparable<CandidateSpan> {
+
+ private int rightStart, rightEnd;
+ private short leftId, rightId;
+ private boolean sortRight;
+
+ public CandidateRelationSpan(RelationBaseSpans span, boolean sortRight)
+ throws IOException {
+ super(span);
+ this.rightStart = span.getRightStart();
+ this.rightEnd = span.getRightEnd();
+ this.sortRight = sortRight;
+ this.leftId = span.getLeftId();
+ this.rightId = span.getRightId();
+ this.spanId = span.getSpanId();
+ }
+
+ @Override
+ public int compareTo(CandidateSpan o) {
+ CandidateRelationSpan cs = (CandidateRelationSpan) o;
+ if (sortRight)
+ return sortByRight(cs);
+
+ return super.compareTo(o);
+ }
+
+ /**
+ * Determines the position of this CandidateRelationSpan relative to the
+ * given CandidateRelationSpan.
+ *
+ * @param cs a CandidateRelationSpan
+ * @return 0 if this CandidateRelationSpan has the same position as cs,
+ * 1 if it should follow cs, and -1 if it should precede cs.
+ */
+ private int sortByRight(CandidateRelationSpan cs) {
+ if (this.getRightStart() == cs.getRightStart()) {
+ if (this.getRightEnd() == cs.getRightEnd())
+ return 0;
+ if (this.getRightEnd() > cs.getRightEnd())
+ return 1;
+ else
+ return -1;
+ } else if (this.getRightStart() < cs.getRightStart())
+ return -1;
+ else
+ return 1;
+ }
+
+ public int getRightStart() {
+ return rightStart;
+ }
+
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ }
+
+ public int getRightEnd() {
+ return rightEnd;
+ }
+
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
+ }
+
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
index 2c4bdb6..05de6d5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -18,248 +18,293 @@
import de.ids_mannheim.korap.query.SpanRelationQuery;
-/** Enumeration of spans denoting relations between two tokens/elements. The start and end of
- * a RelationSpan always denote the start and end of the left-side token/element.
+/**
+ * Enumeration of spans denoting relations between two tokens/elements. The
+ * start and end of a RelationSpan always denote the start and end of the
+ * left-side token/element.
*
- * There are 4 types of relations, which is differentiated by the payload length in bytes.
- * 1. Token to token relation (1 int & 3 short, length: 10)
- * 2. Token to span (2 int & 3 short, length: 14)
- * 3. Span to token (int, byte, int, 3 short, length: 15)
- * 4. Span to Span (3 int & 3 short, length: 18)
- *
- * Every integer value denotes the start/end position of the start/target of a relation,
- * in this format: (sourceEndPos?, startTargetPos, endTargetPos?). The end position of a token is
- * identical to its start position, and therefore not is saved in a payload.
- *
- * The short values denote the relation id, left id, and right id.
- * The byte in relation #3 is just a dummy to create a different length from the relation #2.
+ * There are 4 types of relations, which are differentiated by the payload length
+ * in bytes.
+ * <ol>
+ * <li>Token to token relation (1 int & 3 short, length: 10)</li>
+ * <li>Token to span (2 int & 3 short, length: 14)</li>
+ * <li>Span to token (int, byte, int, 3 short, length: 15)</li>
+ * <li>Span to Span (3 int & 3 short, length: 18)</li>
+ * </ol>
+ * Every integer value denotes the start/end position of the start/target of a
+ * relation, in this format: (sourceEndPos?, startTargetPos, endTargetPos?). The
+ * end position of a token is identical to its start position, and therefore is
+ * not saved in a payload.
*
- * NOTE: Sorting of the candidate spans can alternatively be done in indexing, instead of here.
- * (first by left positions and then by right positions)
+ * The short values denote the relation id, left id, and right id. The byte in
+ * relation #3 is just a dummy to create a different length from the relation
+ * #2.
*
- * @author margaretha
+ * NOTE: Sorting of the candidate spans can alternatively be done in indexing,
+ * instead of here. (first by left positions and then by right positions)
+ *
+ * @author margaretha
* */
-public class RelationSpans extends RelationBaseSpans{
+public class RelationSpans extends RelationBaseSpans {
- //short relationId;
-// private int rightStart, rightEnd;
- private int currentDoc, currentPosition;
-// private short leftId, rightId;
-
- private TermSpans relationTermSpan;
-
- protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
- private List<CandidateRelationSpan> candidateList;
-
- public RelationSpans(SpanRelationQuery relationSpanQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(relationSpanQuery, context, acceptDocs, termContexts);
- candidateList = new ArrayList<>();
- relationTermSpan = (TermSpans) firstSpans;
- hasMoreSpans = relationTermSpan.next();
- }
+ private int currentDoc, currentPosition;
+ private TermSpans relationTermSpan;
- @Override
- public boolean next() throws IOException {
- isStartEnumeration=false;
- return advance();
- }
-
- private boolean advance() throws IOException{
- while(hasMoreSpans || !candidateList.isEmpty()){
- if (!candidateList.isEmpty()){
- CandidateRelationSpan cs = candidateList.get(0);
- this.matchDocNumber = cs.getDoc();
- this.matchStartPosition = cs.getStart();
- this.matchEndPosition = cs.getEnd();
- this.setRightStart(cs.getRightStart());
- this.setRightEnd(cs.getRightEnd());
- this.spanId = cs.getSpanId(); // relation id
- this.leftId = cs.getLeftId();
- this.rightId = cs.getRightId();
- candidateList.remove(0);
- return true;
- }
- else{
- setCandidateList();
- currentDoc = relationTermSpan.doc();
- currentPosition = relationTermSpan.start();
- }
- }
- return false;
- }
+ protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
+ private List<CandidateRelationSpan> candidateList;
- private void setCandidateList() throws IOException {
- while (hasMoreSpans && relationTermSpan.doc() == currentDoc &&
- relationTermSpan.start() == currentPosition){
- CandidateRelationSpan cs = new CandidateRelationSpan(relationTermSpan);
- readPayload(cs);
-
- candidateList.add(cs);
- hasMoreSpans = relationTermSpan.next();
- }
- Collections.sort(candidateList);
-
-// for (CandidateRelationSpan cs:candidateList){
-// System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
-// +" target "+cs.getRightStart()+","+cs.getRightEnd() +" id:"+cs.getSpanId());
-// }
- }
+ /**
+ * Creates a RelationSpans from the given {@link SpanRelationQuery}.
+ *
+ * @param relationSpanQuery a SpanRelationQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public RelationSpans(SpanRelationQuery relationSpanQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(relationSpanQuery, context, acceptDocs, termContexts);
+ candidateList = new ArrayList<>();
+ relationTermSpan = (TermSpans) firstSpans;
+ hasMoreSpans = relationTermSpan.next();
+ }
- private void readPayload(CandidateRelationSpan cs) {
- List<byte[]> payload = (List<byte[]>) cs.getPayloads();
- int length = payload.get(0).length;
- ByteBuffer bb = ByteBuffer.allocate(length);
- bb.put(payload.get(0));
-
- int i;
- switch (length) {
- case 10: // Token to token
- i = bb.getInt(0);
- cs.setRightStart(i-1);
- cs.setRightEnd(i);
- break;
-
- case 14: // Token to span
- cs.setRightStart(bb.getInt(0));
- cs.setRightEnd(bb.getInt(4));
- break;
-
- case 15: // Span to token
- cs.setEnd(bb.getInt(0));
- i = bb.getInt(5);
- cs.setRightStart(i-1);
- cs.setRightEnd(i);
- break;
-
- case 18: // Span to span
- cs.setEnd(bb.getInt(0));
- cs.setRightStart(bb.getInt(4));
- cs.setRightEnd(bb.getInt(8));
- break;
- }
-
- cs.setRightId(bb.getShort(length-2)); //right id
- cs.setLeftId(bb.getShort(length-4)); //left id
- cs.setSpanId(bb.getShort(length-6)); //relation id
- // Payload is cleared.
- }
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ return advance();
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- candidateList.clear();
- return false;
- }
- }
- setCandidateList();
- matchPayload.clear();
- isStartEnumeration=false;
- return advance();
- }
-
- @Override
- public long cost() {
- return firstSpans.cost();
- }
+ /**
+ * Returns true if there is a next match. If the candidateList is not empty,
+ * its first element is set as the next match and removed from the list.
+ * Otherwise, if the relationTermSpan has not ended yet, tries to fill the
+ * candidateList.
+ *
+ * @return true if there is a next match.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
+ while (hasMoreSpans || !candidateList.isEmpty()) {
+ if (!candidateList.isEmpty()) {
+ CandidateRelationSpan cs = candidateList.get(0);
+ this.matchDocNumber = cs.getDoc();
+ this.matchStartPosition = cs.getStart();
+ this.matchEndPosition = cs.getEnd();
+ this.setRightStart(cs.getRightStart());
+ this.setRightEnd(cs.getRightEnd());
+ this.spanId = cs.getSpanId(); // relation id
+ this.leftId = cs.getLeftId();
+ this.rightId = cs.getRightId();
+ candidateList.remove(0);
+ return true;
+ } else {
+ setCandidateList();
+ currentDoc = relationTermSpan.doc();
+ currentPosition = relationTermSpan.start();
+ }
+ }
+ return false;
+ }
- public int getRightStart() {
- return rightStart;
- }
+ /**
+ * Sets the candidateList by adding all relationTermSpans whose document and
+ * start position are the same as the current document and position, and
+ * sorts the candidateList.
+ *
+ * @throws IOException
+ */
+ private void setCandidateList() throws IOException {
+ while (hasMoreSpans && relationTermSpan.doc() == currentDoc
+ && relationTermSpan.start() == currentPosition) {
+ CandidateRelationSpan cs = new CandidateRelationSpan(
+ relationTermSpan);
+ readPayload(cs);
- public void setRightStart(int rightStart) {
- this.rightStart = rightStart;
- }
+ candidateList.add(cs);
+ hasMoreSpans = relationTermSpan.next();
+ }
+ Collections.sort(candidateList);
- public int getRightEnd() {
- return rightEnd;
- }
+ // for (CandidateRelationSpan cs:candidateList){
+ // System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
+ // +" target "+cs.getRightStart()+","+cs.getRightEnd() +" id:"+cs.getSpanId());
+ // }
+ }
- public void setRightEnd(int rightEnd) {
- this.rightEnd = rightEnd;
- }
+ /**
+ * Identifies the relation type of the given {@link CandidateRelationSpan} by
+ * checking the length of its payloads, and sets some properties of the span
+ * based on the payloads.
+ *
+ * @param cs a CandidateRelationSpan
+ */
+ private void readPayload(CandidateRelationSpan cs) {
+ List<byte[]> payload = (List<byte[]>) cs.getPayloads();
+ int length = payload.get(0).length;
+ ByteBuffer bb = ByteBuffer.allocate(length);
+ bb.put(payload.get(0));
-// public short getLeftId() {
-// return leftId;
-// }
-//
-// public void setLeftId(short leftId) {
-// this.leftId = leftId;
-// }
-//
-// public short getRightId() {
-// return rightId;
-// }
-//
-// public void setRightId(short rightId) {
-// this.rightId = rightId;
-// }
+ int i;
+ switch (length) {
+ case 10: // Token to token
+ i = bb.getInt(0);
+ cs.setRightStart(i - 1);
+ cs.setRightEnd(i);
+ break;
- class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
-
- private int rightStart, rightEnd;
- private short leftId, rightId;
-
- public CandidateRelationSpan(Spans span) throws IOException{
- super(span);
- }
+ case 14: // Token to span
+ cs.setRightStart(bb.getInt(0));
+ cs.setRightEnd(bb.getInt(4));
+ break;
- @Override
- public int compareTo(CandidateSpan o) {
+ case 15: // Span to token
+ cs.setEnd(bb.getInt(0));
+ i = bb.getInt(5);
+ cs.setRightStart(i - 1);
+ cs.setRightEnd(i);
+ break;
- int sourcePositionComparison = super.compareTo(o);
-
- CandidateRelationSpan cs = (CandidateRelationSpan) o;
- if (sourcePositionComparison == 0){
- if (this.getRightStart() == cs.getRightStart()){
- if (this.getRightEnd() == cs.getRightEnd())
- return 0;
- if (this.getRightEnd() > cs.getRightEnd() )
- return 1;
- else return -1;
- }
- else if (this.getRightStart() < cs.getRightStart())
- return -1;
- else return 1;
- }
+ case 18: // Span to span
+ cs.setEnd(bb.getInt(0));
+ cs.setRightStart(bb.getInt(4));
+ cs.setRightEnd(bb.getInt(8));
+ break;
+ }
- return sourcePositionComparison;
- }
-
- public int getRightEnd() {
- return rightEnd;
- }
+ cs.setRightId(bb.getShort(length - 2)); //right id
+ cs.setLeftId(bb.getShort(length - 4)); //left id
+ cs.setSpanId(bb.getShort(length - 6)); //relation id
+ // Payload is cleared.
+ }
- public void setRightEnd(int rightEnd) {
- this.rightEnd = rightEnd;
- }
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ candidateList.clear();
+ return false;
+ }
+ }
+ setCandidateList();
+ matchPayload.clear();
+ isStartEnumeration = false;
+ return advance();
+ }
- public int getRightStart() {
- return rightStart;
- }
+ @Override
+ public long cost() {
+ return firstSpans.cost();
+ }
- public void setRightStart(int rightStart) {
- this.rightStart = rightStart;
- }
+ /**
+ * Returns the right start position of the current RelationSpan.
+ *
+ * @return the right start position of the current RelationSpan.
+ */
+ public int getRightStart() {
+ return rightStart;
+ }
- public short getLeftId() {
- return leftId;
- }
+ /**
+ * Sets the right start position of the current RelationSpan.
+ *
+ * @param rightStart the right start position of the current RelationSpan
+ */
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ }
- public void setLeftId(short leftId) {
- this.leftId = leftId;
- }
+ /**
+ * Returns the right end position of the current RelationSpan.
+ *
+ * @return the right end position of the current RelationSpan.
+ */
+ public int getRightEnd() {
+ return rightEnd;
+ }
- public short getRightId() {
- return rightId;
- }
+ /**
+ * Sets the right end position of the current RelationSpan.
+ *
+ * @param rightEnd the right end position of the current RelationSpan.
+ */
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
+ }
- public void setRightId(short rightId) {
- this.rightId = rightId;
- }
+ /**
+ * CandidateRelationSpan stores a state of RelationSpans. In a list,
+ * CandidateRelationSpans are ordered first by the position of the relation
+ * left side and then by the position of the relation right side.
+ */
+ class CandidateRelationSpan extends CandidateSpan implements
+ Comparable<CandidateSpan> {
- }
-
+ private int rightStart, rightEnd;
+ private short leftId, rightId;
+
+ public CandidateRelationSpan(Spans span) throws IOException {
+ super(span);
+ }
+
+ @Override
+ public int compareTo(CandidateSpan o) {
+
+ int sourcePositionComparison = super.compareTo(o);
+
+ CandidateRelationSpan cs = (CandidateRelationSpan) o;
+ if (sourcePositionComparison == 0) {
+ if (this.getRightStart() == cs.getRightStart()) {
+ if (this.getRightEnd() == cs.getRightEnd())
+ return 0;
+ if (this.getRightEnd() > cs.getRightEnd())
+ return 1;
+ else
+ return -1;
+ } else if (this.getRightStart() < cs.getRightStart())
+ return -1;
+ else
+ return 1;
+ }
+
+ return sourcePositionComparison;
+ }
+
+ public int getRightEnd() {
+ return rightEnd;
+ }
+
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
+ }
+
+ public int getRightStart() {
+ return rightStart;
+ }
+
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ }
+
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
+
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
index 344d974..a9723e8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithAttribute.java
@@ -16,244 +16,293 @@
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-/** Span enumeration of element or relation spans having and/or <em>not</em>
- * having some attributes. This class handles <em>and</em> operation on attributes.
+/**
+ * Span enumeration of element or relation spans (referent spans) having and/or
+ * <em>not</em> having some attributes. This class only handles <em>and</em>
+ * operation on attributes.
*
- * Use SpanOrQuery to perform <em>or</em> operation on attributes, i.e. choose
- * between two elements with some attribute constraints. Note that the attribute
- * constraints have to be in Conjunctive Normal Form (CNF).
- *
- * @author margaretha
+ * Use SpanOrQuery to perform <em>or</em> operation on attributes, i.e. choose
+ * between two elements with some attribute constraints. Note that the attribute
+ * constraints have to be formulated in Conjunctive Normal Form (CNF).
+ *
+ * @author margaretha
* */
-public class SpansWithAttribute extends SpansWithId{
-
- private SpansWithId withAttributeSpans;
- private List<AttributeSpans> attributeList;
- private List<AttributeSpans> notAttributeList;
-
- protected Logger logger = LoggerFactory.getLogger(SpansWithAttribute.class);
+public class SpansWithAttribute extends SpansWithId {
- public SpansWithAttribute(SpanWithAttributeQuery spanWithAttributeQuery,
- SpansWithId withIdSpans,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanWithAttributeQuery, context, acceptDocs, termContexts);
- withAttributeSpans = withIdSpans;
- withAttributeSpans.hasSpanId = true; // dummy setting enabling reading elementRef
- hasMoreSpans = withAttributeSpans.next();
-
- attributeList = new ArrayList<AttributeSpans>();
- notAttributeList = new ArrayList<AttributeSpans>();
-
- List<SpanQuery> sqs = spanWithAttributeQuery.getClauseList();
- if (sqs != null){
- for (SpanQuery sq: sqs){
- addAttributes(sq, context, acceptDocs, termContexts);
- }
- }
- else {
- addAttributes(spanWithAttributeQuery.getSecondClause(),
- context, acceptDocs, termContexts);
- }
- }
-
- private void addAttributes(SpanQuery sq, AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- AttributeSpans as = (AttributeSpans) sq.getSpans(context, acceptDocs, termContexts);
- if (((SpanAttributeQuery) sq).isNegation()){
- notAttributeList.add(as);
- as.next();
- }
- else {
- attributeList.add(as);
- hasMoreSpans &= as.next();
- }
- }
+ private SpansWithId referentSpans;
+ private List<AttributeSpans> attributeList;
+ private List<AttributeSpans> notAttributeList;
- @Override
- public boolean next() throws IOException {
- isStartEnumeration=false;
- return advance();
- }
-
- /** Search for the next match by first identify a possible
- * element position, and then ensuring that the element contains
- * all the attributes and <em>do not</em> contain any of the
- * not attributes.
- * */
- private boolean advance() throws IOException {
-
- while (hasMoreSpans && searchSpanPosition()){
- //logger.info("element: " + withAttributeSpans.start() + ","+ withAttributeSpans.end() +
- // " ref:"+withAttributeSpans.getSpanId());
-
- if (checkSpanId() && checkNotSpanId()){
- this.matchDocNumber = withAttributeSpans.doc();
- this.matchStartPosition = withAttributeSpans.start();
- this.matchEndPosition = withAttributeSpans.end();
- this.matchPayload = withAttributeSpans.getPayload();
- this.spanId = withAttributeSpans.getSpanId();
-
- if (attributeList.size() > 0)
- hasMoreSpans = attributeList.get(0).next();
-
- //logger.info("MATCH "+matchDocNumber);
-
- hasMoreSpans &= withAttributeSpans.next();
- return true;
- }
- }
- return false;
- }
-
- /** Ensuring all the attribute spans having the same elementRef with
- * the actual element's elementRef.
- * */
- private boolean checkSpanId() throws IOException{
-
- for (AttributeSpans attribute: attributeList){
- if (withAttributeSpans.getSpanId() != attribute.getSpanId()){
-// logger.info("attribute ref doesn't match");
- if (withAttributeSpans.getSpanId() < attribute.getSpanId())
- hasMoreSpans = attribute.next();
- else {
- hasMoreSpans = withAttributeSpans.next();
- }
-
- return false;
- }
- }
- return true;
- }
-
- /** Ensuring elements do not contain the not attributes. In other words,
- * the elementRef is not the same as the not attribute's elementRefs.
- * */
- private boolean checkNotSpanId() throws IOException{
- for (AttributeSpans notAttribute: notAttributeList){
- if (!notAttribute.isFinish() &&
- withAttributeSpans.start() == notAttribute.start() &&
- withAttributeSpans.getSpanId() == notAttribute.getSpanId()){
-// logger.info("not attribute ref exists");
- hasMoreSpans = withAttributeSpans.next();
- return false;
- }
- }
- return true;
- }
-
- /** Search for a possible element having the same doc and start position as
- * the attributes.
- * */
- private boolean searchSpanPosition() throws IOException {
+ protected Logger logger = LoggerFactory.getLogger(SpansWithAttribute.class);
- while (hasMoreSpans){
-
- if (withAttributeSpans.getSpanId() < 1){ // the element does not have an attribute
- hasMoreSpans = withAttributeSpans.next();
-// logger.info("skip");
- continue;
- }
-
- if (checkAttributeListPosition() &&
- checkNotAttributeListPosition()){
-// logger.info("element is found: "+ withAttributeSpans.start());
- return true;
- }
- }
-
- return false;
- }
-
- /** Advancing the not attributes to be in the same or greater doc# than
- * element doc#. If a not attribute is in the same doc, advance it to
- * be in the same or greater start position than the element.
- *
- * */
- private boolean checkNotAttributeListPosition() throws IOException{
-
- for (AttributeSpans a : notAttributeList){
- // advance the doc# of not AttributeSpans
- // logger.info("a "+a.start());
- while (!a.isFinish() && a.doc() <= withAttributeSpans.doc()){
-
- if (a.doc() == withAttributeSpans.doc() &&
- a.start() >= withAttributeSpans.start())
- break;
-
- if (!a.next()) a.setFinish(true);
- }
- }
-
- return true;
- }
-
- /** Advancing the attributes to be in the same doc and start position
- * as the element.
- * */
- private boolean checkAttributeListPosition() throws IOException{
- int currentPosition = withAttributeSpans.start();
- boolean isSame = true;
- boolean isFirst = true;
-
- for (AttributeSpans a : attributeList){
- if(!ensureSamePosition(withAttributeSpans, a)) return false;
- // logger.info("pos:" + withAttributeSpans.start());
- if (isFirst){
- isFirst = false;
- currentPosition = withAttributeSpans.start();
- }
- else if (currentPosition != withAttributeSpans.start()){
- currentPosition = withAttributeSpans.start();
- isSame = false;
-
- }
- }
- // logger.info("same pos: "+isSame+ ", pos "+withAttributeSpans.start());
- return isSame;
- }
-
- /** Advance the element or attribute spans to be in the same doc
- * and start position.
- * */
- private boolean ensureSamePosition(SpansWithId spans,
- AttributeSpans attributes) throws IOException {
-
- while (hasMoreSpans && ensureSameDoc(spans, attributes)){
- if (attributes.start() == spans.start())
- return true;
- else if (attributes.start() > spans.start())
- hasMoreSpans = spans.next();
- else
- hasMoreSpans= attributes.next();
- }
-
- return false;
- }
+ /**
+ * Creates a SpansWithAttribute from the given
+ * {@link SpanWithAttributeQuery} and {@link SpansWithId}, such as
+ * elementSpans and relationSpans.
+ *
+ * @param spanWithAttributeQuery a spanWithAttributeQuery
+ * @param spansWithId a SpansWithId
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public SpansWithAttribute(SpanWithAttributeQuery spanWithAttributeQuery,
+ SpansWithId spansWithId, AtomicReaderContext context,
+ Bits acceptDocs, Map<Term, TermContext> termContexts)
+ throws IOException {
+ super(spanWithAttributeQuery, context, acceptDocs, termContexts);
+ referentSpans = spansWithId;
+ referentSpans.hasSpanId = true; // dummy setting enabling reading elementRef
+ hasMoreSpans = referentSpans.next();
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (withAttributeSpans.doc() < target)){
- if (!withAttributeSpans.skipTo(target)){
- return false;
- }
- }
- isStartEnumeration=false;
- return advance();
- }
+ attributeList = new ArrayList<AttributeSpans>();
+ notAttributeList = new ArrayList<AttributeSpans>();
- @Override
- public long cost() {
-
- long cost = 0;
- for (AttributeSpans as: attributeList){
- cost += as.cost();
- }
- for (AttributeSpans as: notAttributeList){
- cost += as.cost();
- }
- return withAttributeSpans.cost() + cost;
- }
+ List<SpanQuery> sqs = spanWithAttributeQuery.getClauseList();
+ if (sqs != null) {
+ for (SpanQuery sq : sqs) {
+ addAttributes((SpanAttributeQuery) sq, context, acceptDocs,
+ termContexts);
+ }
+ } else {
+ addAttributes(
+ (SpanAttributeQuery) spanWithAttributeQuery
+ .getSecondClause(),
+ context, acceptDocs, termContexts);
+ }
+ }
+ /**
+ * Adds the given {@link SpanAttributeQuery} to the attributeList or
+ * notAttributeList depending on the query, whether it is a negation or not.
+ *
+ * @param sq a SpanAttributeQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ private void addAttributes(SpanAttributeQuery sq,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ AttributeSpans as = (AttributeSpans) sq.getSpans(context, acceptDocs,
+ termContexts);
+ if (sq.isNegation()) {
+ notAttributeList.add(as);
+ as.next();
+ } else {
+ attributeList.add(as);
+ hasMoreSpans &= as.next();
+ }
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ /**
+ * Searches for the next match by first identifying a possible element
+ * position, and then ensuring that the element contains all the attributes
+ * and <em>does not</em> contain any of the not attributes.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code>
+ * otherwise.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
+
+ while (hasMoreSpans && searchSpanPosition()) {
+ //logger.info("element: " + withAttributeSpans.start() + ","+ withAttributeSpans.end() +
+ // " ref:"+withAttributeSpans.getSpanId());
+
+ if (checkReferentId() && checkNotReferentId()) {
+ this.matchDocNumber = referentSpans.doc();
+ this.matchStartPosition = referentSpans.start();
+ this.matchEndPosition = referentSpans.end();
+ this.matchPayload = referentSpans.getPayload();
+ this.spanId = referentSpans.getSpanId();
+
+ if (attributeList.size() > 0)
+ hasMoreSpans = attributeList.get(0).next();
+
+ hasMoreSpans &= referentSpans.next();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Searches for a possible referentSpan having the same document number and
+ * start position as the attributes', and advances the
+ * <em>not attributes</em> to that position or beyond.
+ *
+ * @return <code>true</code> if the referentSpan position is valid,
+ * <code>false</code> otherwise.
+ * @throws IOException
+ */
+ private boolean searchSpanPosition() throws IOException {
+ while (hasMoreSpans) {
+ if (referentSpans.getSpanId() < 1) { // the element does not have an attribute
+ hasMoreSpans = referentSpans.next();
+ continue;
+ }
+ if (checkAttributeListPosition()) {
+ advanceNotAttributes();
+ // logger.info("element is found: "+ withAttributeSpans.start());
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Advances the attributes to be in the same document and start position as
+ * the referentSpan.
+ *
+ * @return <code>true</code> if the attributes are in the same document and
+ * start position as the referentSpan.
+ * @throws IOException
+ */
+ private boolean checkAttributeListPosition() throws IOException {
+ int currentPosition = referentSpans.start();
+ boolean isSame = true;
+ boolean isFirst = true;
+
+ for (AttributeSpans a : attributeList) {
+ if (!ensureSamePosition(referentSpans, a))
+ return false;
+ // logger.info("pos:" + withAttributeSpans.start());
+ if (isFirst) {
+ isFirst = false;
+ currentPosition = referentSpans.start();
+ } else if (currentPosition != referentSpans.start()) {
+ currentPosition = referentSpans.start();
+ isSame = false;
+
+ }
+ }
+ // logger.info("same pos: "+isSame+ ", pos "+withAttributeSpans.start());
+ return isSame;
+ }
+
+ /**
+ * Advances the element or attribute spans to be in the same document and
+ * start position.
+ * */
+ private boolean ensureSamePosition(SpansWithId spans,
+ AttributeSpans attributes) throws IOException {
+
+ while (hasMoreSpans && ensureSameDoc(spans, attributes)) {
+ if (attributes.start() == spans.start())
+ return true;
+ else if (attributes.start() > spans.start())
+ hasMoreSpans = spans.next();
+ else
+ hasMoreSpans = attributes.next();
+ }
+
+ return false;
+ }
+
+ /**
+ * Advances the <em>not-attributes</em> to a document number that is the same
+ * as or greater than the referentSpans' document number. If a
+ * <em>not-attribute</em> is in the same document, it is advanced to a start
+ * position that is the same as or greater than that of the current referentSpan.
+ *
+ * @throws IOException
+ */
+ private void advanceNotAttributes() throws IOException {
+
+ for (AttributeSpans a : notAttributeList) {
+ // advance the doc# of not AttributeSpans
+ // logger.info("a "+a.start());
+ while (!a.isFinish() && a.doc() <= referentSpans.doc()) {
+
+ if (a.doc() == referentSpans.doc()
+ && a.start() >= referentSpans.start())
+ break;
+
+ if (!a.next())
+ a.setFinish(true);
+ }
+ }
+ //return true;
+ }
+
+ /**
+ * Ensures that the referent id of each attributeSpans in the attributeList
+ * is the same as the spanId of the actual referentSpans.
+ *
+ * @return <code>true</code> if the spanId of the current referentSpans is
+ * the same as the referentId of every attributeSpans in the
+ * attributeList, <code>false</code> otherwise.
+ * @throws IOException
+ */
+ private boolean checkReferentId() throws IOException {
+ for (AttributeSpans attribute : attributeList) {
+ if (referentSpans.getSpanId() != attribute.getReferentId()) {
+ if (referentSpans.getSpanId() < attribute.getReferentId())
+ hasMoreSpans = attribute.next();
+ else {
+ hasMoreSpans = referentSpans.next();
+ }
+
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Ensures that the referentSpans do <em>not</em> contain the
+ * <em>not attributes</em> (with negation). In other words, the spanId must
+ * not be the same as the <em>not attribute</em>'s referentId.
+ *
+ * @return <code>true</code> if the referentSpan does not have the same
+ * spanId as the referentIds of all the not attributes,
+ * <code>false</code> otherwise.
+ * @throws IOException
+ */
+ private boolean checkNotReferentId() throws IOException {
+ for (AttributeSpans notAttribute : notAttributeList) {
+ if (!notAttribute.isFinish()
+ && referentSpans.start() == notAttribute.start()
+ && referentSpans.getSpanId() == notAttribute
+ .getReferentId()) {
+ hasMoreSpans = referentSpans.next();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (referentSpans.doc() < target)) {
+ if (!referentSpans.skipTo(target)) {
+ return false;
+ }
+ }
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+
+ long cost = 0;
+ for (AttributeSpans as : attributeList) {
+ cost += as.cost();
+ }
+ for (AttributeSpans as : notAttributeList) {
+ cost += as.cost();
+ }
+ return referentSpans.cost() + cost;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
index 17844ae..b34a3f5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
@@ -13,65 +13,88 @@
import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
-/** Enumeration of termSpans whose an id. This class just wraps the usual Lucene TermSpans,
- * and adds spanid property. It reads the term-id from a term span payload. The term-id
- * is encoded in a short, starting from (offset) 0 in the payload.
+/**
+ * Enumeration of termSpans having an id. This class just wraps the usual Lucene
+ * TermSpans, and adds a span id property. It reads the term-id from a term span
+ * payload. The term-id is encoded in a short, starting from (offset) 0 in the
+ * payload.
*
- * @author margaretha
+ * @author margaretha
* */
-public class TermSpansWithId extends SpansWithId{
+public class TermSpansWithId extends SpansWithId {
- private TermSpans termSpans;
+ private TermSpans termSpans;
- public TermSpansWithId(SpanTermWithIdQuery spanTermWithIdQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanTermWithIdQuery, context, acceptDocs, termContexts);
- termSpans = (TermSpans) firstSpans;
- hasMoreSpans = termSpans.next();
- }
+ /**
+ * Creates TermSpansWithId from the given spanTermWithIdQuery.
+ *
+ * @param spanTermWithIdQuery a spanTermWithIdQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public TermSpansWithId(SpanTermWithIdQuery spanTermWithIdQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanTermWithIdQuery, context, acceptDocs, termContexts);
+ termSpans = (TermSpans) firstSpans;
+ hasMoreSpans = termSpans.next();
+ }
- @Override
- public boolean next() throws IOException {
- isStartEnumeration=false;
- return advance();
- }
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ return advance();
+ }
- private boolean advance() throws IOException{
- while (hasMoreSpans){
- readPayload();
- matchDocNumber = firstSpans.doc();
- matchStartPosition = firstSpans.start();
- matchEndPosition = firstSpans.end();
- hasMoreSpans = firstSpans.next();
- return true;
- }
- return false;
- }
-
- private void readPayload() throws IOException{
- List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
- ByteBuffer bb = ByteBuffer.allocate(payload.get(0).length);
- bb.put(payload.get(0));
- setSpanId(bb.getShort(0)); //term id
- }
-
+ /**
+ * Advances to the next match and sets it as the current match.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code>
+ * otherwise.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
+ while (hasMoreSpans) {
+ readPayload();
+ matchDocNumber = firstSpans.doc();
+ matchStartPosition = firstSpans.start();
+ matchEndPosition = firstSpans.end();
+ hasMoreSpans = firstSpans.next();
+ return true;
+ }
+ return false;
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- return false;
- }
- }
- matchPayload.clear();
- isStartEnumeration=false;
- return advance();
- }
+ /**
+ * Reads the payloads of the current firstSpans and sets the term id info from
+ * the payloads.
+ *
+ * @throws IOException
+ */
+ private void readPayload() throws IOException {
+ List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
+ ByteBuffer bb = ByteBuffer.allocate(payload.get(0).length);
+ bb.put(payload.get(0));
+ setSpanId(bb.getShort(0)); //term id
+ }
- @Override
- public long cost() {
- return firstSpans.cost(); // plus cost from reading payload
- }
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ return false;
+ }
+ }
+ matchPayload.clear();
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return firstSpans.cost(); // plus cost from reading payload
+ }
}