blob: 979b3591c33a907f4b0cc59030a0832c1ca098a7 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanRelationQuery;
/**
* Enumeration of spans denoting relations between two
* tokens/elements. The start and end of a RelationSpan always denote
* the start and end of the left-side token/element.
*
* There are 4 types of relations, which are differentiated by the
* payload length in bytes.
* <ol>
* <li>Token to token relation (1 int & 3 short, length: 10)</li>
* <li>Token to span (2 int & 3 short, length: 14)</li>
* <li>Span to token (int, byte, int, 3 short, length: 15)</li>
* <li>Span to Span (3 int & 3 short, length: 18)</li>
* </ol>
* Every integer value denotes the start/end position of the
* source/target of a relation, in this format: (sourceEndPos?,
* startTargetPos, endTargetPos?). The end position of a token is
* identical to its start position, and therefore is not saved in a
* payload.
*
* The short values denote the relation id, left id, and right id. The
* byte in relation #3 is just a dummy to create a different length
* from the relation #2.
*
* NOTE: Sorting of the candidate spans can alternatively be done in
* indexing, instead of here. (first by left positions and then by
* right positions)
*
* The class number of relation source is always 1 and that of
* relation target is always 2 regardless of the relation direction.
*
* @author margaretha
* */
public class RelationSpans extends RelationBaseSpans {
// Document number and start position that setCandidateList() compares the
// underlying term span against when gathering candidates; advance()
// refreshes both from relationTermSpan after each gathering round.
private int currentDoc, currentPosition;
// Relation direction taken from the query; setPayload() treats 0 as
// "left side is the source" and any other value as "right side is the source".
private int direction;
// The underlying span enumeration (firstSpans cast to TermSpans).
private TermSpans relationTermSpan;
protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
// Candidates gathered for one doc/start position, sorted by setCandidateList()
// and emitted one at a time by advance().
private List<CandidateRelationSpan> candidateList;
// Class numbers written into the 10-byte class payload entries that
// setPayload() always adds for the source and the target.
private byte tempSourceNum, tempTargetNum;
// Class numbers written into the 9-byte class payload entries; setPayload()
// only adds these when the respective value is greater than 0.
private byte sourceClass, targetClass;
/**
 * Creates the relation span enumeration for the given
 * {@link SpanRelationQuery} and positions the underlying term spans
 * on their first occurrence.
 *
 * The relation direction and the class numbers used when building
 * class payloads are copied from the query. The spans created by the
 * superclass ({@code firstSpans}) are cast to {@link TermSpans}.
 *
 * @param relationSpanQuery
 *            the query supplying direction and class numbers
 * @param context
 *            passed through to the superclass constructor
 * @param acceptDocs
 *            passed through to the superclass constructor
 * @param termContexts
 *            passed through to the superclass constructor
 * @throws IOException
 *             if the superclass constructor or the first call to
 *             {@code next()} on the term spans fails
 */
public RelationSpans (SpanRelationQuery relationSpanQuery,
                      AtomicReaderContext context, Bits acceptDocs,
                      Map<Term, TermContext> termContexts)
        throws IOException {
    super(relationSpanQuery, context, acceptDocs, termContexts);

    // Buffer for the candidates of one doc/start position.
    this.candidateList = new ArrayList<>();

    // Settings copied from the query.
    this.direction = relationSpanQuery.getDirection();
    this.sourceClass = relationSpanQuery.getSourceClass();
    this.targetClass = relationSpanQuery.getTargetClass();
    this.tempSourceNum = relationSpanQuery.getTempSourceNum();
    this.tempTargetNum = relationSpanQuery.getTempTargetNum();

    // Move the underlying spans onto their first occurrence.
    this.relationTermSpan = (TermSpans) firstSpans;
    hasMoreSpans = this.relationTermSpan.next();
}
/**
 * Moves to the next relation span.
 *
 * Clears the start-of-enumeration flag and delegates the actual work
 * to {@link #advance()}.
 *
 * @return the result of {@link #advance()}: true if a next match was
 *         set
 * @throws IOException
 *             if reading the underlying spans fails
 */
@Override
public boolean next () throws IOException {
    this.isStartEnumeration = false;
    final boolean foundMatch = advance();
    return foundMatch;
}
/**
 * Sets the next match, if there is one.
 *
 * As long as the candidate list is empty and the underlying spans are
 * not exhausted, the list is refilled through
 * {@link #setCandidateList()} and the current document/position are
 * updated from the underlying term spans. As soon as the list holds a
 * candidate, its first element is removed and becomes the current
 * match.
 *
 * @return true if a match was set, false if neither buffered
 *         candidates nor further spans are left
 * @throws IOException
 *             if reading the underlying spans fails
 */
private boolean advance () throws IOException {
    while (hasMoreSpans || !candidateList.isEmpty()) {
        if (candidateList.isEmpty()) {
            // Nothing buffered: gather candidates, then remember where
            // the underlying spans are positioned now.
            setCandidateList();
            currentDoc = relationTermSpan.doc();
            currentPosition = relationTermSpan.start();
            continue;
        }

        // Pop the head of the list and publish it as the match.
        CandidateRelationSpan head = candidateList.remove(0);
        this.matchDocNumber = head.getDoc();
        this.matchStartPosition = head.getStart();
        this.matchEndPosition = head.getEnd();
        this.matchPayload = head.getPayloads();
        this.setRightStart(head.getRightStart());
        this.setRightEnd(head.getRightEnd());
        this.spanId = head.getSpanId(); // relation id
        this.leftId = head.getLeftId();
        this.rightId = head.getRightId();
        return true;
    }
    return false;
}
/**
 * Collects into the candidate list every occurrence of the underlying
 * term spans that lies in the current document at the current start
 * position, then sorts the candidate list.
 *
 * Each collected occurrence is wrapped in a
 * {@link CandidateRelationSpan}, decoded by
 * {@link #readPayload(CandidateRelationSpan)} and given its class
 * payloads by {@link #setPayload(CandidateRelationSpan)}. The
 * underlying spans are advanced after every collected occurrence.
 *
 * @throws IOException
 *             if reading the underlying spans fails
 */
private void setCandidateList () throws IOException {
    while (hasMoreSpans) {
        // Stop at the first occurrence outside the current doc/position.
        if (relationTermSpan.doc() != currentDoc
                || relationTermSpan.start() != currentPosition) {
            break;
        }
        CandidateRelationSpan candidate =
                new CandidateRelationSpan(relationTermSpan);
        readPayload(candidate);
        setPayload(candidate);
        candidateList.add(candidate);
        hasMoreSpans = relationTermSpan.next();
    }
    Collections.sort(candidateList);
}
/**
 * Decodes the first payload of the given
 * {@link CandidateRelationSpan} and sets the candidate's left and
 * right positions and its ids accordingly.
 *
 * The relation type is identified by the payload length in bytes:
 * <ul>
 * <li>10: token to token (target start)</li>
 * <li>14: token to span (target start, target end)</li>
 * <li>15: span to token (source end, one filler byte, target
 * start)</li>
 * <li>18: span to span (source end, target start, target end)</li>
 * </ul>
 * The last six bytes always hold three shorts: relation id, left id
 * and right id. For token sides the end position is set to start + 1.
 *
 * A payload of any other length is reported with a warning; the
 * right positions are then left untouched while the ids are still
 * read from the last six bytes.
 *
 * @param cs
 *            a CandidateRelationSpan whose first payload holds the
 *            encoded relation
 */
private void readPayload (CandidateRelationSpan cs) {
    List<byte[]> payload = (List<byte[]>) cs.getPayloads();
    byte[] relationBytes = payload.get(0);
    int length = relationBytes.length;
    // Only absolute reads follow, so wrapping the array avoids the
    // extra allocation and copy.
    ByteBuffer bb = ByteBuffer.wrap(relationBytes);

    cs.setLeftStart(cs.start);

    int i;
    switch (length) {
        case 10: // Token to token
            i = bb.getInt(0);
            cs.setLeftEnd(cs.start + 1);
            cs.setRightStart(i);
            cs.setRightEnd(i + 1);
            break;
        case 14: // Token to span
            cs.setLeftEnd(cs.start + 1);
            cs.setRightStart(bb.getInt(0));
            cs.setRightEnd(bb.getInt(4));
            break;
        case 15: // Span to token
            cs.setEnd(bb.getInt(0));
            cs.setLeftEnd(cs.end);
            // Offset 4 holds the filler byte, the target start follows.
            i = bb.getInt(5);
            cs.setRightStart(i);
            cs.setRightEnd(i + 1);
            break;
        case 18: // Span to span
            cs.setEnd(bb.getInt(0));
            cs.setLeftEnd(cs.end);
            cs.setRightStart(bb.getInt(4));
            cs.setRightEnd(bb.getInt(8));
            break;
        default:
            // Previously this fell through silently, yielding a relation
            // whose right side was never set.
            logger.warn("Unexpected relation payload length: {}", length);
            break;
    }

    cs.setRightId(bb.getShort(length - 2)); //right id
    cs.setLeftId(bb.getShort(length - 4)); //left id
    cs.setSpanId(bb.getShort(length - 6)); //relation id
}
/**
 * Builds the payload list of the given candidate and stores it in
 * the candidate.
 *
 * The list starts with the payload of the underlying term spans, if
 * these report one as available. After that, class payloads are
 * appended in this order: source with the temporary source number,
 * target with the temporary target number, source with the source
 * class (only if greater than 0), target with the target class (only
 * if greater than 0). With direction 0 the left side of the relation
 * is the source and the right side the target; otherwise the sides
 * are swapped.
 *
 * @param cs
 *            the candidate whose payloads are replaced
 * @throws IOException
 *             if reading the payload of the underlying spans fails
 */
private void setPayload (CandidateRelationSpan cs) throws IOException {
    ArrayList<byte[]> entries = new ArrayList<byte[]>();
    if (relationTermSpan.isPayloadAvailable()) {
        entries.addAll(relationTermSpan.getPayload());
    }

    // Resolve which side plays the source and which the target role.
    final boolean leftIsSource = (direction == 0);
    final int sourceStart = leftIsSource ? cs.getLeftStart()
            : cs.getRightStart();
    final int sourceEnd = leftIsSource ? cs.getLeftEnd() : cs.getRightEnd();
    final int targetStart = leftIsSource ? cs.getRightStart()
            : cs.getLeftStart();
    final int targetEnd = leftIsSource ? cs.getRightEnd() : cs.getLeftEnd();

    entries.add(createClassPayload(sourceStart, sourceEnd, tempSourceNum,
            false));
    entries.add(createClassPayload(targetStart, targetEnd, tempTargetNum,
            false));
    if (sourceClass > 0) {
        entries.add(createClassPayload(sourceStart, sourceEnd, sourceClass,
                true));
    }
    if (targetClass > 0) {
        entries.add(createClassPayload(targetStart, targetEnd, targetClass,
                true));
    }

    cs.setPayloads(entries);
}
/**
 * Encodes a class payload: start position (int), end position (int)
 * and class number (byte), in that order.
 *
 * The returned array is 9 bytes long when {@code keep} is true and 10
 * bytes long otherwise. In the 10-byte form the last byte is never
 * written and therefore keeps the zero value it was allocated with.
 *
 * @param start
 *            the start position to encode
 * @param end
 *            the end position to encode
 * @param classNumber
 *            the class number to encode
 * @param keep
 *            selects the 9-byte form (true) or the 10-byte form
 *            (false)
 * @return the backing array of the filled buffer
 */
private byte[] createClassPayload (int start, int end, byte classNumber,
        boolean keep) {
    final int size = keep ? 9 : 10;
    ByteBuffer encoded = ByteBuffer.allocate(size);
    encoded.putInt(start).putInt(end).put(classNumber);
    return encoded.array();
}
/**
 * Skips to the next relation span in a document whose number is
 * greater than or equal to the given target.
 *
 * If the underlying spans are positioned before the target document
 * they are skipped forward. When that skip fails the enumeration is
 * marked as exhausted, so that later calls to {@link #next()} do not
 * read from the exhausted spans. Candidates that were buffered from
 * a document before the target are discarded, so they cannot be
 * returned as matches after the skip.
 *
 * @param target
 *            the smallest acceptable document number
 * @return true if a match was set, false otherwise
 * @throws IOException
 *             if reading the underlying spans fails
 */
@Override
public boolean skipTo (int target) throws IOException {
    if (hasMoreSpans && (firstSpans.doc() < target)) {
        if (!firstSpans.skipTo(target)) {
            // The underlying spans are exhausted; everything buffered
            // comes from a document before the target.
            hasMoreSpans = false;
            candidateList.clear();
            return false;
        }
    }

    // Drop candidates left over from documents before the target;
    // advance() would otherwise hand them out first.
    for (int i = candidateList.size() - 1; i >= 0; i--) {
        if (candidateList.get(i).getDoc() < target) {
            candidateList.remove(i);
        }
    }

    setCandidateList();
    matchPayload.clear();
    isStartEnumeration = false;
    return advance();
}
/**
 * Returns the cost reported by the underlying spans
 * ({@code firstSpans}); no cost of its own is added.
 *
 * @return the value of {@code firstSpans.cost()}
 */
@Override
public long cost () {
return firstSpans.cost();
}
/**
* Returns the start position of the right side of the current
* RelationSpan. The value is copied from the emitted candidate each
* time a new match is set.
*
* NOTE(review): the rightStart field is not declared in this class;
* presumably it is inherited from RelationBaseSpans - confirm.
*
* @return the right start position of the current RelationSpan.
*/
public int getRightStart () {
return rightStart;
}
/**
* Sets the start position of the right side of the current
* RelationSpan. Called whenever a candidate becomes the current
* match.
*
* @param rightStart
* the right start position of the current RelationSpan
*/
public void setRightStart (int rightStart) {
this.rightStart = rightStart;
}
/**
* Returns the end position of the right side of the current
* RelationSpan. The value is copied from the emitted candidate each
* time a new match is set.
*
* NOTE(review): the rightEnd field is not declared in this class;
* presumably it is inherited from RelationBaseSpans - confirm.
*
* @return the right end position of the current RelationSpan.
*/
public int getRightEnd () {
return rightEnd;
}
/**
* Sets the end position of the right side of the current
* RelationSpan. Called whenever a candidate becomes the current
* match.
*
* @param rightEnd
* the right end position of the current RelationSpan.
*/
public void setRightEnd (int rightEnd) {
this.rightEnd = rightEnd;
}
/**
 * A buffered relation occurrence. In addition to what
 * {@link CandidateSpan} stores, it keeps the start and end position
 * of the right side of the relation and the ids of the left and the
 * right side.
 *
 * NOTE(review): the ordering used when a list of these is sorted is
 * not defined here; presumably it comes from CandidateSpan - confirm.
 */
class CandidateRelationSpan extends CandidateSpan {

    // Ids of the left and the right side of the relation.
    private short leftId, rightId;
    // Start and end position of the right side of the relation.
    private int rightStart, rightEnd;

    /**
     * Creates a candidate from the given spans by delegating to the
     * {@link CandidateSpan} constructor.
     *
     * @param span
     *            the spans handed to the superclass constructor
     * @throws IOException
     *             if the superclass constructor fails
     */
    public CandidateRelationSpan (Spans span) throws IOException {
        super(span);
    }

    /** @return the start position of the right side */
    public int getRightStart () {
        return this.rightStart;
    }

    /**
     * @param rightStart
     *            the start position of the right side
     */
    public void setRightStart (int rightStart) {
        this.rightStart = rightStart;
    }

    /** @return the end position of the right side */
    public int getRightEnd () {
        return this.rightEnd;
    }

    /**
     * @param rightEnd
     *            the end position of the right side
     */
    public void setRightEnd (int rightEnd) {
        this.rightEnd = rightEnd;
    }

    /** @return the id of the left side */
    public short getLeftId () {
        return this.leftId;
    }

    /**
     * @param leftId
     *            the id of the left side
     */
    public void setLeftId (short leftId) {
        this.leftId = leftId;
    }

    /** @return the id of the right side */
    public short getRightId () {
        return this.rightId;
    }

    /**
     * @param rightId
     *            the id of the right side
     */
    public void setRightId (short rightId) {
        this.rightId = rightId;
    }
}
}