blob: abb8fdacd9d7595e04630406750c7278087adddb [file] [log] [blame]
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
Eliza Margaretha0170b882014-10-29 15:49:31 +00004import java.nio.ByteBuffer;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +00005import java.util.ArrayList;
6import java.util.Collections;
7import java.util.List;
8import java.util.Map;
9
10import org.apache.lucene.index.AtomicReaderContext;
11import org.apache.lucene.index.Term;
12import org.apache.lucene.index.TermContext;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000013import org.apache.lucene.search.spans.Spans;
14import org.apache.lucene.search.spans.TermSpans;
15import org.apache.lucene.util.Bits;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000016import org.slf4j.Logger;
17import org.slf4j.LoggerFactory;
18
19import de.ids_mannheim.korap.query.SpanRelationQuery;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000020
/**
 * Enumeration of spans denoting relations between two tokens/elements. The
 * start and end of a RelationSpan always denote the start and end of the
 * left-side token/element.
 *
 * There are 4 types of relations, which are differentiated by the payload
 * length in bytes.
 * <ol>
 * <li>Token to token relation (1 int and 3 short, length: 10)</li>
 * <li>Token to span (2 int and 3 short, length: 14)</li>
 * <li>Span to token (int, byte, int, 3 short, length: 15)</li>
 * <li>Span to span (3 int and 3 short, length: 18)</li>
 * </ol>
 * Every integer value denotes the start/end position of the source/target of
 * a relation, in this format: (sourceEndPos?, startTargetPos, endTargetPos?).
 * The end position of a token is identical to its start position, and is
 * therefore not saved in a payload.
 *
 * The short values denote the relation id, left id, and right id. The byte in
 * relation #3 is just a dummy to create a different length from relation #2.
 *
 * NOTE: Sorting of the candidate spans can alternatively be done in indexing,
 * instead of here. (first by left positions and then by right positions)
 *
 * @author margaretha
 */
Eliza Margaretha493bfa92015-01-13 16:16:38 +000048public class RelationSpans extends RelationBaseSpans {
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000049
Eliza Margaretha493bfa92015-01-13 16:16:38 +000050 private int currentDoc, currentPosition;
51 private TermSpans relationTermSpan;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000052
Eliza Margaretha493bfa92015-01-13 16:16:38 +000053 protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
54 private List<CandidateRelationSpan> candidateList;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000055
Eliza Margaretha493bfa92015-01-13 16:16:38 +000056 /**
Eliza Margaretha7612bde2015-01-14 10:28:42 +000057 * Constructs RelationSpans from the given {@link SpanRelationQuery}.
Eliza Margaretha493bfa92015-01-13 16:16:38 +000058 *
59 * @param relationSpanQuery a SpanRelationQuery
60 * @param context
61 * @param acceptDocs
62 * @param termContexts
63 * @throws IOException
64 */
65 public RelationSpans(SpanRelationQuery relationSpanQuery,
66 AtomicReaderContext context, Bits acceptDocs,
67 Map<Term, TermContext> termContexts) throws IOException {
68 super(relationSpanQuery, context, acceptDocs, termContexts);
69 candidateList = new ArrayList<>();
70 relationTermSpan = (TermSpans) firstSpans;
71 hasMoreSpans = relationTermSpan.next();
72 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000073
Eliza Margaretha493bfa92015-01-13 16:16:38 +000074 @Override
75 public boolean next() throws IOException {
76 isStartEnumeration = false;
77 return advance();
78 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000079
Eliza Margaretha493bfa92015-01-13 16:16:38 +000080 /**
81 * Returns true if there is a next match by checking if the CandidateList is
82 * not empty and set the first element of the list as the next match.
83 * Otherwise, if the RelationSpan has not ended yet, try to set the
84 * CandidateList.
85 *
86 * @return true if there is a next match.
87 * @throws IOException
88 */
89 private boolean advance() throws IOException {
90 while (hasMoreSpans || !candidateList.isEmpty()) {
91 if (!candidateList.isEmpty()) {
92 CandidateRelationSpan cs = candidateList.get(0);
93 this.matchDocNumber = cs.getDoc();
94 this.matchStartPosition = cs.getStart();
95 this.matchEndPosition = cs.getEnd();
96 this.setRightStart(cs.getRightStart());
97 this.setRightEnd(cs.getRightEnd());
98 this.spanId = cs.getSpanId(); // relation id
99 this.leftId = cs.getLeftId();
100 this.rightId = cs.getRightId();
101 candidateList.remove(0);
102 return true;
103 } else {
104 setCandidateList();
105 currentDoc = relationTermSpan.doc();
106 currentPosition = relationTermSpan.start();
107 }
108 }
109 return false;
110 }
Eliza Margaretha3e50bc42014-10-22 15:29:15 +0000111
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000112 /**
113 * Setting the CandidateList by adding all relationTermSpan whose start
114 * position is the same as the current span position, and sort the
115 * candidateList.
116 *
117 * @throws IOException
118 */
119 private void setCandidateList() throws IOException {
120 while (hasMoreSpans && relationTermSpan.doc() == currentDoc
121 && relationTermSpan.start() == currentPosition) {
122 CandidateRelationSpan cs = new CandidateRelationSpan(
123 relationTermSpan);
124 readPayload(cs);
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000125
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000126 candidateList.add(cs);
127 hasMoreSpans = relationTermSpan.next();
128 }
129 Collections.sort(candidateList);
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000130
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000131 // for (CandidateRelationSpan cs:candidateList){
132 // System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
133 // +" target "+cs.getRightStart()+","+cs.getRightEnd() +" id:"+cs.getSpanId());
134 // }
135 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000136
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000137 /**
138 * Identify the relation type of the given {@link CandidateRelationSpan} by
139 * checking the length of its payloads, and set some properties of the span
140 * based on the payloads.
141 *
142 * @param cs a CandidateRelationSpan
143 */
144 private void readPayload(CandidateRelationSpan cs) {
145 List<byte[]> payload = (List<byte[]>) cs.getPayloads();
146 int length = payload.get(0).length;
147 ByteBuffer bb = ByteBuffer.allocate(length);
148 bb.put(payload.get(0));
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000149
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000150 int i;
151 switch (length) {
152 case 10: // Token to token
153 i = bb.getInt(0);
154 cs.setRightStart(i - 1);
155 cs.setRightEnd(i);
156 break;
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000157
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000158 case 14: // Token to span
159 cs.setRightStart(bb.getInt(0));
160 cs.setRightEnd(bb.getInt(4));
161 break;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000162
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000163 case 15: // Span to token
164 cs.setEnd(bb.getInt(0));
165 i = bb.getInt(5);
166 cs.setRightStart(i - 1);
167 cs.setRightEnd(i);
168 break;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000169
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000170 case 18: // Span to span
171 cs.setEnd(bb.getInt(0));
172 cs.setRightStart(bb.getInt(4));
173 cs.setRightEnd(bb.getInt(8));
174 break;
175 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000176
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000177 cs.setRightId(bb.getShort(length - 2)); //right id
178 cs.setLeftId(bb.getShort(length - 4)); //left id
179 cs.setSpanId(bb.getShort(length - 6)); //relation id
180 // Payload is cleared.
181 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000182
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000183 @Override
184 public boolean skipTo(int target) throws IOException {
185 if (hasMoreSpans && (firstSpans.doc() < target)) {
186 if (!firstSpans.skipTo(target)) {
187 candidateList.clear();
188 return false;
189 }
190 }
191 setCandidateList();
192 matchPayload.clear();
193 isStartEnumeration = false;
194 return advance();
195 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000196
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000197 @Override
198 public long cost() {
199 return firstSpans.cost();
200 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000201
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000202 /**
203 * Returns the right start position of the current RelationSpan.
204 *
205 * @return the right start position of the current RelationSpan.
206 */
207 public int getRightStart() {
208 return rightStart;
209 }
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000210
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000211 /**
212 * Sets the right start position of the current RelationSpan.
213 *
214 * @param rightStart the right start position of the current RelationSpan
215 */
216 public void setRightStart(int rightStart) {
217 this.rightStart = rightStart;
218 }
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000219
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000220 /**
221 * Returns the right end position of the current RelationSpan.
222 *
223 * @return the right end position of the current RelationSpan.
224 */
225 public int getRightEnd() {
226 return rightEnd;
227 }
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000228
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000229 /**
230 * Sets the right end position of the current RelationSpan.
231 *
232 * @param rightEnd the right end position of the current RelationSpan.
233 */
234 public void setRightEnd(int rightEnd) {
235 this.rightEnd = rightEnd;
236 }
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000237
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000238 /**
239 * CandidateRelationSpan stores a state of RelationSpans. In a list,
240 * CandidateRelationSpans are ordered first by the position of the relation
241 * left side and then by the position of the relation right side.
242 */
243 class CandidateRelationSpan extends CandidateSpan implements
244 Comparable<CandidateSpan> {
Eliza Margarethad12cabb2014-10-27 17:45:34 +0000245
Eliza Margaretha493bfa92015-01-13 16:16:38 +0000246 private int rightStart, rightEnd;
247 private short leftId, rightId;
248
249 public CandidateRelationSpan(Spans span) throws IOException {
250 super(span);
251 }
252
253 @Override
254 public int compareTo(CandidateSpan o) {
255
256 int sourcePositionComparison = super.compareTo(o);
257
258 CandidateRelationSpan cs = (CandidateRelationSpan) o;
259 if (sourcePositionComparison == 0) {
260 if (this.getRightStart() == cs.getRightStart()) {
261 if (this.getRightEnd() == cs.getRightEnd())
262 return 0;
263 if (this.getRightEnd() > cs.getRightEnd())
264 return 1;
265 else
266 return -1;
267 } else if (this.getRightStart() < cs.getRightStart())
268 return -1;
269 else
270 return 1;
271 }
272
273 return sourcePositionComparison;
274 }
275
276 public int getRightEnd() {
277 return rightEnd;
278 }
279
280 public void setRightEnd(int rightEnd) {
281 this.rightEnd = rightEnd;
282 }
283
284 public int getRightStart() {
285 return rightStart;
286 }
287
288 public void setRightStart(int rightStart) {
289 this.rightStart = rightStart;
290 }
291
292 public short getLeftId() {
293 return leftId;
294 }
295
296 public void setLeftId(short leftId) {
297 this.leftId = leftId;
298 }
299
300 public short getRightId() {
301 return rightId;
302 }
303
304 public void setRightId(short rightId) {
305 this.rightId = rightId;
306 }
307
308 }
309
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000310}