blob: cf70cb9a8926f02636d52dceb1c429cd5aa22f6d [file] [log] [blame]
Eliza Margarethafb25cef2014-06-06 14:19:07 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.nio.ByteBuffer;
5import java.util.ArrayList;
6import java.util.Collections;
7import java.util.List;
8import java.util.Map;
9
10import org.apache.lucene.index.AtomicReaderContext;
11import org.apache.lucene.index.Term;
12import org.apache.lucene.index.TermContext;
13import org.apache.lucene.search.spans.Spans;
Eliza Margaretha8551e5b2014-12-15 16:46:18 +000014import org.apache.lucene.search.spans.TermSpans;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000015import org.apache.lucene.util.Bits;
16import org.slf4j.Logger;
17import org.slf4j.LoggerFactory;
18
Eliza Margarethafb25cef2014-06-06 14:19:07 +000019import de.ids_mannheim.korap.query.SpanAttributeQuery;
20
Nils Diewaldbb33da22015-03-04 16:24:25 +000021/**
22 * UPDATE THIS!
23 * Span enumeration of attributes which are term spans with special
24 * payload
25 * assignments referring to another span (e.g. element/relation span)
26 * to which
27 * an attribute span belongs. The class is basically a wrapper of
28 * Lucene {@link TermSpans} with additional functionality regarding
29 * element/relation
30 * reference. Element/relation id is annotated ascendingly starting
31 * from the
Eliza Margaretha8551e5b2014-12-15 16:46:18 +000032 * left side. <br/>
33 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 * The enumeration is ordered firstly by the start position of the
35 * attribute and
36 * secondly by the element/relation id descendingly. This order helps
37 * to match
Eliza Margaretha8551e5b2014-12-15 16:46:18 +000038 * element and attributes faster.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000039 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 * AttributeSpans contain information about the elements they belongs
41 * to, thus
Eliza Margaretha493bfa92015-01-13 16:16:38 +000042 * querying them alone is sufficient to get
43 * "any element having a specific attribute".
44 *
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000045 * @author margaretha
Eliza Margarethafb25cef2014-06-06 14:19:07 +000046 * */
margaretha9d0f76a2015-03-19 10:10:39 +010047public class AttributeSpans extends SimpleSpans {
Nils Diewald1455e1e2014-08-01 16:12:43 +000048
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000049 private List<CandidateAttributeSpan> candidateList;
50 private int currentDoc, currentPosition;
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000051 private boolean isFinish;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000052
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000053 protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
Eliza Margarethafb25cef2014-06-06 14:19:07 +000054
Nils Diewaldbb33da22015-03-04 16:24:25 +000055
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000056 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000057 * Constructs Attributespans based on the specified
58 * SpanAttributeQuery.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000059 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000060 * @param spanAttributeQuery
61 * a spanAttributeQuery
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000062 * @param context
63 * @param acceptDocs
64 * @param termContexts
65 * @throws IOException
66 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000067 public AttributeSpans (SpanAttributeQuery spanAttributeQuery,
68 AtomicReaderContext context, Bits acceptDocs,
69 Map<Term, TermContext> termContexts)
70 throws IOException {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000071 super(spanAttributeQuery, context, acceptDocs, termContexts);
margaretha9d0f76a2015-03-19 10:10:39 +010072 this.hasSpanId = true;
73
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000074 candidateList = new ArrayList<>();
75 hasMoreSpans = firstSpans.next();
76 if (hasMoreSpans) {
77 currentDoc = firstSpans.doc();
78 currentPosition = firstSpans.start();
79 }
80 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +000081
Nils Diewaldbb33da22015-03-04 16:24:25 +000082
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000083 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000084 public boolean next () throws IOException {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000085 isStartEnumeration = false;
86 matchPayload.clear();
87 return advance();
88 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +000089
Nils Diewaldbb33da22015-03-04 16:24:25 +000090
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000091 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000092 * Moves to the next match by checking the candidate match list or
93 * setting
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +000094 * the list first when it is empty.
95 *
96 * @return true if a match is found
97 * @throws IOException
98 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 private boolean advance () throws IOException {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000100 while (hasMoreSpans || !candidateList.isEmpty()) {
101 if (!candidateList.isEmpty()) {
102 // set the current match from the first CandidateAttributeSpan
103 // in the candidate list
104 CandidateAttributeSpan cs = candidateList.get(0);
105 this.matchDocNumber = cs.getDoc();
106 this.matchStartPosition = cs.getStart();
107 this.matchEndPosition = cs.getEnd();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000108 this.setSpanId(cs.getSpanId()); // referentId
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000109 candidateList.remove(0);
110 return true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111 }
112 else {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000113 setCandidateList();
114 currentDoc = firstSpans.doc();
115 currentPosition = firstSpans.start();
116 }
117 }
118 return false;
119 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000120
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000122 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000123 * Collects all the attributes in the same start position and sort
124 * them by
125 * element/relation Id in a reverse order (the ones with the
126 * bigger
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000127 * element/relation Id first).
128 *
129 * @throws IOException
130 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000131 private void setCandidateList () throws IOException {
Eliza Margaretha38a94662014-11-20 13:48:00 +0000132
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000133 while (hasMoreSpans && firstSpans.doc() == currentDoc
134 && firstSpans.start() == currentPosition) {
Eliza Margaretha38a94662014-11-20 13:48:00 +0000135
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000136 candidateList.add(createCandidateSpan());
137 hasMoreSpans = firstSpans.next();
138 }
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000139
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000140 Collections.sort(candidateList);
141 Collections.reverse(candidateList);
142 }
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000143
Nils Diewaldbb33da22015-03-04 16:24:25 +0000144
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000145 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000146 * Creates a CandidateAttributeSpan based on the child span and
147 * set the
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000148 * spanId and elementEnd from its payloads.
149 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 * @param firstSpans
151 * an AttributeSpans
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000152 * @return a CandidateAttributeSpan
153 * @throws IOException
154 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000155 private CandidateAttributeSpan createCandidateSpan () throws IOException {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000156 List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
157 ByteBuffer wrapper = ByteBuffer.wrap(payload.get(0));
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000158
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 short spanId;
160 int start = 0, end;
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000161
Nils Diewaldbb33da22015-03-04 16:24:25 +0000162 if (payload.get(0).length == 6) {
163 end = wrapper.getInt(0);
164 spanId = wrapper.getShort(4);
165 return new CandidateAttributeSpan(firstSpans, spanId, end);
166 }
167 else if (payload.get(0).length == 10) {
margaretha9d0f76a2015-03-19 10:10:39 +0100168 start = wrapper.getInt(0);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000169 end = wrapper.getInt(4);
170 spanId = wrapper.getShort(8);
171 return new CandidateAttributeSpan(firstSpans, spanId, start, end);
172 }
173
174 throw new NullPointerException("Missing element end in payloads.");
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000175 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000176
Nils Diewaldbb33da22015-03-04 16:24:25 +0000177
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000178 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 * Tells if the enumeration of the AttributeSpans has come to an
180 * end.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000181 *
182 * @return true if the enumeration has finished.
183 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000184 public boolean isFinish () {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000185 return isFinish;
186 }
187
Nils Diewaldbb33da22015-03-04 16:24:25 +0000188
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000189 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 * Sets true if the enumeration of the AttributeSpans has come to
191 * an end.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000192 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000193 * @param isFinish
194 * <code>true</code> if the enumeration of the
195 * AttributeSpans has come to an end,
196 * <code>false</code> otherwise.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000197 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000198 public void setFinish (boolean isFinish) {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000199 this.isFinish = isFinish;
200 }
201
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000203 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204 public boolean skipTo (int target) throws IOException {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000205 if (hasMoreSpans && (firstSpans.doc() < target)) {
206 if (!firstSpans.skipTo(target)) {
207 candidateList.clear();
208 return false;
209 }
210 }
211 setCandidateList();
212 matchPayload.clear();
213 isStartEnumeration = false;
214 return advance();
215 }
216
Nils Diewaldbb33da22015-03-04 16:24:25 +0000217
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000218 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000219 public long cost () {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000220 return firstSpans.cost();
221 }
222
223 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000224 * CandidateAttributeSpan contains information about an Attribute
225 * span. All
226 * attribute spans occurring in an identical position are
227 * collected as
228 * CandidateAttributeSpans. The list of these
229 * CandidateAttributeSpans are
230 * sorted based on the span ids to which the attributes belong to.
231 * The
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000232 * attributes with smaller spanIds come first on the list.
233 *
234 * */
Eliza Margarethac8d59202014-12-16 16:21:16 +0000235 class CandidateAttributeSpan extends CandidateSpan implements
236 Comparable<CandidateSpan> {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000237
238 private short spanId;
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000239
Nils Diewaldbb33da22015-03-04 16:24:25 +0000240
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000241 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000242 * Construct a CandidateAttributeSpan based on the given span,
243 * spanId,
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000244 * and elementEnd.
245 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000246 * @param span
247 * an AttributeSpans
248 * @param spanId
249 * the element or relation span id to which the
250 * current
251 * state of the specified AttributeSpans belongs
252 * to.
253 * @param elementEnd
254 * the end position of the element or relation span
255 * to
256 * which the current state of the specified
257 * AttributeSpans
258 * belongs to.
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000259 * @throws IOException
260 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000261 public CandidateAttributeSpan (Spans span, short spanId, int elementEnd)
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000262 throws IOException {
263 super(span);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264 setSpanId(spanId);
265 this.end = elementEnd;
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000266 }
267
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000268
Nils Diewaldbb33da22015-03-04 16:24:25 +0000269 public CandidateAttributeSpan (Spans span, short spanId, int start,
270 int end) throws IOException {
271 super(span);
272 setSpanId(spanId);
273 this.start = start;
274 this.end = end;
275 }
276
277
278 public void setSpanId (short spanId) {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000279 this.spanId = spanId;
280 }
281
Nils Diewaldbb33da22015-03-04 16:24:25 +0000282
283 public short getSpanId () {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000284 return spanId;
285 }
286
Nils Diewaldbb33da22015-03-04 16:24:25 +0000287
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000288 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000289 public int compareTo (CandidateSpan o) {
Eliza Margaretha6a2e80b2014-12-02 17:03:23 +0000290 CandidateAttributeSpan cs = (CandidateAttributeSpan) o;
291 if (this.spanId == cs.spanId)
292 return 0;
293 else if (this.spanId > cs.spanId)
294 return 1;
295 return -1;
296 }
297 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000298}