blob: 0362bfad023164fed662b0735c4612ce4e3b6238 [file] [log] [blame]
Eliza Margarethafb25cef2014-06-06 14:19:07 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
Eliza Margaretha997ccde2014-07-04 09:20:35 +00004import java.util.ArrayList;
5import java.util.List;
Eliza Margarethafb25cef2014-06-06 14:19:07 +00006import java.util.Map;
7
8import org.apache.lucene.index.AtomicReaderContext;
9import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
Eliza Margaretha997ccde2014-07-04 09:20:35 +000011import org.apache.lucene.search.spans.SpanQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000012import org.apache.lucene.util.Bits;
13import org.slf4j.Logger;
14import org.slf4j.LoggerFactory;
15
Eliza Margaretha997ccde2014-07-04 09:20:35 +000016import de.ids_mannheim.korap.query.SpanAttributeQuery;
Eliza Margaretha98c200e2014-10-15 13:59:58 +000017import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000018
Eliza Margaretha493bfa92015-01-13 16:16:38 +000019/**
20 * Span enumeration of element or relation spans (referent spans) having and/or
21 * <em>not</em> having some attributes. This class only handles <em>and</em>
22 * operation on attributes.
Eliza Margarethafb25cef2014-06-06 14:19:07 +000023 *
Eliza Margaretha493bfa92015-01-13 16:16:38 +000024 * Use SpanOrQuery to perform <em>or</em> operation on attributes, i.e. choose
25 * between two elements with some attribute constraints. Note that the attribute
26 * constraints have to be formulated in Conjunctive Normal Form (CNF).
27 *
28 * @author margaretha
Eliza Margarethafb25cef2014-06-06 14:19:07 +000029 * */
Eliza Margaretha493bfa92015-01-13 16:16:38 +000030public class SpansWithAttribute extends SpansWithId {
Nils Diewald1455e1e2014-08-01 16:12:43 +000031
Eliza Margaretha493bfa92015-01-13 16:16:38 +000032 private SpansWithId referentSpans;
33 private List<AttributeSpans> attributeList;
34 private List<AttributeSpans> notAttributeList;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000035
Eliza Margaretha493bfa92015-01-13 16:16:38 +000036 protected Logger logger = LoggerFactory.getLogger(SpansWithAttribute.class);
Eliza Margaretha997ccde2014-07-04 09:20:35 +000037
Eliza Margaretha493bfa92015-01-13 16:16:38 +000038 /**
Eliza Margaretha7612bde2015-01-14 10:28:42 +000039 * Constructs SpansWithAttribute from the given
Eliza Margaretha493bfa92015-01-13 16:16:38 +000040 * {@link SpanWithAttributeQuery} and {@link SpansWithId}, such as
41 * elementSpans and relationSpans.
42 *
43 * @param spanWithAttributeQuery a spanWithAttributeQuery
44 * @param spansWithId a SpansWithId
45 * @param context
46 * @param acceptDocs
47 * @param termContexts
48 * @throws IOException
49 */
50 public SpansWithAttribute(SpanWithAttributeQuery spanWithAttributeQuery,
51 SpansWithId spansWithId, AtomicReaderContext context,
52 Bits acceptDocs, Map<Term, TermContext> termContexts)
53 throws IOException {
54 super(spanWithAttributeQuery, context, acceptDocs, termContexts);
55 referentSpans = spansWithId;
56 referentSpans.hasSpanId = true; // dummy setting enabling reading elementRef
57 hasMoreSpans = referentSpans.next();
Eliza Margarethafb25cef2014-06-06 14:19:07 +000058
Eliza Margaretha493bfa92015-01-13 16:16:38 +000059 attributeList = new ArrayList<AttributeSpans>();
60 notAttributeList = new ArrayList<AttributeSpans>();
Eliza Margarethafb25cef2014-06-06 14:19:07 +000061
Eliza Margaretha493bfa92015-01-13 16:16:38 +000062 List<SpanQuery> sqs = spanWithAttributeQuery.getClauseList();
63 if (sqs != null) {
64 for (SpanQuery sq : sqs) {
65 addAttributes((SpanAttributeQuery) sq, context, acceptDocs,
66 termContexts);
67 }
68 } else {
69 addAttributes(
70 (SpanAttributeQuery) spanWithAttributeQuery
71 .getSecondClause(),
72 context, acceptDocs, termContexts);
73 }
74 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +000075
Eliza Margaretha493bfa92015-01-13 16:16:38 +000076 /**
77 * Adds the given {@link SpanAttributeQuery} to the attributeList or
78 * notAttributeList depending on the query, whether it is a negation or not.
79 *
80 * @param sq a SpanAttributeQuery
81 * @param context
82 * @param acceptDocs
83 * @param termContexts
84 * @throws IOException
85 */
86 private void addAttributes(SpanAttributeQuery sq,
87 AtomicReaderContext context, Bits acceptDocs,
88 Map<Term, TermContext> termContexts) throws IOException {
89 AttributeSpans as = (AttributeSpans) sq.getSpans(context, acceptDocs,
90 termContexts);
91 if (sq.isNegation()) {
92 notAttributeList.add(as);
93 as.next();
94 } else {
95 attributeList.add(as);
96 hasMoreSpans &= as.next();
97 }
98 }
99
100 @Override
101 public boolean next() throws IOException {
102 isStartEnumeration = false;
103 return advance();
104 }
105
106 /**
107 * Searches for the next match by first identify a possible element
108 * position, and then ensuring that the element contains all the attributes
109 * and <em>do not</em> contain any of the not attributes.
110 *
111 * @return <code>true</code> if the a match is found, <code>false</code>
112 * otherwise.
113 * @throws IOException
114 */
115 private boolean advance() throws IOException {
116
117 while (hasMoreSpans && searchSpanPosition()) {
118 //logger.info("element: " + withAttributeSpans.start() + ","+ withAttributeSpans.end() +
119 // " ref:"+withAttributeSpans.getSpanId());
120
121 if (checkReferentId() && checkNotReferentId()) {
122 this.matchDocNumber = referentSpans.doc();
123 this.matchStartPosition = referentSpans.start();
124 this.matchEndPosition = referentSpans.end();
125 this.matchPayload = referentSpans.getPayload();
126 this.spanId = referentSpans.getSpanId();
127
128 if (attributeList.size() > 0)
129 hasMoreSpans = attributeList.get(0).next();
130
131 hasMoreSpans &= referentSpans.next();
132 return true;
133 }
134 }
135 return false;
136 }
137
138 /**
139 * Searches for a possible referentSpan having the same document number and
140 * start position as the attributes', and the position is different from the
141 * <em>not attributes'</em> positions.
142 *
143 * @return <code>true</code> if the referentSpan position is valid,
144 * <code>false</code> otherwise.
145 * @throws IOException
146 */
147 private boolean searchSpanPosition() throws IOException {
148 while (hasMoreSpans) {
149 if (referentSpans.getSpanId() < 1) { // the element does not have an attribute
150 hasMoreSpans = referentSpans.next();
151 continue;
152 }
153 if (checkAttributeListPosition()) {
154 advanceNotAttributes();
155 // logger.info("element is found: "+ withAttributeSpans.start());
156 return true;
157 }
158 }
159 return false;
160 }
161
162 /**
163 * Advances the attributes to be in the same document and start position as
164 * the referentSpan.
165 *
166 * @return <code>true</code> if the attributes are in the same document and
167 * start position as the referentSpan.
168 * @throws IOException
169 */
170 private boolean checkAttributeListPosition() throws IOException {
171 int currentPosition = referentSpans.start();
172 boolean isSame = true;
173 boolean isFirst = true;
174
175 for (AttributeSpans a : attributeList) {
176 if (!ensureSamePosition(referentSpans, a))
177 return false;
178 // logger.info("pos:" + withAttributeSpans.start());
179 if (isFirst) {
180 isFirst = false;
181 currentPosition = referentSpans.start();
182 } else if (currentPosition != referentSpans.start()) {
183 currentPosition = referentSpans.start();
184 isSame = false;
185
186 }
187 }
188 // logger.info("same pos: "+isSame+ ", pos "+withAttributeSpans.start());
189 return isSame;
190 }
191
192 /**
193 * Advances the element or attribute spans to be in the same document and
194 * start position.
195 * */
196 private boolean ensureSamePosition(SpansWithId spans,
197 AttributeSpans attributes) throws IOException {
198
199 while (hasMoreSpans && ensureSameDoc(spans, attributes)) {
200 if (attributes.start() == spans.start())
201 return true;
202 else if (attributes.start() > spans.start())
203 hasMoreSpans = spans.next();
204 else
205 hasMoreSpans = attributes.next();
206 }
207
208 return false;
209 }
210
211 /**
212 * Advances the <em>not-attributes</em> to be in the same or greater
213 * document number than referentSpans' document number. If a
214 * <em>not-attribute</em> is in the same document, it is advanced to be in
215 * the same as or greater start position than the current referentSpan.
216 *
217 * @throws IOException
218 */
219 private void advanceNotAttributes() throws IOException {
220
221 for (AttributeSpans a : notAttributeList) {
222 // advance the doc# of not AttributeSpans
223 // logger.info("a "+a.start());
224 while (!a.isFinish() && a.doc() <= referentSpans.doc()) {
225
226 if (a.doc() == referentSpans.doc()
227 && a.start() >= referentSpans.start())
228 break;
229
230 if (!a.next())
231 a.setFinish(true);
232 }
233 }
234 //return true;
235 }
236
237 /**
238 * Ensures that the referent id of each attributeSpans in the attributeList
239 * is the same as the spanId of the actual referentSpans.
240 *
241 * @return <code>true</code> if the spanId of the current referentSpans is
242 * the same as all the referentId of all the attributeSpans in the
243 * attributeList, <code>false</code> otherwise.
244 * @throws IOException
245 */
246 private boolean checkReferentId() throws IOException {
247 for (AttributeSpans attribute : attributeList) {
248 if (referentSpans.getSpanId() != attribute.getReferentId()) {
249 if (referentSpans.getSpanId() < attribute.getReferentId())
250 hasMoreSpans = attribute.next();
251 else {
252 hasMoreSpans = referentSpans.next();
253 }
254
255 return false;
256 }
257 }
258 return true;
259 }
260
261 /**
262 * Ensures that the referentSpans do <em>not</em> contain the
263 * <em>not attributes</em> (with negation). In other words, the spanId must
264 * not the same as the <em>not attribute</em>'s referentId.
265 *
266 * @return <code>true</code> if the referentSpan does not have the same
267 * spanId as the referentIds of all the not attributes,
268 * <code>false</code> otherwise.
269 * @throws IOException
270 */
271 private boolean checkNotReferentId() throws IOException {
272 for (AttributeSpans notAttribute : notAttributeList) {
273 if (!notAttribute.isFinish()
274 && referentSpans.start() == notAttribute.start()
275 && referentSpans.getSpanId() == notAttribute
276 .getReferentId()) {
277 hasMoreSpans = referentSpans.next();
278 return false;
279 }
280 }
281 return true;
282 }
283
284 @Override
285 public boolean skipTo(int target) throws IOException {
286 if (hasMoreSpans && (referentSpans.doc() < target)) {
287 if (!referentSpans.skipTo(target)) {
288 return false;
289 }
290 }
291 isStartEnumeration = false;
292 return advance();
293 }
294
295 @Override
296 public long cost() {
297
298 long cost = 0;
299 for (AttributeSpans as : attributeList) {
300 cost += as.cost();
301 }
302 for (AttributeSpans as : notAttributeList) {
303 cost += as.cost();
304 }
305 return referentSpans.cost() + cost;
306 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000307
308}