blob: a378e1a91405ea4884fc61485d9a042cd5e66507 [file] [log] [blame]
Eliza Margarethafb25cef2014-06-06 14:19:07 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
Eliza Margaretha997ccde2014-07-04 09:20:35 +00004import java.util.ArrayList;
5import java.util.List;
Eliza Margarethafb25cef2014-06-06 14:19:07 +00006import java.util.Map;
7
8import org.apache.lucene.index.AtomicReaderContext;
9import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
Eliza Margaretha997ccde2014-07-04 09:20:35 +000011import org.apache.lucene.search.spans.SpanQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000012import org.apache.lucene.util.Bits;
13import org.slf4j.Logger;
14import org.slf4j.LoggerFactory;
15
Eliza Margaretha997ccde2014-07-04 09:20:35 +000016import de.ids_mannheim.korap.query.SpanAttributeQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000017import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
18
Eliza Margaretha997ccde2014-07-04 09:20:35 +000019/** Span enumeration of elements that have some attribute and/or do <em>not</em>
20 * have some attributes. This class handles <em>and</em> operation on attributes.
Eliza Margarethafb25cef2014-06-06 14:19:07 +000021 *
Eliza Margaretha997ccde2014-07-04 09:20:35 +000022 * Use SpanOrQuery to perform <em>or</em> operation on attributes, i.e. choose
23 * between two elements with some attribute constraints. Note that the attribute
24 * constraints have to be in Conjunctive Normal Form (CNF).
25 *
26 * @author margaretha
Eliza Margarethafb25cef2014-06-06 14:19:07 +000027 * */
28public class ElementAttributeSpans extends SimpleSpans{
29
Eliza Margaretha997ccde2014-07-04 09:20:35 +000030 private ElementSpans elements;
31 private List<AttributeSpans> attributeList;
32 private List<AttributeSpans> notAttributeList;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000033
34 protected Logger logger = LoggerFactory.getLogger(ElementAttributeSpans.class);
Nils Diewald1455e1e2014-08-01 16:12:43 +000035
36 // This advices the java compiler to ignore all loggings
37 public static final boolean DEBUG = false;
38
Eliza Margarethafb25cef2014-06-06 14:19:07 +000039 public ElementAttributeSpans(SpanElementAttributeQuery simpleSpanQuery,
40 AtomicReaderContext context, Bits acceptDocs,
41 Map<Term, TermContext> termContexts) throws IOException {
42 super(simpleSpanQuery, context, acceptDocs, termContexts);
43 elements = (ElementSpans) firstSpans;
Eliza Margaretha1c3bf272014-06-11 11:50:39 +000044 elements.isElementRef = true; // dummy setting enabling reading elementRef
Eliza Margaretha997ccde2014-07-04 09:20:35 +000045 hasMoreSpans = elements.next();
46
47 attributeList = new ArrayList<AttributeSpans>();
48 notAttributeList = new ArrayList<AttributeSpans>();
49
50 List<SpanQuery> sqs = simpleSpanQuery.getClauseList();
51 AttributeSpans as;
52 for (SpanQuery sq: sqs){
53 as = (AttributeSpans) sq.getSpans(context, acceptDocs, termContexts);
54 if (((SpanAttributeQuery) sq).isNegation()){
55 notAttributeList.add(as);
56 as.next();
57 }
58 else {
59 attributeList.add(as);
60 hasMoreSpans &= as.next();
61 }
62 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +000063 }
64
65 @Override
66 public boolean next() throws IOException {
67 isStartEnumeration=false;
68 return advance();
69 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +000070
71 /** Search for the next match by first identify a possible
72 * element position, and then ensuring that the element contains
73 * all the attributes and <em>do not</em> contain any of the
74 * not attributes.
75 * */
Eliza Margarethafb25cef2014-06-06 14:19:07 +000076 private boolean advance() throws IOException {
77
Eliza Margaretha997ccde2014-07-04 09:20:35 +000078 while (hasMoreSpans && computeElementPosition()){
Nils Diewald1455e1e2014-08-01 16:12:43 +000079 if (DEBUG)
80 logger.info("element: " + elements.start() + ","+ elements.end() +
Eliza Margarethac7fb7312014-07-25 14:11:36 +000081 " ref:"+elements.getElementRef());
Eliza Margaretha669e7a82014-06-26 12:57:18 +000082
Eliza Margaretha997ccde2014-07-04 09:20:35 +000083 if (checkElementRef() && checkNotElementRef()){
84 this.matchDocNumber = elements.doc();
85 this.matchStartPosition = elements.start();
86 this.matchEndPosition = elements.end();
87 this.matchPayload = elements.getPayload();
88 hasMoreSpans = attributeList.get(0).next();
Nils Diewald1455e1e2014-08-01 16:12:43 +000089 if (DEBUG)
90 logger.info("MATCH "+matchDocNumber);
Eliza Margaretha997ccde2014-07-04 09:20:35 +000091
92 hasMoreSpans = elements.next();
93 return true;
94 }
95 }
96 return false;
97 }
98
Eliza Margarethac7fb7312014-07-25 14:11:36 +000099 /** Ensuring all the attribute spans having the same elementRef with
100 * the actual element's elementRef.
101 * */
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000102 private boolean checkElementRef() throws IOException{
103
104 for (AttributeSpans attribute: attributeList){
105 if (elements.getElementRef() != attribute.getElementRef()){
Nils Diewald1455e1e2014-08-01 16:12:43 +0000106 if (DEBUG)
107 logger.info("attribute ref doesn't match");
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000108 if (elements.getElementRef() < attribute.getElementRef())
109 hasMoreSpans = attribute.next();
110 else {
111 hasMoreSpans = elements.next();
112 }
113
114 return false;
115 }
116 }
117 return true;
118 }
119
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000120 /** Ensuring elements do not contain the not attributes. In other words,
121 * the elementRef is not the same as the not attribute's elementRefs.
122 * */
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000123 private boolean checkNotElementRef() throws IOException{
124 for (AttributeSpans notAttribute: notAttributeList){
Eliza Margaretha7788a982014-08-29 16:10:52 +0000125 if (!notAttribute.isFinish() &&
126 elements.start() == notAttribute.start() &&
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000127 elements.getElementRef() == notAttribute.getElementRef()){
Nils Diewald1455e1e2014-08-01 16:12:43 +0000128 if (DEBUG)
129 logger.info("not attribute ref exists");
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000130 hasMoreSpans = elements.next();
131 return false;
132 }
133 }
134 return true;
135 }
136
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000137 /** Search for a possible element having the same doc and start position as
138 * the attributes.
139 * */
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000140 private boolean computeElementPosition() throws IOException {
141
142 while (hasMoreSpans){
143
144 if (elements.getElementRef() < 1){ // the element does not have an attribute
Eliza Margaretha669e7a82014-06-26 12:57:18 +0000145 elements.isElementRef = true; // dummy setting enabling reading elementRef
146 hasMoreSpans = elements.next();
Nils Diewald1455e1e2014-08-01 16:12:43 +0000147 if (DEBUG)
148 logger.info("skip");
Eliza Margaretha669e7a82014-06-26 12:57:18 +0000149 continue;
150 }
151
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000152 if (checkAttributeListPosition() &&
153 checkNotAttributeListPosition()){
Nils Diewald1455e1e2014-08-01 16:12:43 +0000154 if (DEBUG)
155 logger.info("element is found: "+ elements.start());
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000156 return true;
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000157 }
158 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000159
160 return false;
161 }
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000162
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000163 /** Advancing the not attributes to be in the same or greater doc# than
164 * element doc#. If a not attribute is in the same doc, advance it to
165 * be in the same or greater start position than the element.
166 *
167 * */
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000168 private boolean checkNotAttributeListPosition() throws IOException{
169
170 for (AttributeSpans a : notAttributeList){
171 // advance the doc# of not AttributeSpans
Nils Diewald1455e1e2014-08-01 16:12:43 +0000172 if (DEBUG)
173 logger.info("a "+a.start());
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000174 while (!a.isFinish() && a.doc() <= elements.doc()){
175
176 if (a.doc() == elements.doc() &&
177 a.start() >= elements.start())
178 break;
179
180 if (!a.next()) a.setFinish(true);
181 }
182 }
183
184 return true;
185 }
186
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000187 /** Advancing the attributes to be in the same doc and start position
188 * as the element.
189 * */
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000190 private boolean checkAttributeListPosition() throws IOException{
191 int currentPosition = elements.start();
192 boolean isSame = true;
193 boolean isFirst = true;
194
195 for (AttributeSpans a : attributeList){
196 if(!ensureSamePosition(elements, a)) return false;
Nils Diewald1455e1e2014-08-01 16:12:43 +0000197 if (DEBUG)
198 logger.info("pos:" + elements.start());
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000199 if (isFirst){
200 isFirst = false;
201 currentPosition = elements.start();
202 }
203 else if (currentPosition != elements.start()){
204 currentPosition = elements.start();
205 isSame = false;
206
207 }
208 }
Nils Diewald1455e1e2014-08-01 16:12:43 +0000209 if (DEBUG)
210 logger.info("same pos: "+isSame+ ", pos "+elements.start());
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000211 return isSame;
212 }
213
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000214 /** Advance the element or attribute spans to be in the same doc
215 * and start position.
216 * */
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000217 private boolean ensureSamePosition(ElementSpans elements,
218 AttributeSpans attributes) throws IOException {
219
220 while (hasMoreSpans && ensureSameDoc(elements, attributes)){
221 if (attributes.start() == elements.start())
222 return true;
223 else if (attributes.start() > elements.start())
224 hasMoreSpans = elements.next();
225 else
226 hasMoreSpans= attributes.next();
227 }
228
229 return false;
230 }
231
232 @Override
233 public boolean skipTo(int target) throws IOException {
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000234 if (hasMoreSpans && (elements.doc() < target)){
235 if (!elements.skipTo(target)){
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000236 return false;
237 }
238 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000239 isStartEnumeration=false;
240 return advance();
241 }
242
243 @Override
244 public long cost() {
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000245
246 long cost = 0;
247 for (AttributeSpans as: attributeList){
248 cost += as.cost();
249 }
250 for (AttributeSpans as: notAttributeList){
251 cost += as.cost();
252 }
253 return elements.cost() + cost;
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000254 }
255
256
257}