blob: 80904f247de97b81540ada56cb340bbde297d8d7 [file] [log] [blame]
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.util.ArrayList;
5import java.util.List;
6import java.util.Map;
7
8import org.apache.lucene.index.AtomicReaderContext;
9import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
11import org.apache.lucene.search.spans.Spans;
12import org.apache.lucene.util.Bits;
13
14import de.ids_mannheim.korap.query.SpanExpansionQuery;
15
Eliza Margaretha7dd87122014-09-16 15:34:46 +000016/** Spans expanded with min m tokens and max n tokens, and throughout all
17 * the expansions do not contain the notClause.
18 *
19 * @author margaretha
20 * */
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000021public class ExpandedExclusionSpans extends SimpleSpans{
22
23 private int min, max;
24 private int direction;
25 private List<CandidateSpan> candidateSpans;
26 private boolean hasMoreNotClause;
27 private Spans notClause;
28
29 public ExpandedExclusionSpans(SpanExpansionQuery spanExpansionQuery,
30 AtomicReaderContext context, Bits acceptDocs,
31 Map<Term, TermContext> termContexts) throws IOException {
32 super(spanExpansionQuery, context, acceptDocs, termContexts);
33
34 if (spanExpansionQuery.getSecondClause() == null){
35 throw new IllegalArgumentException("The SpanExpansionQuery " +
36 "is not valid. The spanquery to exclude (notClause) cannot " +
37 "be null.");
38 }
39
40 /*if (spanExpansionQuery.getMin() < 1){
41 throw new IllegalArgumentException("Min occurrence for notClause " +
42 "must be at least 1.");
43 }*/
44
45 this.min = spanExpansionQuery.getMin();
46 this.max = spanExpansionQuery.getMax();
47 this.direction = spanExpansionQuery.getDirection();
48
49 this.notClause = secondSpans;
50 this.hasMoreNotClause = notClause.next();
51
52 candidateSpans = new ArrayList<CandidateSpan>();
53 hasMoreSpans = firstSpans.next();
54 }
55
56 @Override
57 public boolean next() throws IOException {
58 matchPayload.clear();
59 isStartEnumeration = false;
60 return advance();
61 }
62
63 private boolean advance() throws IOException {
64 while (hasMoreSpans || candidateSpans.size() > 0){
Eliza Margaretha7dd87122014-09-16 15:34:46 +000065 if (candidateSpans.size() > 0){
66 // set a candidate span as a match
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000067 CandidateSpan cs = candidateSpans.get(0);
68 matchDocNumber = cs.getDoc();
69 matchStartPosition = cs.getStart();
70 matchEndPosition = cs.getEnd();
71 matchPayload = cs.getPayloads();
72 candidateSpans.remove(0);
73 return true;
74 }
75 else if (!hasMoreNotClause || notClause.doc() > firstSpans.doc()){
76 generateCandidates(min, max, direction);
77 hasMoreSpans = firstSpans.next();
78 }
79 else findMatches();
80 }
81 return false;
82 }
83
84 private void findMatches() throws IOException {
85 while (hasMoreNotClause && notClause.doc() <= firstSpans.doc()){
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000086 if (notClause.doc() == firstSpans.doc()){
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000087 if (direction < 0 ){ // left
Eliza Margaretha7dd87122014-09-16 15:34:46 +000088 expandLeft();
89 } // right
90 else { expandRight(); }
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000091 break;
92 }
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +000093 else if (!notClause.next()) hasMoreNotClause = false;
94 }
95 }
96
Eliza Margaretha7dd87122014-09-16 15:34:46 +000097 private void expandLeft() throws IOException{
98 //int counter = max;
99 int maxPos = max;
100 CandidateSpan lastNotClause = null;
101 while (hasMoreNotClause &&
102 notClause.start() < firstSpans.start()){
103
104 // between max and firstspan.start()
105 if (notClause.start() >= firstSpans.start() - maxPos){
106 maxPos = firstSpans.start() - notClause.start() -1;
107 lastNotClause = new CandidateSpan(notClause);
108 //counter--;
109 }
110 if (!notClause.next()) hasMoreNotClause = false;
111 }
112
113 // if a notClause is between max and firstspan.start,
114 // then maxPos = last NotClause pos -1
115 generateCandidates(min, maxPos, direction);
116
117 if (lastNotClause != null)
118 while ((hasMoreSpans = firstSpans.next())
119 // the next notClause is not in between max and firstspan.start()
120 && notClause.start() > firstSpans.start()
121 // the last notClause is in between max and firstspan.start()
122 && lastNotClause.getStart() < firstSpans.start()
123 && lastNotClause.getStart() >= firstSpans.start() - max
124 ){
125
126 maxPos = firstSpans.start() - lastNotClause.getStart() -1;
127 generateCandidates(min, maxPos, direction);
128 }
129 else hasMoreSpans = firstSpans.next();
130 }
131
132 private void expandRight() throws IOException{
133 int expansionEnd = firstSpans.end() + max;
134 int maxPos = max;
135 boolean isFound = false;
136
137 CandidateSpan firstNotClause = null;
138 //System.out.println("main start:"+firstSpans.start());
139 while (hasMoreNotClause && notClause.start() < expansionEnd){
140 // between firstspan.end() and expansionEnd
141 if (!isFound && notClause.start() >= firstSpans.end()){
142 maxPos = notClause.start() - firstSpans.end() -1;
143 firstNotClause = new CandidateSpan(notClause);
144 isFound = true;
145 }
146 if (!notClause.next()) hasMoreNotClause = false;
147 }
148 // if a notClause is between firstSpan.end and max
149 // then maxPos = the first notClause pos -1
150 generateCandidates(min, maxPos, direction);
151
152 if (firstNotClause !=null){
153 while ((hasMoreSpans = firstSpans.next())
154 // in between
155 && firstNotClause.getStart() < firstSpans.end() + max
156 && firstNotClause.getStart() >= firstSpans.end())
157 {
158 //System.out.println("first start:"+firstNotClause.getStart()+", main start:"+firstSpans.start());
159 maxPos = firstNotClause.getStart() - firstSpans.end() -1;
160 generateCandidates(min, maxPos, direction);
161 }
162 }
163 else hasMoreSpans = firstSpans.next();
164 }
165
Eliza Margarethac8d0a7a2014-09-16 13:08:30 +0000166 private void generateCandidates(int minPos, int maxPos, int direction)
167 throws IOException {
168 int counter;
169 int start, end;
170
171 if (direction < 0 ) { // left
172 counter = maxPos;
173 while (counter >= min){
174 start = firstSpans.start() - counter;
175 if (start > -1 ){
176
177 end = firstSpans.end();
178 //System.out.println(start+","+end);
179 candidateSpans.add(new CandidateSpan(start, end, firstSpans.doc(),
180 firstSpans.cost(), firstSpans.getPayload()));
181 }
182 counter --;
183 }
184 }
185 else { // right
186 counter = minPos;
187 while(counter <= maxPos){
188 start = firstSpans.start();
189 end = firstSpans.end() + counter;
190 //System.out.println(start+","+end);
191 candidateSpans.add(new CandidateSpan(start, end, firstSpans.doc(),
192 firstSpans.cost(), firstSpans.getPayload()));
193 counter++;
194 }
195 }
196 }
197
198
199 @Override
200 public boolean skipTo(int target) throws IOException {
201 if (hasMoreSpans && (firstSpans.doc() < target)){
202 if (!firstSpans.skipTo(target)){
203 hasMoreSpans = false;
204 return false;
205 }
206 }
207 matchPayload.clear();
208 return advance();
209 }
210
211 @Override
212 public long cost() {
213 // TODO Auto-generated method stub
214 return 0;
215 }
216}