blob: 732eb42d6829bf477d7506f6d134cb086ebf23fa [file] [log] [blame]
Eliza Margaretha198e4ef2014-02-10 13:50:50 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.util.ArrayList;
5import java.util.List;
6import java.util.Map;
7
8import org.apache.lucene.index.AtomicReaderContext;
9import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
11import org.apache.lucene.util.Bits;
12
13import de.ids_mannheim.korap.query.SpanDistanceQuery;
14
15/** Base class for calculating a distance between two ordered spans.
16 * @author margaretha
17 * */
18public abstract class OrderedDistanceSpans extends DistanceSpans {
19
20 protected boolean hasMoreFirstSpans;
Eliza Margaretha198e4ef2014-02-10 13:50:50 +000021 protected int minDistance,maxDistance;
22
23 protected List<CandidateSpan> candidateList;
24 protected int candidateListIndex;
25 protected int candidateListDocNum;
26
27
28 public OrderedDistanceSpans(SpanDistanceQuery query,
29 AtomicReaderContext context, Bits acceptDocs,
30 Map<Term, TermContext> termContexts)
31 throws IOException {
32 super(query, context, acceptDocs, termContexts);
33
34 minDistance = query.getMinDistance();
35 maxDistance = query.getMaxDistance();
Eliza Margaretha198e4ef2014-02-10 13:50:50 +000036
37 hasMoreFirstSpans = firstSpans.next();
38
39 candidateList = new ArrayList<>();
40 candidateListIndex = -1;
41 candidateListDocNum = firstSpans.doc();
42 }
43
44 /** Find a span match in the candidate list.
45 * */
46 @Override
47 protected boolean advance() throws IOException {
48 while( hasMoreSpans && candidateListIndex < candidateList.size() ){
49 // Check candidates
50 for (candidateListIndex++;candidateListIndex < candidateList.size();
51 candidateListIndex++){
52 if (findMatch())
53 return true;
54 }
55
56 do { // Forward secondspan
57 hasMoreSpans = secondSpans.next();
58 setCandidateList();
59 }
60 while (hasMoreSpans && !isSecondSpanValid());
61 }
62 return false;
63 }
64
65 /** Determine if the current second span is valid. It is always valid in
66 * TokenDistanceSpan, but it can be invalid in the ElementDistanceSpan,
67 * namely when it is not within a particular element (a sentence or a
68 * paragraph depends on the element distance unit).
69 *
70 * */
71 protected abstract boolean isSecondSpanValid() throws IOException;
72
73 /** Collect all possible firstspan instances as candidate spans for
74 * the current secondspan. The candidate spans are within the max
75 * distance from the current secondspan.
76 * */
77 protected abstract void setCandidateList() throws IOException;
78
79 /** Define the conditions for a match.
80 * */
81 protected abstract boolean findMatch() throws IOException;
82
83 /** Define the properties of a span match.
84 * */
85 protected void setMatchProperties(CandidateSpan candidateSpan,
86 boolean isDistanceZero) throws IOException{
87
88 setMatchFirstSpan(candidateSpan);
89 setMatchSecondSpan(new CandidateSpan(secondSpans));
90
91 if (isDistanceZero){
92 matchStartPosition = Math.min(candidateSpan.getStart(), secondSpans.start());
93 matchEndPosition = Math.max(candidateSpan.getEnd(), secondSpans.end());
94 }
95 else {
96 matchStartPosition = candidateSpan.getStart();
97 matchEndPosition = secondSpans.end();
98 }
99
100 this.matchDocNumber = secondSpans.doc();
101 if (collectPayloads){
102 if (candidateSpan.getPayloads() != null) {
103 matchPayload.addAll(candidateSpan.getPayloads());
104 }
105 if (secondSpans.isPayloadAvailable()) {
106 matchPayload.addAll(secondSpans.getPayload());
107 }
108 }
109
110 log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
Eliza Margaretha01929182014-02-19 11:48:59 +0000111 matchEndPosition);
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000112 }
113
114 @Override
115 public boolean skipTo(int target) throws IOException {
116 if (hasMoreSpans && (secondSpans.doc() < target)){
117 if (!secondSpans.skipTo(target)){
118 candidateList.clear();
119 return false;
120 }
121 }
122
123 setCandidateList();
124 matchPayload.clear();
125 isStartEnumeration=false;
126 return advance();
127 }
128}