blob: 64507abb70129e8483d06f05296de192628103df [file] [log] [blame]
Eliza Margaretha198e4ef2014-02-10 13:50:50 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.util.ArrayList;
5import java.util.List;
6import java.util.Map;
7
Akron700c1eb2015-09-25 16:57:30 +02008import org.apache.lucene.index.LeafReaderContext;
Eliza Margaretha198e4ef2014-02-10 13:50:50 +00009import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
11import org.apache.lucene.util.Bits;
12
13import de.ids_mannheim.korap.query.SpanDistanceQuery;
14
Eliza Margaretha609a5be2014-12-18 16:52:20 +000015/**
16 * Base class for calculating a distance between two ordered spans.
17 *
18 * @author margaretha
Eliza Margaretha198e4ef2014-02-10 13:50:50 +000019 * */
20public abstract class OrderedDistanceSpans extends DistanceSpans {
21
Eliza Margaretha609a5be2014-12-18 16:52:20 +000022 protected boolean hasMoreFirstSpans;
23 protected int minDistance, maxDistance;
Nils Diewald34eaa862014-06-03 10:56:27 +000024
Eliza Margaretha609a5be2014-12-18 16:52:20 +000025 protected List<CandidateSpan> candidateList;
26 protected int candidateListIndex;
27 protected int candidateListDocNum;
Nils Diewald34eaa862014-06-03 10:56:27 +000028
Nils Diewaldbb33da22015-03-04 16:24:25 +000029
Eliza Margaretha609a5be2014-12-18 16:52:20 +000030 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 * Constructs OrderedDistanceSpans based on the given
32 * SpanDistanceQuery.
Eliza Margaretha609a5be2014-12-18 16:52:20 +000033 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 * @param query
35 * a SpanDistanceQuery
Eliza Margaretha609a5be2014-12-18 16:52:20 +000036 * @param context
37 * @param acceptDocs
38 * @param termContexts
39 * @throws IOException
40 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000041 public OrderedDistanceSpans (SpanDistanceQuery query,
Akron700c1eb2015-09-25 16:57:30 +020042 LeafReaderContext context, Bits acceptDocs,
Nils Diewaldbb33da22015-03-04 16:24:25 +000043 Map<Term, TermContext> termContexts)
44 throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000045 super(query, context, acceptDocs, termContexts);
Eliza Margaretha198e4ef2014-02-10 13:50:50 +000046
Eliza Margaretha609a5be2014-12-18 16:52:20 +000047 minDistance = query.getMinDistance();
48 maxDistance = query.getMaxDistance();
49
50 hasMoreFirstSpans = firstSpans.next();
51
52 candidateList = new ArrayList<>();
53 candidateListIndex = -1;
54 candidateListDocNum = firstSpans.doc();
55 }
56
Nils Diewaldbb33da22015-03-04 16:24:25 +000057
Eliza Margaretha609a5be2014-12-18 16:52:20 +000058 /**
59 * Finds a span match in the candidate list.
60 * */
61 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000062 protected boolean advance () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000063 while (hasMoreSpans && candidateListIndex < candidateList.size()) {
64 // Check candidates
65 for (candidateListIndex++; candidateListIndex < candidateList
66 .size(); candidateListIndex++) {
67 if (findMatch())
68 return true;
69 }
70
71 do { // Forward secondspan
72 hasMoreSpans = secondSpans.next();
73 setCandidateList();
Nils Diewaldbb33da22015-03-04 16:24:25 +000074 }
75 while (hasMoreSpans && !isSecondSpanValid());
Eliza Margaretha609a5be2014-12-18 16:52:20 +000076 }
77 return false;
78 }
79
Nils Diewaldbb33da22015-03-04 16:24:25 +000080
Eliza Margaretha609a5be2014-12-18 16:52:20 +000081 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000082 * Determines if the current second span is valid (i.e. within an
83 * element).
84 * It is always valid in TokenDistanceSpan, but it can be invalid
85 * in the
86 * ElementDistanceSpan, namely when it is not within a particular
87 * element (a
Eliza Margaretha609a5be2014-12-18 16:52:20 +000088 * sentence or a paragraph depends on the element distance unit).
89 *
90 * @return <code>true</code> of the current second span is valid,
91 * <code>false</code> otherwise.
92 * @throws IOException
93 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000094 protected abstract boolean isSecondSpanValid () throws IOException;
95
Eliza Margaretha609a5be2014-12-18 16:52:20 +000096
97 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 * Stores/collects the states of all possible firstspans as
99 * candidate spans
100 * for the current secondspan. The candidate spans must be within
101 * the
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000102 * maximum distance from the current secondspan.
103 *
104 * @throws IOException
105 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 protected abstract void setCandidateList () throws IOException;
107
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000108
109 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000110 * Defines the conditions for a match and tells if a match is
111 * found.
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000112 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 * @return <code>true</code> if a match is found,
114 * <code>false</code>
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000115 * otherwise.
116 * @throws IOException
117 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118 protected abstract boolean findMatch () throws IOException;
119
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000120
121 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000122 * Defines the properties of a span match. The distance between
123 * the first
124 * and the second spans is zero, when there is an intersection
125 * between them
126 * in {@link TokenDistanceSpans}, or they occur in the same
127 * element in {@link ElementDistanceSpans}.
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000128 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000129 * @param candidateSpan
130 * a match span
131 * @param isDistanceZero
132 * <code>true</code> if the distance between the first
133 * and the second spans is zero, <code>false</code>
134 * otherwise.
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000135 * @throws IOException
136 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000137 protected void setMatchProperties (CandidateSpan candidateSpan,
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000138 boolean isDistanceZero) throws IOException {
139
140 setMatchFirstSpan(candidateSpan);
141 setMatchSecondSpan(new CandidateSpan(secondSpans));
142
143 if (isDistanceZero) {
144 matchStartPosition = Math.min(candidateSpan.getStart(),
145 secondSpans.start());
146 matchEndPosition = Math.max(candidateSpan.getEnd(),
147 secondSpans.end());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148 }
149 else {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000150 matchStartPosition = candidateSpan.getStart();
151 matchEndPosition = secondSpans.end();
152 }
153
154 this.matchDocNumber = secondSpans.doc();
155 if (collectPayloads) {
156 if (candidateSpan.getPayloads() != null) {
157 matchPayload.addAll(candidateSpan.getPayloads());
158 }
159 if (secondSpans.isPayloadAvailable()) {
160 matchPayload.addAll(secondSpans.getPayload());
161 }
162 }
163 }
164
Nils Diewaldbb33da22015-03-04 16:24:25 +0000165
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000166 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000167 public boolean skipTo (int target) throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000168 if (hasMoreSpans && (secondSpans.doc() < target)) {
169 if (!secondSpans.skipTo(target)) {
170 candidateList.clear();
171 return false;
172 }
173 }
174
175 setCandidateList();
176 matchPayload.clear();
177 isStartEnumeration = false;
178 return advance();
179 }
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000180}