blob: 2bf24def24d690532b74b8ba98aa434e2b242659 [file] [log] [blame]
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
Eliza Margaretha1c3bf272014-06-11 11:50:39 +00004import java.util.ArrayList;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00005import java.util.Collection;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00006import java.util.Map;
7
Akron700c1eb2015-09-25 16:57:30 +02008import org.apache.lucene.index.LeafReaderContext;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00009import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
Eliza Margaretha05bff462015-02-18 18:18:26 +000011import org.apache.lucene.search.spans.SpanQuery;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000012import org.apache.lucene.search.spans.Spans;
13import org.apache.lucene.util.Bits;
Eliza Margarethadc98dc12016-11-16 14:33:42 +010014import org.slf4j.Logger;
15import org.slf4j.LoggerFactory;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000016
17import de.ids_mannheim.korap.query.SimpleSpanQuery;
18
Nils Diewaldbb33da22015-03-04 16:24:25 +000019/**
20 * An abstract class for Span enumeration including span match
Eliza Margarethadc98dc12016-11-16 14:33:42 +010021 * properties and basic methods.
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 *
23 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +020024 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000025public abstract class SimpleSpans extends Spans {
Eliza Margarethadc98dc12016-11-16 14:33:42 +010026 protected final Logger log = LoggerFactory.getLogger(SimpleSpans.class);
Nils Diewaldbb33da22015-03-04 16:24:25 +000027 private SimpleSpanQuery query;
28 protected boolean isStartEnumeration;
29 protected boolean collectPayloads;
30
31 protected boolean hasMoreSpans;
Eliza Margarethadc98dc12016-11-16 14:33:42 +010032 // Enumeration of Spans
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 protected Spans firstSpans, secondSpans;
34
35 protected int matchDocNumber, matchStartPosition, matchEndPosition;
36 protected Collection<byte[]> matchPayload;
37
margaretha50c76332015-03-19 10:10:39 +010038 protected short spanId;
39 protected boolean hasSpanId = false;
40
Akron42993552016-02-04 13:24:24 +010041 protected byte payloadTypeIdentifier;
42
Nils Diewaldbb33da22015-03-04 16:24:25 +000043
44 public SimpleSpans () {
45 collectPayloads = true;
46 matchDocNumber = -1;
47 matchStartPosition = -1;
48 matchEndPosition = -1;
49 matchPayload = new ArrayList<byte[]>();
50 isStartEnumeration = true;
Eliza Margarethadc98dc12016-11-16 14:33:42 +010051 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000052
53
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000054 public SimpleSpans (SimpleSpanQuery simpleSpanQuery,
Akron700c1eb2015-09-25 16:57:30 +020055 LeafReaderContext context, Bits acceptDocs,
Eliza Margaretha6f989202016-10-14 21:48:29 +020056 Map<Term, TermContext> termContexts)
57 throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000058 this();
59 query = simpleSpanQuery;
60 collectPayloads = query.isCollectPayloads();
61 // Get the enumeration of the two spans to match
62 SpanQuery sq;
63 if ((sq = simpleSpanQuery.getFirstClause()) != null)
64 firstSpans = sq.getSpans(context, acceptDocs, termContexts);
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000065
Nils Diewaldbb33da22015-03-04 16:24:25 +000066 if ((sq = simpleSpanQuery.getSecondClause()) != null)
67 secondSpans = sq.getSpans(context, acceptDocs, termContexts);
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000068
Nils Diewaldbb33da22015-03-04 16:24:25 +000069 }
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000070
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000071
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 /**
73 * If the current x and y are not in the same document, to skip
74 * the
75 * span with the smaller document number, to the same OR a greater
76 * document number than, the document number of the other span. Do
77 * this until the x and the y are in the same doc, OR until the
78 * last
79 * document.
80 *
81 * @return true iff such a document exists.
Eliza Margaretha6f989202016-10-14 21:48:29 +020082 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000083 protected boolean ensureSameDoc (Spans x, Spans y) throws IOException {
84 while (x.doc() != y.doc()) {
85 if (x.doc() < y.doc()) {
86 if (!x.skipTo(y.doc())) {
87 hasMoreSpans = false;
88 return false;
89 }
90 }
91 else {
92 if (!y.skipTo(x.doc())) {
93 hasMoreSpans = false;
94 return false;
95 }
96 }
97 }
98 return true;
99 }
100
101
102 /**
103 * Find the same doc shared by element, firstspan and secondspan.
104 *
105 * @return true iff such a doc is found.
Eliza Margaretha6f989202016-10-14 21:48:29 +0200106 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000107 protected boolean findSameDoc (Spans x, Spans y, Spans e)
108 throws IOException {
109
110 while (hasMoreSpans) {
111 if (ensureSameDoc(x, y) && e.doc() == x.doc()) {
112 return true;
113 }
114 if (!ensureSameDoc(e, y)) {
115 return false;
116 };
117 }
118 return false;
119 }
120
121
122 @Override
123 public int doc () {
124 return matchDocNumber;
125 }
126
127
128 @Override
129 public int start () {
130 return matchStartPosition;
131 }
132
133
Akron42993552016-02-04 13:24:24 +0100134 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 public int end () {
136 return matchEndPosition;
Akron42993552016-02-04 13:24:24 +0100137 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000138
139
140 @Override
141 public Collection<byte[]> getPayload () throws IOException {
142 return matchPayload;
143 }
144
145
146 @Override
147 public boolean isPayloadAvailable () throws IOException {
148 return !matchPayload.isEmpty();
149 }
150
151
152 @Override
153 public String toString () {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200154 return getClass().getName() + "(" + query.toString() + ")@"
155 + (isStartEnumeration ? "START"
156 : (hasMoreSpans ? (doc() + ":" + start() + "-" + end())
157 : "END"));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000158 }
159
margaretha50c76332015-03-19 10:10:39 +0100160
161
162 /**
163 * Returns the span id of the current span
164 *
165 * @return the span id of the current span
166 */
167 public short getSpanId () {
168 return spanId;
169 }
170
171
172 /**
173 * Sets the span id of the current span
174 *
175 * @param spanId
176 * span id
177 */
178 public void setSpanId (short spanId) {
179 this.spanId = spanId;
180 }
181
margaretha69726b12015-12-10 12:03:19 +0100182
Akron42993552016-02-04 13:24:24 +0100183 /**
184 * Gets the payload type identifier (PTI) of the current span
185 *
186 * @return a payload type identifier
187 */
188 public byte getPayloadTypeIdentifier () {
189 return payloadTypeIdentifier;
190 }
191
192
193 /**
194 * Sets the payload type identifier (PTI) of the current span
195 *
196 * @param payloadTypeIdentifier
197 */
198 public void setPayloadTypeIdentifier (byte payloadTypeIdentifier) {
199 this.payloadTypeIdentifier = payloadTypeIdentifier;
200 }
margaretha69726b12015-12-10 12:03:19 +0100201
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +0000202}