blob: 42e59a18483cb7bc5c7028e6219dff13c08d1eb8 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
Eliza Margaretha427149f2014-10-29 15:53:30 +00003import java.io.IOException;
4import java.nio.ByteBuffer;
5import java.util.ArrayList;
6import java.util.Collection;
7import java.util.List;
8import java.util.Map;
9
Akron700c1eb2015-09-25 16:57:30 +020010import org.apache.lucene.index.LeafReaderContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000011import org.apache.lucene.index.Term;
12import org.apache.lucene.index.TermContext;
margaretha50c76332015-03-19 10:10:39 +010013import org.apache.lucene.index.TermState;
Eliza Margaretha427149f2014-10-29 15:53:30 +000014import org.apache.lucene.search.spans.SpanQuery;
15import org.apache.lucene.search.spans.Spans;
Nils Diewaldf399a672013-11-18 17:55:22 +000016import org.apache.lucene.util.Bits;
Nils Diewaldf399a672013-11-18 17:55:22 +000017import org.slf4j.Logger;
18import org.slf4j.LoggerFactory;
19
Nils Diewald1455e1e2014-08-01 16:12:43 +000020/**
Nils Diewald41750bf2015-02-06 17:45:20 +000021 * Add a payload to the span with an identification number (a class)
22 * And the start and end position of the span, so this information
23 * can bubble up for later processing (similar to captures in regular
24 * expression).
Nils Diewaldbb33da22015-03-04 16:24:25 +000025 *
Nils Diewald1455e1e2014-08-01 16:12:43 +000026 * @author diewald
27 */
28
margarethaf70addb2015-04-27 13:17:18 +020029public class ClassSpans extends SimpleSpans {
30 protected List<byte[]> classedPayload;
31 protected Spans spans;
32 protected byte number;
33 protected SpanQuery operand;
34 protected Boolean hasmorespans = false;
Nils Diewald82a4b862014-02-20 21:17:41 +000035
margarethaf70addb2015-04-27 13:17:18 +020036 private final Logger log = LoggerFactory.getLogger(ClassSpans.class);
Nils Diewald41750bf2015-02-06 17:45:20 +000037
Nils Diewaldc025a232014-02-28 19:01:14 +000038 // This advices the java compiler to ignore all loggings
Nils Diewald1e5d5942014-05-20 13:29:53 +000039 public static final boolean DEBUG = false;
Nils Diewaldc025a232014-02-28 19:01:14 +000040
Nils Diewaldbb33da22015-03-04 16:24:25 +000041
Nils Diewald41750bf2015-02-06 17:45:20 +000042 /**
43 * Construct a new ClassSpans object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000044 *
45 * @param operand
46 * An arbitrary nested {@link SpanQuery}.
47 * @param context
Akron700c1eb2015-09-25 16:57:30 +020048 * The {@link LeafReaderContext}.
Nils Diewaldbb33da22015-03-04 16:24:25 +000049 * @param acceptDocs
50 * Bit vector representing the documents
51 * to be searched in.
52 * @param termContexts
53 * A map managing {@link TermState TermStates}.
54 * @param number
55 * The identifying class number.
Nils Diewald41750bf2015-02-06 17:45:20 +000056 */
Akron700c1eb2015-09-25 16:57:30 +020057 public ClassSpans (SpanQuery operand, LeafReaderContext context,
Nils Diewaldbb33da22015-03-04 16:24:25 +000058 Bits acceptDocs, Map<Term, TermContext> termContexts,
Eliza Margaretha6f989202016-10-14 21:48:29 +020059 byte number)
60 throws IOException {
Nils Diewald41750bf2015-02-06 17:45:20 +000061 spans = operand.getSpans(context, acceptDocs, termContexts);
62
63 // The number of the class
Nils Diewaldbb33da22015-03-04 16:24:25 +000064 this.number = number;
Nils Diewald41750bf2015-02-06 17:45:20 +000065
66 // The current operand
67 this.operand = operand;
68
69 // The highlighted payload
70 this.classedPayload = new ArrayList<byte[]>(3);
margarethaf70addb2015-04-27 13:17:18 +020071
Nils Diewaldf399a672013-11-18 17:55:22 +000072 };
73
Nils Diewald41750bf2015-02-06 17:45:20 +000074
Nils Diewaldf399a672013-11-18 17:55:22 +000075 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +000076 public Collection<byte[]> getPayload () throws IOException {
77 return classedPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +000078 };
79
Nils Diewald41750bf2015-02-06 17:45:20 +000080
Nils Diewaldf399a672013-11-18 17:55:22 +000081 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +000082 public boolean isPayloadAvailable () {
83 // We set payloads here - so it's always true
84 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +000085 };
86
Nils Diewaldf399a672013-11-18 17:55:22 +000087
margaretha50c76332015-03-19 10:10:39 +010088 public byte getNumber () {
89 return number;
90 }
91
92
93 public void setNumber (byte number) {
94 this.number = number;
95 }
96
97
Nils Diewaldf399a672013-11-18 17:55:22 +000098 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +000099 public int doc () {
100 return spans.doc();
101 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000102
Nils Diewald41750bf2015-02-06 17:45:20 +0000103
Nils Diewaldf399a672013-11-18 17:55:22 +0000104 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +0000105 public int start () {
106 return spans.start();
107 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000108
109
Nils Diewaldf399a672013-11-18 17:55:22 +0000110 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +0000111 public int end () {
112 return spans.end();
113 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000114
Nils Diewald41750bf2015-02-06 17:45:20 +0000115
116 @Override
117 public boolean next () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118 if (DEBUG)
119 log.trace("Forward next");
Nils Diewald41750bf2015-02-06 17:45:20 +0000120
121 if (spans.next())
122 return this.addClassPayload();
123
124 hasmorespans = false;
125 return false;
Eliza Margaretha67a88572014-11-04 14:38:56 +0000126 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000127
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128
Akronbb5d1732015-06-22 01:22:40 +0200129 protected boolean addClassPayload () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000130 hasmorespans = true;
131
132 classedPayload.clear();
Nils Diewaldf399a672013-11-18 17:55:22 +0000133
Nils Diewald41750bf2015-02-06 17:45:20 +0000134 // Subquery has payloads
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 if (spans.isPayloadAvailable()) {
Nils Diewald41750bf2015-02-06 17:45:20 +0000136 classedPayload.addAll(spans.getPayload());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000137 if (DEBUG)
138 log.trace("Found payload in nested SpanQuery");
Nils Diewald41750bf2015-02-06 17:45:20 +0000139 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000140
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 if (DEBUG) {
142 log.trace("Wrap class {} around span {} - {}", number,
143 spans.start(), spans.end());
144 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000145
Nils Diewaldbb33da22015-03-04 16:24:25 +0000146 // Todo: Better allocate using a Factory!
Akron42993552016-02-04 13:24:24 +0100147 ByteBuffer bb = ByteBuffer.allocate(10);
148 bb.put((byte) 0);
149 bb.putInt(spans.start());
150 bb.putInt(spans.end());
151 bb.put(number);
Akron6cc7b7b2016-01-14 21:39:18 +0100152 /*
Akronc5e73b82016-01-14 17:01:18 +0100153 System.err.println(
154 "####################### " +
155 spans.start() +
156 "|" +
157 spans.end() +
158 ":" +
159 number
160 );
Akron6cc7b7b2016-01-14 21:39:18 +0100161 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000162
163 // Add highlight information as byte array
164 classedPayload.add(bb.array());
Akronbb5d1732015-06-22 01:22:40 +0200165
margarethaf70addb2015-04-27 13:17:18 +0200166 if (spans instanceof SimpleSpans) {
167 SimpleSpans ss = (SimpleSpans) spans;
168 this.hasSpanId = ss.hasSpanId;
169 this.spanId = ss.spanId;
170 }
Nils Diewald41750bf2015-02-06 17:45:20 +0000171 return true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172 };
Nils Diewald41750bf2015-02-06 17:45:20 +0000173
174
Nils Diewaldf399a672013-11-18 17:55:22 +0000175 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +0000176 public boolean skipTo (int target) throws IOException {
177 classedPayload.clear();
Nils Diewaldcd226862015-02-11 22:27:45 +0000178
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 if (DEBUG)
180 log.trace("Skip ClassSpans {} -> {}", spans.doc(), target);
Nils Diewaldcd226862015-02-11 22:27:45 +0000181
Nils Diewaldbb33da22015-03-04 16:24:25 +0000182 if (hasmorespans && spans.doc() < target && spans.skipTo(target))
Nils Diewald41750bf2015-02-06 17:45:20 +0000183 return this.addClassPayload();
184 return false;
Nils Diewaldf399a672013-11-18 17:55:22 +0000185 };
186
Nils Diewald41750bf2015-02-06 17:45:20 +0000187
Nils Diewaldf399a672013-11-18 17:55:22 +0000188 @Override
Nils Diewald41750bf2015-02-06 17:45:20 +0000189 public String toString () {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 return getClass().getName() + "(" + this.operand.toString() + ")@"
191 + (doc() + ":" + start() + "-" + end());
Nils Diewaldf399a672013-11-18 17:55:22 +0000192 };
193
194
195 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000196 public long cost () {
Nils Diewald41750bf2015-02-06 17:45:20 +0000197 return spans.cost();
198 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000199};