blob: 50e39aab03604883f456773b9e5e1880b563af47 [file] [log] [blame]
Nils Diewalde4986d72015-02-27 17:35:00 +00001package de.ids_mannheim.korap.index;
Nils Diewaldf399a672013-11-18 17:55:22 +00002
Nils Diewaldc383ed02015-02-26 21:35:22 +00003import static de.ids_mannheim.korap.util.KrillByte.*;
Nils Diewald5c375702015-02-09 20:58:24 +00004import de.ids_mannheim.korap.util.CorpusDataException;
Nils Diewaldf399a672013-11-18 17:55:22 +00005import org.apache.lucene.util.BytesRef;
Akron700c1eb2015-09-25 16:57:30 +02006import java.nio.ByteBuffer;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
8import java.util.*;
9import java.util.regex.*;
10
11import org.apache.lucene.analysis.TokenStream;
Nils Diewaldcb8afb32015-02-04 21:12:37 +000012import org.apache.lucene.analysis.tokenattributes.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000013
14import org.slf4j.Logger;
15import org.slf4j.LoggerFactory;
16
Nils Diewaldb5b7b8d2014-06-06 18:41:54 +000017import java.io.Reader;
Nils Diewaldf399a672013-11-18 17:55:22 +000018import java.io.IOException;
19
/*
 * Todo:
 * - !Payload is
 *   [4ByteStartOffset][14BitEndOffset-startOffset]
 *   [1BitBooleanIfSpan][1BitBooleanIfOpen]
 * - Payload is
 *   [4ByteOffsetStart][4ByteOffsetEnd]
 */
Nils Diewaldf399a672013-11-18 17:55:22 +000028
29/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000030 * MultiTermTokenStream extends Lucenes {@link TokenStream} to work
31 * with {@link MultiTermToken MultiTermTokens}.
32 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +000033 * <blockquote><pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 * MultiTermTokenStream mtts = new MultiTermTokenStream(
35 * "[s:den#0-3|i:den|p:DET|l:der|m:c:acc|m:n:sg|m:masc]"
36 * );
Nils Diewaldcb8afb32015-02-04 21:12:37 +000037 * </pre></blockquote>
Nils Diewaldbb33da22015-03-04 16:24:25 +000038 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +000039 * @author diewald
40 * @see TokenStream
Nils Diewaldf399a672013-11-18 17:55:22 +000041 */
42public class MultiTermTokenStream extends TokenStream {
43 private CharTermAttribute charTermAttr;
Nils Diewaldf399a672013-11-18 17:55:22 +000044 private PositionIncrementAttribute posIncrAttr;
45 private PayloadAttribute payloadAttr;
46
Nils Diewaldbb33da22015-03-04 16:24:25 +000047 private static final Pattern pattern = Pattern
48 .compile("\\[(?:\\([0-9]+-[0-9]+\\))?([^\\]]+?)\\]");
Nils Diewaldf399a672013-11-18 17:55:22 +000049
Nils Diewald82a4b862014-02-20 21:17:41 +000050 // This advices the java compiler to ignore all loggings
Akron8abefa12016-02-13 05:35:42 +010051 public static final boolean DEBUG = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +000052 private final Logger log = LoggerFactory
53 .getLogger(MultiTermTokenStream.class);
Nils Diewald82a4b862014-02-20 21:17:41 +000054
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000055 private List<MultiTermToken> multiTermTokens;
Nils Diewaldbb33da22015-03-04 16:24:25 +000056 private int mttIndex = 0, mtIndex = 0;
Nils Diewaldfe6a3652015-02-05 20:34:27 +000057 private short i = 0;
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000058
Akron700c1eb2015-09-25 16:57:30 +020059 private ByteBuffer payload = ByteBuffer.allocate(36);
60
Nils Diewaldbb33da22015-03-04 16:24:25 +000061
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000062 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +000063 * Construct a new MultiTermTokenStream object.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000064 */
Nils Diewaldf399a672013-11-18 17:55:22 +000065 public MultiTermTokenStream () {
Nils Diewaldbb33da22015-03-04 16:24:25 +000066 this.charTermAttr = this.addAttribute(CharTermAttribute.class);
67 this.posIncrAttr = this.addAttribute(PositionIncrementAttribute.class);
68 this.payloadAttr = this.addAttribute(PayloadAttribute.class);
Nils Diewaldcb8afb32015-02-04 21:12:37 +000069 this.multiTermTokens = new ArrayList<MultiTermToken>(100);
Nils Diewaldf399a672013-11-18 17:55:22 +000070 };
71
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000072
73 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +000074 * Construct a new MultiTermTokenStream object
Nils Diewaldbb33da22015-03-04 16:24:25 +000075 *
76 * @param stream
77 * The stream as a string representation.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +000078 */
Nils Diewaldf399a672013-11-18 17:55:22 +000079 public MultiTermTokenStream (String stream) {
Nils Diewaldcb8afb32015-02-04 21:12:37 +000080 this();
Nils Diewald5c375702015-02-09 20:58:24 +000081 try {
82 this._fromString(stream);
83 }
84 catch (CorpusDataException cde) {
85 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
86 };
87 return;
Nils Diewaldcb8afb32015-02-04 21:12:37 +000088 };
89
Nils Diewaldf399a672013-11-18 17:55:22 +000090
Nils Diewaldb5b7b8d2014-06-06 18:41:54 +000091 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +000092 * Construct a new MultiTermTokenStream object
Nils Diewaldbb33da22015-03-04 16:24:25 +000093 *
94 * @param stream
95 * The stream as a {@link Reader} object.
Nils Diewaldcb8afb32015-02-04 21:12:37 +000096 * @throws IOException
Nils Diewaldb5b7b8d2014-06-06 18:41:54 +000097 */
98 public MultiTermTokenStream (Reader stream) throws IOException {
Nils Diewaldcb8afb32015-02-04 21:12:37 +000099 this();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000101 StringBuilder sb = new StringBuilder(4096);
102 char[] buf = new char[128];
103
104 int j;
105 while ((j = stream.read(buf)) > 0)
106 sb.append(buf, 0, j);
107
Nils Diewald5c375702015-02-09 20:58:24 +0000108 try {
109 this._fromString(sb.toString());
110 }
111 catch (CorpusDataException cde) {
112 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
113 };
114 return;
Nils Diewaldf399a672013-11-18 17:55:22 +0000115 };
116
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000117
118 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000119 * Append a {@link MultiTermToken} to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000120 *
121 * @param mtt
122 * A {@link MultiTermToken}.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000123 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000124 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000125 public MultiTermTokenStream addMultiTermToken (MultiTermToken mtt) {
126 this.multiTermTokens.add(mtt);
127 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000128 };
129
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000130
131 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000132 * Append a {@link MultiTermToken} to the MultiTermTokenStream
133 * by means of a set of {@link MultiTerm MultiTerms}.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000134 *
135 * @param mts
136 * A list of {@link MultiTerm} objects.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000137 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000138 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139 public MultiTermTokenStream addMultiTermToken (MultiTerm mts,
140 MultiTerm ... moreTerms) {
Nils Diewalddd46b342015-02-04 22:38:29 +0000141 return this.addMultiTermToken(new MultiTermToken(mts, moreTerms));
Nils Diewaldf399a672013-11-18 17:55:22 +0000142 };
143
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000144
145 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000146 * Append a {@link MultiTermToken} to the MultiTermTokenStream
147 * by means of a single {@link MultiTerm} as a prefixed term.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148 *
149 * @param prefix
150 * A prefix character of a surface form of a
151 * {@link MultiTerm}.
152 * @param surface
153 * A surface string of a {@link MultiTerm}.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000154 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000155 */
Eliza Margaretha6f989202016-10-14 21:48:29 +0200156 public MultiTermTokenStream addMultiTermToken (char prefix,
157 String surface) {
Nils Diewalddd46b342015-02-04 22:38:29 +0000158 return this.addMultiTermToken(new MultiTermToken(prefix, surface));
Nils Diewaldf399a672013-11-18 17:55:22 +0000159 };
160
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000161
162 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000163 * Append a {@link MultiTermToken} to the MultiTermTokenStream
164 * by means of {@link MultiTerm MultiTerm} represented as a set
165 * of terms represented as strings.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000166 *
167 * @param surface
168 * At least one surface string of a {@link MultiTerm}.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000169 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000170 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000171 public MultiTermTokenStream addMultiTermToken (String surface,
172 String ... moreTerms) {
Nils Diewald5c375702015-02-09 20:58:24 +0000173 try {
174 this.addMultiTermToken(new MultiTermToken(surface, moreTerms));
175 }
176 catch (CorpusDataException cde) {
177 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
178 };
179 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000180 };
181
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000182
Nils Diewald5c375702015-02-09 20:58:24 +0000183
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000184 /**
185 * Add meta information to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000186 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000187 * <strong>This is experimental!</strong>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000188 *
189 * @param key
190 * A string for denoting the meta information.
191 * @param value
192 * The value of the meta key as a string.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000193 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000194 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000195 public MultiTermTokenStream addMeta (String key, String value) {
Nils Diewald5c375702015-02-09 20:58:24 +0000196 try {
197 MultiTerm mt = new MultiTerm('-', key);
198 mt.setPayload(value);
199 this.multiTermTokens.get(0).add(mt);
200 }
201 catch (CorpusDataException cde) {
202 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
203 };
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000204 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000205 };
206
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000207
208 /**
209 * Add meta information to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000210 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000211 * <strong>This is experimental!</strong>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000212 *
213 * @param key
214 * A string for denoting the meta information.
215 * @param value
216 * The value of the meta key as a byte array.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000217 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000218 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000219 public MultiTermTokenStream addMeta (String key, byte[] value) {
Nils Diewald5c375702015-02-09 20:58:24 +0000220 try {
221 MultiTerm mt = new MultiTerm('-', key);
222 mt.setPayload(value);
223 this.multiTermTokens.get(0).add(mt);
224 }
225 catch (CorpusDataException cde) {
226 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
227 };
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000228 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000229 };
230
231
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000232 /**
233 * Add meta information to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000234 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000235 * <strong>This is experimental!</strong>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000236 *
237 * @param key
238 * A string for denoting the meta information.
239 * @param value
240 * The value of the meta key as a short value.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000241 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000242 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000243 public MultiTermTokenStream addMeta (String key, short value) {
Nils Diewald5c375702015-02-09 20:58:24 +0000244 try {
245 MultiTerm mt = new MultiTerm('-', key);
246 mt.setPayload(value);
247 this.multiTermTokens.get(0).add(mt);
248 }
249 catch (CorpusDataException cde) {
250 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
251 };
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000252 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000253 };
254
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000255
256 /**
257 * Add meta information to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000258 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000259 * <strong>This is experimental!</strong>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000260 *
261 * @param key
262 * A string for denoting the meta information.
263 * @param value
264 * The value of the meta key as a long value.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000265 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000266 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000267 public MultiTermTokenStream addMeta (String key, long value) {
Nils Diewald5c375702015-02-09 20:58:24 +0000268 try {
269 MultiTerm mt = new MultiTerm('-', key);
270 mt.setPayload(value);
271 this.multiTermTokens.get(0).add(mt);
272 }
273 catch (CorpusDataException cde) {
274 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
275 };
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000276 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000277 };
278
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000279
280 /**
281 * Add meta information to the MultiTermTokenStream.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000282 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000283 * <strong>This is experimental!</strong>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 *
285 * @param key
286 * A string for denoting the meta information.
287 * @param value
288 * The value of the meta key as a integer value.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000289 * @return The {@link MultiTermTokenStream} object for chaining.
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000290 */
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000291 public MultiTermTokenStream addMeta (String key, int value) {
Nils Diewald5c375702015-02-09 20:58:24 +0000292 try {
293 MultiTerm mt = new MultiTerm('-', key);
294 mt.setPayload(value);
295 this.multiTermTokens.get(0).add(mt);
296 }
297 catch (CorpusDataException cde) {
298 log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
299 };
300
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000301 return this;
Nils Diewaldf399a672013-11-18 17:55:22 +0000302 };
303
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000304
305 /**
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000306 * Get a {@link MultiTermToken} by index.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000307 *
308 * @param index
309 * The index position of a {@link MultiTermToken} in
310 * the {@link MultiTermTokenStream}.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000311 * @return A {@link MultiTermToken}.
312 */
313 public MultiTermToken get (int index) {
314 return this.multiTermTokens.get(index);
315 };
316
317
318 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000319 * Get the number of {@link MultiTermToken MultiTermTokens} in the
320 * stream.
321 *
322 * @return The number of {@link MultiTermToken MultiTermTokens} in
323 * the stream.
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000324 */
325 public int getSize () {
326 return this.multiTermTokens.size();
327 };
328
329
330 /**
331 * Serialize the MultiTermTokenStream to a string.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000332 *
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000333 * @return The MultiTermTokenStream as a string.
334 */
335 public String toString () {
336 StringBuffer sb = new StringBuffer();
337 for (MultiTermToken mtt : this.multiTermTokens) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000338 sb.append(mtt.toString());
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000339 };
340 return sb.toString();
341 };
342
343
344 // Deserialize a string
Nils Diewald5c375702015-02-09 20:58:24 +0000345 private void _fromString (String stream) throws CorpusDataException {
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000346 Matcher matcher = pattern.matcher(stream);
347
348 while (matcher.find()) {
349 String[] seg = matcher.group(1).split("\\|");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000350 MultiTermToken mtt = new MultiTermToken(seg[0]);
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000351
352 for (i = 1; i < seg.length; i++)
353 mtt.add(seg[i]);
Nils Diewald5c375702015-02-09 20:58:24 +0000354
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000355 this.addMultiTermToken(mtt);
356 };
357 };
358
359
360 /*
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000361 * Increment the token in the MultiTermTokenStream.
362 * This overrides the function in Lucene's TokenStream.
363 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000364 @Override
Nils Diewald5c375702015-02-09 20:58:24 +0000365 public final boolean incrementToken () throws IOException {
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000366 this.payloadAttr.setPayload(null);
Nils Diewaldf399a672013-11-18 17:55:22 +0000367
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000368 // Last token reached
369 if (this.multiTermTokens.size() == this.mttIndex) {
370 reset();
371 return false;
372 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000373
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000374 // Get current token
Nils Diewaldbb33da22015-03-04 16:24:25 +0000375 MultiTermToken mtt = this.multiTermTokens.get(this.mttIndex);
Nils Diewaldf399a672013-11-18 17:55:22 +0000376
Nils Diewalddd46b342015-02-04 22:38:29 +0000377 // Sort the MultiTermToken
378 mtt.sort();
379
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000380 // Last term reached
381 if (mtt.terms.size() == this.mtIndex) {
382 this.mtIndex = 0;
383 this.mttIndex++;
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000384
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000385 // Last term of last token reached
386 if (this.multiTermTokens.size() == this.mttIndex) {
387 reset();
388 return false;
389 }
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000390
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000391 // Get last token
392 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000393 mtt = this.multiTermTokens.get(this.mttIndex);
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000394 };
395 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000396
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000397 // Get current term
398 MultiTerm mt = mtt.terms.get(this.mtIndex);
Nils Diewaldf399a672013-11-18 17:55:22 +0000399
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000400 // Set the relative position to the former term
Nils Diewaldbb33da22015-03-04 16:24:25 +0000401 posIncrAttr.setPositionIncrement(this.mtIndex == 0 ? 1 : 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000402 charTermAttr.setEmpty();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000403 charTermAttr.append(mt.term);
Nils Diewaldf399a672013-11-18 17:55:22 +0000404
Akron700c1eb2015-09-25 16:57:30 +0200405 payload.rewind();
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000406
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000407 // There is offset information
408 if (mt.start != mt.end) {
409 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000410 log.trace("MultiTerm with payload offset: {}-{}", mt.start,
411 mt.end);
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000412
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000413 // Add offsets to BytesRef payload
Akron700c1eb2015-09-25 16:57:30 +0200414 payload.putInt(mt.start);
415 payload.putInt(mt.end);
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000416 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000417
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000418 // There is payload in the MultiTerm
419 if (mt.payload != null) {
Akron42993552016-02-04 13:24:24 +0100420
Akron700c1eb2015-09-25 16:57:30 +0200421 payload.put(mt.payload.bytes);
422
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000423 if (DEBUG)
424 log.trace("Create payload[1] {}", payload.toString());
425 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000426
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000427 // There is payload in the current token to index
Akron700c1eb2015-09-25 16:57:30 +0200428 // This seems to be heavily risky!
429 if (payload.position() > 0) {
430
431 if (payload.hasArray()) {
Akron42993552016-02-04 13:24:24 +0100432 payloadAttr.setPayload(new BytesRef(Arrays.copyOfRange(
433 payload.array(), payload.arrayOffset(),
434 payload.arrayOffset() + payload.position())));
Akron700c1eb2015-09-25 16:57:30 +0200435 }
436 else {
437 log.error("This should never happen!");
438 };
439
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000440 if (DEBUG)
441 log.trace("Set payload[2] {}", payload.toString());
442 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000443
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000444 // Some debug loggings
445 if (DEBUG) {
446 StringBuilder sb = new StringBuilder("Index: [");
447 sb.append(mt.term);
Akron700c1eb2015-09-25 16:57:30 +0200448 if (payload.position() > 0)
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000449 sb.append('$').append(payload.toString());
450 sb.append(']');
Nils Diewalddd46b342015-02-04 22:38:29 +0000451 sb.append(" with increment ").append(this.mtIndex == 0 ? 1 : 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000452
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000453 log.trace(sb.toString());
454 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000455
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000456 this.mtIndex++;
Nils Diewaldf399a672013-11-18 17:55:22 +0000457 return true;
458 };
459
Nils Diewaldbb33da22015-03-04 16:24:25 +0000460
Nils Diewaldf399a672013-11-18 17:55:22 +0000461 @Override
Nils Diewald5c375702015-02-09 20:58:24 +0000462 public void reset () {
Nils Diewaldcb8afb32015-02-04 21:12:37 +0000463 this.mttIndex = 0;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000464 this.mtIndex = 0;
Nils Diewaldf399a672013-11-18 17:55:22 +0000465 };
466};