blob: 8103dde2389fdeae5916833985fcc3a5e7238710 [file] [log] [blame]
Eliza Margaretha7ee76da2014-08-12 15:32:33 +00001package de.ids_mannheim.korap.index;
2
margaretha6cbe3712018-10-23 13:22:49 +02003import static de.ids_mannheim.korap.TestSimple.getJsonString;
margaretha7f4fd652018-11-22 18:00:02 +01004import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc;
Eliza Margaretha942dcf32015-01-22 15:13:00 +00005import static org.junit.Assert.assertEquals;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +00006
margaretha6cbe3712018-10-23 13:22:49 +02007import java.io.IOException;
margaretha7f4fd652018-11-22 18:00:02 +01008import java.util.Arrays;
9import java.util.List;
10import java.util.regex.Pattern;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000011
12import org.apache.lucene.index.Term;
Eliza Margaretha942dcf32015-01-22 15:13:00 +000013import org.apache.lucene.search.RegexpQuery;
14import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
Eliza Margaretha8e200cd2014-11-13 16:00:38 +000015import org.apache.lucene.search.spans.SpanQuery;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000016import org.apache.lucene.search.spans.SpanTermQuery;
Nils Diewald9b11a442014-11-08 20:47:17 +000017import org.apache.lucene.util.automaton.RegExp;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000018import org.junit.Test;
Akronddbc8f52018-11-28 11:53:42 +010019import org.junit.Ignore;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000020
Akrond6611cd2018-01-05 19:45:35 +010021import de.ids_mannheim.korap.Krill;
Nils Diewalda14ecd62015-02-26 21:00:20 +000022import de.ids_mannheim.korap.KrillIndex;
Nils Diewald0339d462015-02-26 14:53:56 +000023import de.ids_mannheim.korap.KrillQuery;
margaretha7f4fd652018-11-22 18:00:02 +010024import de.ids_mannheim.korap.TestSimple;
Akron747986e2016-02-18 17:07:12 +010025import de.ids_mannheim.korap.query.QueryBuilder;
Eliza Margaretha656cb312014-08-14 12:42:26 +000026import de.ids_mannheim.korap.query.SpanElementQuery;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000027import de.ids_mannheim.korap.query.SpanExpansionQuery;
margaretha7f4fd652018-11-22 18:00:02 +010028import de.ids_mannheim.korap.query.SpanNextQuery;
Nils Diewald9b11a442014-11-08 20:47:17 +000029import de.ids_mannheim.korap.query.SpanRepetitionQuery;
Eliza Margaretha8e200cd2014-11-13 16:00:38 +000030import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
margarethaf151c962018-11-27 17:38:59 +010031import de.ids_mannheim.korap.response.Match;
margarethae43c5e52018-03-20 15:24:53 +010032import de.ids_mannheim.korap.response.Result;
Eliza Margaretha8e200cd2014-11-13 16:00:38 +000033import de.ids_mannheim.korap.util.QueryException;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000034
35public class TestSpanExpansionIndex {
36
Nils Diewald884dbcf2015-02-27 17:02:28 +000037 Result kr;
Nils Diewalda14ecd62015-02-26 21:00:20 +000038 KrillIndex ki;
Eliza Margaretha85787842014-09-30 17:42:09 +000039
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 public TestSpanExpansionIndex () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +000041 ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +000042 ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
Eliza Margaretha942dcf32015-01-22 15:13:00 +000043 ki.commit();
44 }
45
margaretha7f4fd652018-11-22 18:00:02 +010046 /** Method for finding bugs. Since java matcher cannot find multiple matches
47 * from the same offset, the expected results are sometimes lower than the
48 * actual results.
49 *
50 * @throws IOException
51 * @throws QueryException
52 */
margaretha327f2b72018-11-27 14:10:24 +010053// @Test
margaretha7f4fd652018-11-22 18:00:02 +010054 public void fuzzyTest () throws IOException, QueryException {
55 List<String> chars = Arrays.asList("a", "b", "c", "d", "e");
56
57 // c []{0,2} a
58 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
59 SpanTermQuery stq2 = new SpanTermQuery(new Term("base", "s:a"));
60 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, 0, true);
61 SpanNextQuery snq = new SpanNextQuery(seq, stq2);
62
63 Pattern resultPattern = Pattern.compile("c[a-e]{0,2}a");
64 TestSimple.fuzzingTest(chars, resultPattern, snq,
margaretha327f2b72018-11-27 14:10:24 +010065 6, 20, 8);
margaretha7f4fd652018-11-22 18:00:02 +010066 }
67
68 @Test
69 public void testNoExpansion () throws IOException {
70 KrillIndex ki = new KrillIndex();
71 ki.addDoc(simpleFieldDoc("cc"));
72 ki.commit();
73
74 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
75 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 0, 0, true);
76 Result kr = ki.search(seq, (short) 10);
77
78 assertEquals(2, kr.getTotalResults());
79 }
Akron6759b042016-04-28 01:25:00 +020080
Eliza Margaretha942dcf32015-01-22 15:13:00 +000081 /**
82 * Left and right expansions
Eliza Margaretha6f989202016-10-14 21:48:29 +020083 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +000084 @Test
margaretha7f4fd652018-11-22 18:00:02 +010085 public void testLeftRightExpansions () throws IOException {
Eliza Margaretha942dcf32015-01-22 15:13:00 +000086
87 SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:des"));
88 // left
89 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
90 kr = ki.search(seq, (short) 10);
91
margaretha7f4fd652018-11-22 18:00:02 +010092 // assertEquals(69,kr.getTotalResults());
Eliza Margaretha942dcf32015-01-22 15:13:00 +000093 assertEquals(5, kr.getMatch(0).getStartPos());
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000094 assertEquals(8, kr.getMatch(0).getEndPos());
95 assertEquals(6, kr.getMatch(1).getStartPos());
96 assertEquals(8, kr.getMatch(1).getEndPos());
97 assertEquals(7, kr.getMatch(2).getStartPos());
98 assertEquals(8, kr.getMatch(2).getEndPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +000099
Eliza Margaretha4423a922014-09-17 10:44:01 +0000100 // right
Eliza Margaretha7788a982014-08-29 16:10:52 +0000101 seq = new SpanExpansionQuery(stq, 3, 4, 0, true);
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000102 kr = ki.search(seq, (short) 10);
margaretha21e4ca22018-11-28 14:25:46 +0100103
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000104 assertEquals(7, kr.getMatch(0).getStartPos());
105 assertEquals(11, kr.getMatch(0).getEndPos());
106 assertEquals(7, kr.getMatch(1).getStartPos());
107 assertEquals(12, kr.getMatch(1).getEndPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000108 assertEquals(156, kr.getMatch(2).getStartPos());
109 assertEquals(160, kr.getMatch(2).getEndPos());
110 assertEquals(156, kr.getMatch(3).getStartPos());
111 assertEquals(161, kr.getMatch(3).getEndPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000112 }
113
114 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000115 * Classnumber
Eliza Margaretha2dcde4f2015-02-10 12:02:18 +0000116 * Check the expansion offsets
Eliza Margaretha6f989202016-10-14 21:48:29 +0200117 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000118 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100119 public void testExpansionWithClassNumber () {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000120 byte classNumber = 1;
121 SpanExpansionQuery sq;
122 // create new payload for the expansion offsets
123 SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:des"));
124 sq = new SpanExpansionQuery(stq, 0, 2, -1, classNumber, true);
125 kr = ki.search(sq, (short) 10);
126
127 assertEquals(5, kr.getMatch(0).getStartPos());
128 assertEquals(8, kr.getMatch(0).getEndPos());
129 assertEquals(5, kr.getMatch(0).getStartPos(1)); // expansion 5,7
130 assertEquals(7, kr.getMatch(0).getEndPos(1));
131 // expansion offsets
Eliza Margarethaad053352014-09-17 16:21:23 +0000132 assertEquals(6, kr.getMatch(1).getStartPos(1));
133 assertEquals(7, kr.getMatch(1).getEndPos(1));
134 assertEquals(7, kr.getMatch(2).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000135 assertEquals(7, kr.getMatch(2).getEndPos(1));
Eliza Margaretha85787842014-09-30 17:42:09 +0000136 assertEquals(154, kr.getMatch(3).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000137 assertEquals(156, kr.getMatch(3).getEndPos(1));
138
139 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000140 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000141 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
142 * +km.getSnippetBrackets()); }
143 */
144
145 // add expansion offsets to the existing payload
Akron43cea662016-02-15 23:43:59 +0100146 SpanElementQuery seq = new SpanElementQuery("tokens", "base/s:s");
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000147 sq = new SpanExpansionQuery(seq, 1, 2, 0, classNumber, true);
148 kr = ki.search(sq, (short) 10);
149
150 assertEquals(13, kr.getMatch(0).getStartPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000151 assertEquals(26, kr.getMatch(0).getEndPos());
152 assertEquals(13, kr.getMatch(1).getStartPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000153 assertEquals(27, kr.getMatch(1).getEndPos());
154
Eliza Margaretha85787842014-09-30 17:42:09 +0000155 assertEquals(25, kr.getMatch(2).getStartPos());
156 assertEquals(35, kr.getMatch(2).getEndPos());
157 assertEquals(34, kr.getMatch(2).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000158 assertEquals(35, kr.getMatch(2).getEndPos(1));
159
Eliza Margaretha85787842014-09-30 17:42:09 +0000160 assertEquals(25, kr.getMatch(3).getStartPos());
161 assertEquals(36, kr.getMatch(3).getEndPos());
162 assertEquals(34, kr.getMatch(3).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000163 assertEquals(36, kr.getMatch(3).getEndPos(1));
164
165 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000166 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000167 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
168 * +km.getSnippetBrackets()); }
169 */
170 }
171
172 /**
173 * Right expansion with exclusion
Eliza Margaretha6f989202016-10-14 21:48:29 +0200174 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000175 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100176 public void testRightExpansionWithExclusion () throws IOException {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000177 byte classNumber = 1;
178 SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
margarethae43c5e52018-03-20 15:24:53 +0100179 SpanTermQuery notQuery =
180 new SpanTermQuery(new Term("tokens", "s:Buchstabe"));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000181
182 SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 2, 3, 0,
183 classNumber, true);
184 kr = ki.search(seq, (short) 20);
185
186 assertEquals(6, kr.getMatch(0).getStartPos());
187 assertEquals(9, kr.getMatch(0).getEndPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000188 assertEquals(7, kr.getMatch(0).getStartPos(1));
189 assertEquals(9, kr.getMatch(0).getEndPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000190
191 assertEquals(9, kr.getMatch(2).getStartPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000192 assertEquals(12, kr.getMatch(2).getEndPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000193
Eliza Margaretha85787842014-09-30 17:42:09 +0000194 assertEquals(9, kr.getMatch(3).getStartPos());
195 assertEquals(13, kr.getMatch(3).getEndPos());
196 assertEquals(10, kr.getMatch(3).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000197 assertEquals(13, kr.getMatch(3).getEndPos(1));
Nils Diewald5380aa62014-09-01 13:21:07 +0000198
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000199 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000200 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000201 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
202 * +km.getSnippetBrackets()); }
203 */
204 }
205
206 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000207 * Left expansion with exclusion
Eliza Margaretha2dcde4f2015-02-10 12:02:18 +0000208 * No expansion
Eliza Margaretha6f989202016-10-14 21:48:29 +0200209 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000210 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100211 public void testLeftExpansionWithExclusion () throws IOException {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000212 byte classNumber = 1;
213 SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
margarethae43c5e52018-03-20 15:24:53 +0100214 SpanTermQuery notQuery =
215 new SpanTermQuery(new Term("tokens", "tt/p:ADJA"));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000216
Eliza Margaretha6f989202016-10-14 21:48:29 +0200217 SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 2, -1,
218 classNumber, true);
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000219 kr = ki.search(seq, (short) 10);
220
221 assertEquals(6, kr.getMatch(0).getStartPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000222 assertEquals(7, kr.getMatch(0).getEndPos());
223 assertEquals(6, kr.getMatch(0).getStartPos(1));
224 assertEquals(6, kr.getMatch(0).getEndPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000225
Eliza Margaretha85787842014-09-30 17:42:09 +0000226 assertEquals(12, kr.getMatch(4).getStartPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000227 assertEquals(13, kr.getMatch(4).getEndPos());
228
Eliza Margaretha85787842014-09-30 17:42:09 +0000229 assertEquals(12, kr.getMatch(5).getStartPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000230 assertEquals(15, kr.getMatch(5).getEndPos());
Eliza Margaretha85787842014-09-30 17:42:09 +0000231 assertEquals(12, kr.getMatch(5).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000232 assertEquals(14, kr.getMatch(5).getEndPos(1));
233
Eliza Margaretha85787842014-09-30 17:42:09 +0000234 assertEquals(13, kr.getMatch(6).getStartPos());
235 assertEquals(15, kr.getMatch(6).getEndPos());
236 assertEquals(13, kr.getMatch(6).getStartPos(1));
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000237 assertEquals(14, kr.getMatch(6).getEndPos(1));
238
239 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000240 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000241 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
242 * +km.getSnippetBrackets()); }
243 */
244
245 }
246
247 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000248 * Expansion over start and end documents start => cut to 0
Eliza Margaretha2dcde4f2015-02-10 12:02:18 +0000249 * TODO: end => to be handled in rendering process
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000250 *
251 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200252 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000253 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100254 public void testExpansionOverStart () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000255 KrillIndex ki = new KrillIndex();
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000256 ki.addDoc(createFieldDoc0());
257 ki.commit();
258
259 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:e"));
260 // left expansion precedes 0
261 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 2, 2, -1, true);
262 kr = ki.search(seq, (short) 10);
263
margaretha21e4ca22018-11-28 14:25:46 +0100264 assertEquals((long) 3, kr.getTotalResults());
265 assertEquals(2, kr.getMatch(0).getStartPos());
266 assertEquals(5, kr.getMatch(0).getEndPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000267
margaretha7f4fd652018-11-22 18:00:02 +0100268 // right expansion exceeds end position
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000269 seq = new SpanExpansionQuery(stq, 3, 3, 0, true);
270 kr = ki.search(seq, (short) 10);
271
272 assertEquals((long) 4, kr.getTotalResults());
273 assertEquals(7, kr.getMatch(2).getStartPos());
Eliza Margaretha39662de2014-09-17 14:33:50 +0000274 assertEquals(11, kr.getMatch(2).getEndPos());
275 assertEquals(8, kr.getMatch(3).getStartPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000276 assertEquals(12, kr.getMatch(3).getEndPos());
277
278 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000279 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000280 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
281 * //+km.getSnippetBrackets() ); }
282 */
283 }
284
285 /**
286 * Expansion exclusion : multiple documents
287 *
288 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200289 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000290 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100291 public void testExclusionWithMultipleDocs () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000292 KrillIndex ki = new KrillIndex();
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000293 ki.addDoc(createFieldDoc0()); // same doc
294 ki.addDoc(createFieldDoc1()); // only not clause
295 ki.addDoc(createFieldDoc2()); // only main clause
296 ki.commit();
297
298 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:e"));
299 SpanTermQuery notQuery = new SpanTermQuery(new Term("base", "s:d"));
300
margarethae43c5e52018-03-20 15:24:53 +0100301 SpanExpansionQuery seq =
302 new SpanExpansionQuery(stq, notQuery, 2, 3, 0, true);
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000303 kr = ki.search(seq, (short) 20);
304
305 // notClause.doc() > firstSpans.doc()
306 assertEquals(7, kr.getMatch(0).getStartPos());
307 assertEquals(10, kr.getMatch(0).getEndPos());
Eliza Margaretha39662de2014-09-17 14:33:50 +0000308 assertEquals(7, kr.getMatch(1).getStartPos());
309 assertEquals(11, kr.getMatch(1).getEndPos());
310 // !hasMoreNotClause
311 assertEquals(2, kr.getMatch(4).getLocalDocID());
312 assertEquals(1, kr.getMatch(4).getStartPos());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000313 assertEquals(4, kr.getMatch(4).getEndPos());
314 }
315
316 /**
317 * Skip to
Eliza Margaretha6f989202016-10-14 21:48:29 +0200318 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000319 @Test
margaretha7f4fd652018-11-22 18:00:02 +0100320 public void testExpansionWithSkipTo () throws IOException, QueryException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000321 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000322 ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
323 ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000324 ki.commit();
margaretha7f4fd652018-11-22 18:00:02 +0100325 String jsonPath =
326 getClass().getResource("/queries/poly3.json").getFile();
Akron67d2ff02018-06-19 10:51:16 +0200327 String jsonQuery = getJsonString(jsonPath);
Akron850b46e2016-06-08 10:08:55 +0200328 SpanQueryWrapper sqwi = new KrillQuery("tokens").fromKoral(jsonQuery);
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000329
330 SpanQuery sq = sqwi.toQuery();
margaretha7f4fd652018-11-22 18:00:02 +0100331 // System.out.println(sq.toString());
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000332 kr = ki.search(sq, (short) 20);
333
334 assertEquals(205, kr.getMatch(0).getStartPos());
Eliza Margaretha8e200cd2014-11-13 16:00:38 +0000335 assertEquals(208, kr.getMatch(0).getEndPos());
Nils Diewald9b11a442014-11-08 20:47:17 +0000336
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000337 /*
Nils Diewald392bcf32015-02-26 20:01:17 +0000338 * for (Match km : kr.getMatches()){
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000339 * System.out.println(km.getStartPos() +","+km.getEndPos()+" "
340 * +km.getSnippetBrackets() ); }
341 */
342 }
Nils Diewald9b11a442014-11-08 20:47:17 +0000343
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000344 /**
Nils Diewald9b11a442014-11-08 20:47:17 +0000345 * Query rewrite bug
Akron6759b042016-04-28 01:25:00 +0200346 *
Akron30c46062016-04-22 14:24:37 +0200347 * Warning: This is not armoured by <base/s=t>!
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000348 *
349 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200350 */
Nils Diewald9b11a442014-11-08 20:47:17 +0000351 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000352 public void testQueryRewriteBug () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000353 KrillIndex ki = new KrillIndex();
Akron30c46062016-04-22 14:24:37 +0200354 ki.addDoc(createFieldDoc0()); // ceccecdeec
355 /*
356 ki.addDoc(createFieldDoc1()); // bbccdd || only not clause
357 ki.addDoc(createFieldDoc2()); // beccea | only main clause
358 */
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000359 ki.commit();
360
361 // See /queries/bugs/repetition_group_rewrite
margarethae43c5e52018-03-20 15:24:53 +0100362 RegexpQuery requery =
363 new RegexpQuery(new Term("base", "s:[ac]"), RegExp.ALL);
364 SpanMultiTermQueryWrapper<RegexpQuery> query =
365 new SpanMultiTermQueryWrapper<RegexpQuery>(requery);
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000366 SpanExpansionQuery seq = new SpanExpansionQuery(query, 1, 1, 1, true);
367 SpanRepetitionQuery rep = new SpanRepetitionQuery(seq, 2, 2, true);
368
Akron30c46062016-04-22 14:24:37 +0200369 // spanRepetition(
370 // spanExpansion(
371 // SpanMultiTermQueryWrapper(base:/s:[ac]/),
372 // []{1, 1},
373 // right
374 // ){2,2}
375 // )
376
margaretha4cfc89e2016-04-25 18:01:14 +0200377 kr = ki.search(query, (short) 20);
378 assertEquals(5, kr.getTotalResults());
379 assertEquals(0, kr.getMatch(0).getStartPos());
380 assertEquals(1, kr.getMatch(0).getEndPos());
381 assertEquals(2, kr.getMatch(1).getStartPos());
382 assertEquals(3, kr.getMatch(1).getEndPos());
383 assertEquals(3, kr.getMatch(2).getStartPos());
384 assertEquals(4, kr.getMatch(2).getEndPos());
385 assertEquals(5, kr.getMatch(3).getStartPos());
386 assertEquals(6, kr.getMatch(3).getEndPos());
387 assertEquals(9, kr.getMatch(4).getStartPos());
388 assertEquals(10, kr.getMatch(4).getEndPos());
Akron6759b042016-04-28 01:25:00 +0200389
margaretha4cfc89e2016-04-25 18:01:14 +0200390 kr = ki.search(seq, (short) 20);
391 assertEquals(5, kr.getTotalResults());
392 assertEquals(0, kr.getMatch(0).getStartPos());
393 assertEquals(2, kr.getMatch(0).getEndPos());
394 assertEquals(2, kr.getMatch(1).getStartPos());
395 assertEquals(4, kr.getMatch(1).getEndPos());
396 assertEquals(3, kr.getMatch(2).getStartPos());
397 assertEquals(5, kr.getMatch(2).getEndPos());
398 assertEquals(5, kr.getMatch(3).getStartPos());
399 assertEquals(7, kr.getMatch(3).getEndPos());
400 assertEquals(9, kr.getMatch(4).getStartPos());
401 assertEquals(11, kr.getMatch(4).getEndPos());
402
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000403 kr = ki.search(rep, (short) 20);
Akrona7b936d2016-03-04 13:40:54 +0100404
Akronf05fde62016-08-03 23:46:17 +0200405 assertEquals("[[cecc]]ecdeec", kr.getMatch(0).getSnippetBrackets());
406 assertEquals("cec[[cecd]]eec", kr.getMatch(1).getSnippetBrackets());
Akron30c46062016-04-22 14:24:37 +0200407 assertEquals((long) 2, kr.getTotalResults());
Eliza Margaretha8e200cd2014-11-13 16:00:38 +0000408 }
Nils Diewald9b11a442014-11-08 20:47:17 +0000409
Akron747986e2016-02-18 17:07:12 +0100410 /**
411 * Query rewrite bug
412 *
413 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200414 */
Akron747986e2016-02-18 17:07:12 +0100415 @Test
416 public void testExpansionQueryBug3 () throws IOException, QueryException {
417 KrillIndex ki = new KrillIndex();
Akrona7b936d2016-03-04 13:40:54 +0100418 ki.addDoc(createFieldDoc3());
419 ki.addDoc(createFieldDoc4());
Akron747986e2016-02-18 17:07:12 +0100420 ki.commit();
Akron67d2ff02018-06-19 10:51:16 +0200421 String jsonPath = getClass()
422 .getResource("/queries/bugs/expansion_bug_3.jsonld").getFile();
423 String json = getJsonString(jsonPath);
Akron747986e2016-02-18 17:07:12 +0100424 KrillQuery kq = new KrillQuery("base");
Akron850b46e2016-06-08 10:08:55 +0200425 SpanQuery sq = kq.fromKoral(json).toQuery();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200426 assertEquals(sq.toString(),
Akron6759b042016-04-28 01:25:00 +0200427 "focus(254: spanContain(<base:base/s:t />, {254: spanExpansion(base:s:c, []{0, 4}, right)}))");
Akron747986e2016-02-18 17:07:12 +0100428
Akrona7b936d2016-03-04 13:40:54 +0100429 kr = ki.search(sq, (short) 10);
Akronf05fde62016-08-03 23:46:17 +0200430 assertEquals("[[c]]ab", kr.getMatch(0).getSnippetBrackets());
431 assertEquals("[[ca]]b", kr.getMatch(1).getSnippetBrackets());
432 assertEquals("[[cab]]", kr.getMatch(2).getSnippetBrackets());
433 assertEquals("[[c]]e", kr.getMatch(3).getSnippetBrackets());
Akron63cd32f2016-04-21 17:56:06 +0200434
Akronf05fde62016-08-03 23:46:17 +0200435 assertEquals("[[ce]]", kr.getMatch(4).getSnippetBrackets());
Akrona7b936d2016-03-04 13:40:54 +0100436 assertEquals(5, kr.getTotalResults());
Akron63cd32f2016-04-21 17:56:06 +0200437
Akrona7b936d2016-03-04 13:40:54 +0100438 sq = kq.builder().tag("base/s:t").toQuery();
Akron6759b042016-04-28 01:25:00 +0200439 assertEquals(sq.toString(), "<base:base/s:t />");
Akrona7b936d2016-03-04 13:40:54 +0100440 kr = ki.search(sq, (short) 5);
Akronf05fde62016-08-03 23:46:17 +0200441 assertEquals("[[cab]]", kr.getMatch(0).getSnippetBrackets());
442 assertEquals("[[ce]]", kr.getMatch(1).getSnippetBrackets());
Akrona7b936d2016-03-04 13:40:54 +0100443 assertEquals(2, kr.getTotalResults());
Akron747986e2016-02-18 17:07:12 +0100444 }
445
margarethae43c5e52018-03-20 15:24:53 +0100446 @Test
Akrond6611cd2018-01-05 19:45:35 +0100447 public void indexRegexSequence () throws Exception {
448 KrillIndex ki = new KrillIndex();
449 ki.addDoc(createFieldDoc5());
450 ki.commit();
451
452 QueryBuilder kq = new QueryBuilder("base");
453
margarethae43c5e52018-03-20 15:24:53 +0100454 SpanQueryWrapper sq = kq.seq(kq.or("s:baumgarten", "s:steingarten"))
455 .append(kq.seg().without(kq.or("s:franz", "s:hans")));
Akrond6611cd2018-01-05 19:45:35 +0100456
margarethae43c5e52018-03-20 15:24:53 +0100457 // Expected to find [baumgarten steingarten]
458 Krill ks = _newKrill(sq);
Akrond6611cd2018-01-05 19:45:35 +0100459 Result kr = ki.search(ks);
460
461 assertEquals((long) 1, kr.getTotalResults());
462
463 assertEquals("... baum [[baumgarten steingarten]] franz ...",
margarethae43c5e52018-03-20 15:24:53 +0100464 kr.getMatch(0).getSnippetBrackets());
Akrond6611cd2018-01-05 19:45:35 +0100465
margarethae43c5e52018-03-20 15:24:53 +0100466 // The same result should be shown for:
Akrond6611cd2018-01-05 19:45:35 +0100467
margarethae43c5e52018-03-20 15:24:53 +0100468 sq = kq.seq(kq.re("s:.*garten"))
469 .append(kq.seg().without(kq.re("s:.*an.*")));
Akrond6611cd2018-01-05 19:45:35 +0100470
margarethae43c5e52018-03-20 15:24:53 +0100471 ks = _newKrill(sq);
Akrond6611cd2018-01-05 19:45:35 +0100472 kr = ki.search(ks);
473
474 assertEquals((long) 1, kr.getTotalResults());
475
476 assertEquals("... baum [[baumgarten steingarten]] franz ...",
margarethae43c5e52018-03-20 15:24:53 +0100477 kr.getMatch(0).getSnippetBrackets());
478 };
Akrond6611cd2018-01-05 19:45:35 +0100479
margarethae43c5e52018-03-20 15:24:53 +0100480 @Test
481 public void testBugRegexExpandLeftNoMoreSpan () throws IOException {
482 KrillIndex ki = new KrillIndex();
483 ki.addDoc(createFieldDoc6());
484 ki.commit();
485
486 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:a"));
margaretha7f4fd652018-11-22 18:00:02 +0100487
margarethae43c5e52018-03-20 15:24:53 +0100488 RegexpQuery requery =
489 new RegexpQuery(new Term("base", "s:[bc]"), RegExp.ALL);
490 SpanMultiTermQueryWrapper<RegexpQuery> notQuery =
491 new SpanMultiTermQueryWrapper<RegexpQuery>(requery);
492
493 byte classNumber = 1;
494 // left expansion
495 SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 1, -1,
496 classNumber, true);
497
498 kr = ki.search(seq, (short) 20);
499
margaretha7f4fd652018-11-22 18:00:02 +0100500 assertEquals(9, kr.getMatches().size());
margarethae43c5e52018-03-20 15:24:53 +0100501
margaretha7f4fd652018-11-22 18:00:02 +0100502 }
Akrona59252d2018-10-10 19:18:42 +0200503
504 @Test
505 public void indexExpansionWithNegationDifferentFragments () throws Exception {
506 KrillIndex ki = new KrillIndex();
507
508 // Add to the index in a single fragment
509 FieldDocument fd = new FieldDocument();
510 fd.addTV("base",
511 "a B c",
512 "[(0-1)s:a|i:a|_1$<i>0<i>1]"
513 + "[(1-2)s:B|i:b|_2$<i>1<i>2|]"
514 + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
515 ki.addDoc(fd);
516 ki.commit();
517 fd.addTV("base",
518 "a b c",
519 "[(0-1)s:a|i:a|_1$<i>0<i>1]"
520 + "[(1-2)s:b|i:b|_2$<i>1<i>2|]"
521 + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
522 ki.addDoc(fd);
523 ki.commit();
524
525 QueryBuilder kq = new QueryBuilder("base");
526 SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery();
527 assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString());
528 Krill ks = new Krill(sq);
529 ks.getMeta().getContext().left.setToken(true).setLength(0);
530 ks.getMeta().getContext().right.setToken(true).setLength(0);
531
532 Result kr = ki.search(ks);
533 assertEquals((long) 1, kr.getTotalResults());
534 };
535
536 @Test
537 public void indexExpansionWithNegationSameFragmentBug () throws Exception {
538 KrillIndex ki = new KrillIndex();
539
540 // Add to the index in a single fragment
541 FieldDocument fd = new FieldDocument();
542 fd.addTV("base",
543 "a B c",
544 "[(0-1)s:a|i:a|_1$<i>0<i>1]"
545 + "[(1-2)s:B|i:b|_2$<i>1<i>2|]"
546 + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
547 ki.addDoc(fd);
548 fd.addTV("base",
549 "a b c",
550 "[(0-1)s:a|i:a|_1$<i>0<i>1]"
551 + "[(1-2)s:b|i:b|_2$<i>1<i>2|]"
552 + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
553 ki.addDoc(fd);
554 ki.commit();
555
556 QueryBuilder kq = new QueryBuilder("base");
557 SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery();
558 assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString());
559 Krill ks = new Krill(sq);
560 ks.getMeta().getContext().left.setToken(true).setLength(0);
561 ks.getMeta().getContext().right.setToken(true).setLength(0);
562
563 Result kr = ki.search(ks);
564 assertEquals((long) 1, kr.getTotalResults());
565 };
566
Akron42047342018-11-27 15:15:38 +0100567
568 @Test
Akron42047342018-11-27 15:15:38 +0100569 public void indexExpansionLeftWithWrongSorting () throws IOException {
570 KrillIndex ki = new KrillIndex();
571 ki.addDoc(simpleFieldDoc("abcc"));
572 ki.commit();
573
574 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
575 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
576 assertEquals("spanExpansion(base:s:c, []{0, 2}, left)", seq.toString());
577 Result kr = ki.search(seq, (short) 10);
578
margarethaf151c962018-11-27 17:38:59 +0100579 assertEquals("a[[bc]]c", kr.getMatch(1).getSnippetBrackets());
580 assertEquals(1, kr.getMatch(1).getStartPos());
581 assertEquals(3, kr.getMatch(1).getEndPos());
582 assertEquals("a[[bcc]]", kr.getMatch(2).getSnippetBrackets());
583 assertEquals(1, kr.getMatch(2).getStartPos());
584 assertEquals(4, kr.getMatch(2).getEndPos());
Akron42047342018-11-27 15:15:38 +0100585 assertEquals(6, kr.getTotalResults());
586 }
587
Akronddbc8f52018-11-28 11:53:42 +0100588 @Test
margaretha21e4ca22018-11-28 14:25:46 +0100589 public void indexExpansionMultipleStartsWithCorrectSorting () throws IOException {
Akronddbc8f52018-11-28 11:53:42 +0100590 KrillIndex ki = new KrillIndex();
591 ki.addDoc(simpleFieldDoc("abccef"));
592 ki.commit();
593
594 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
595 SpanExpansionQuery seqL = new SpanExpansionQuery(stq, 0, 2, -1, true);
596 SpanExpansionQuery seqR = new SpanExpansionQuery(seqL, 0, 1, 0, true);
597 assertEquals(
598 "spanExpansion(spanExpansion(base:s:c, []{0, 2}, left), []{0, 1}, right)",
599 seqR.toString());
600 Result kr = ki.search(seqR, (short) 20);
601
margaretha21e4ca22018-11-28 14:25:46 +0100602// for (Match km : kr.getMatches()) {
603// System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
604// km.getSnippetBrackets());
605// };
Akronddbc8f52018-11-28 11:53:42 +0100606
607 // TODO: These are duplicate results that may be restricted with a wrapper
608 assertEquals("a[[bcc]]ef", kr.getMatch(3).getSnippetBrackets());
609 assertEquals("a[[bcc]]ef", kr.getMatch(4).getSnippetBrackets());
610 assertEquals(12, kr.getTotalResults());
margaretha21e4ca22018-11-28 14:25:46 +0100611 }
Akronddbc8f52018-11-28 11:53:42 +0100612
margaretha21e4ca22018-11-28 14:25:46 +0100613 @Test
614 public void testRightExpansionWithWrongSorting ()
615 throws IOException {
616 KrillIndex ki = new KrillIndex();
617 ki.addDoc(simpleFieldDoc("abccef"));
618 ki.commit();
619
620 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
621 SpanExpansionQuery seqL = new SpanExpansionQuery(stq, 0, 2, -1, true);
622 kr = ki.search(seqL, (short) 20);
623// for (Match km : kr.getMatches()) {
624// System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
625// km.getSnippetBrackets());
626// };
627
628 SpanExpansionQuery seqR = new SpanExpansionQuery(seqL, 0, 2, 0, true);
Akronddbc8f52018-11-28 11:53:42 +0100629 assertEquals(
630 "spanExpansion(spanExpansion(base:s:c, []{0, 2}, left), []{0, 2}, right)",
631 seqR.toString());
632 kr = ki.search(seqR, (short) 20);
633
margaretha21e4ca22018-11-28 14:25:46 +0100634
635// for (Match km : kr.getMatches()) {
636// System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
637// km.getSnippetBrackets());
638// };
639
Akronddbc8f52018-11-28 11:53:42 +0100640 assertEquals("a[[bcc]]ef", kr.getMatch(5).getSnippetBrackets());
641 assertEquals("a[[bcce]]f", kr.getMatch(6).getSnippetBrackets());
642 assertEquals(18, kr.getTotalResults());
643 }
Akron7a7319a2018-11-28 17:08:56 +0100644
645
646 @Test
647 public void testRightExpansionWithTextBoundary () throws IOException, QueryException {
648 KrillIndex ki = new KrillIndex();
649 ki.addDoc(simpleFieldDoc("aabcd"));
650 ki.commit();
651
652 QueryBuilder kq = new QueryBuilder("base");
653
654 // a[ab]?[]{0,2}
655 SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.opt(kq.or("s:a","s:b"))).append(kq.repeat(kq.empty(),0,5)).toQuery();
656 assertEquals(
657 "focus(254: spanContain(<base:base/s:t />, {254: "+
658 "spanExpansion(spanOr([base:s:a, spanNext(base:s:a, spanOr([base:s:a, base:s:b]))]), []{0, 5}, right)"+
659 "}))", sq.toString());
660
661 Result kr = ki.search(sq, (short) 25);
662 assertEquals("[[aabcd]]", kr.getMatch(8).getSnippetBrackets());
663 assertEquals("a[[a]]bcd", kr.getMatch(9).getSnippetBrackets());
664 assertEquals(16, kr.getTotalResults());
665 }
666
Akronddbc8f52018-11-28 11:53:42 +0100667
margaretha21e4ca22018-11-28 14:25:46 +0100668 @Test
margaretha52a0d112018-11-28 12:58:55 +0100669 public void testLeftExpansionWrongSorting () throws IOException {
670 KrillIndex ki = new KrillIndex();
671 ki.addDoc(simpleFieldDoc("B u d B R a d m d Z z s B d v", " "));
672 ki.commit();
673
margaretha21e4ca22018-11-28 14:25:46 +0100674 // d positions: 2-3, 6-7, 8-9, 13-14
margaretha52a0d112018-11-28 12:58:55 +0100675 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:d"));
676 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 8, -1, true);
677
678 Result kr = ki.search(seq, (short) 25);
679// for (Match km : kr.getMatches()){
680// System.out.println(km.getStartPos() +","+km.getEndPos()+" "
681// +km.getSnippetBrackets()); }
margaretha21e4ca22018-11-28 14:25:46 +0100682
margaretha52a0d112018-11-28 12:58:55 +0100683 assertEquals("BudBR[[admdZzsBd]]v", kr.getMatch(15).getSnippetBrackets());
684 assertEquals(28, kr.getTotalResults());
685 }
Akron42047342018-11-27 15:15:38 +0100686
margarethaf151c962018-11-27 17:38:59 +0100687 /** Tests left expansion over start doc boundary. Redundant matches should
688 * be omitted.
689 * @throws IOException
690 */
691 @Test
692 public void testLeftExpansionRedundantMatches () throws IOException {
693 KrillIndex ki = new KrillIndex();
694 ki.addDoc(simpleFieldDoc("A d F ü d T F u d m", " "));
695 ki.commit();
696
697 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:d"));
698 SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 6, -1, true);
699 Result kr = ki.search(seq, (short) 20);
700
margaretha21e4ca22018-11-28 14:25:46 +0100701// for (Match km : kr.getMatches()) {
702// System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
703// km.getSnippetBrackets());
704// };
705
margarethaf151c962018-11-27 17:38:59 +0100706 Match m = kr.getMatch(5);
707 assertEquals(2, m.getStartPos());
708 assertEquals(9, m.getEndPos());
709 assertEquals(14, kr.getTotalResults());
710
711 }
712
margaretha52a0d112018-11-28 12:58:55 +0100713
margarethae43c5e52018-03-20 15:24:53 +0100714 private FieldDocument createFieldDoc6 () {
715 FieldDocument fd = new FieldDocument();
716 fd.addString("ID", "doc-6");
717 fd.addTV("base", "baaaaaa",
718 "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>10<i>10<b>0]"
719 + "[(1-2)s:a|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]"
720 + "[(3-4)s:a|s:d|_3$<i>3<i>4]"
721 + "[(4-5)s:a|_4$<i>4<i>5]" + "[(5-6)s:c|_5$<i>5<i>6]"
722 + "[(6-7)s:a|_6$<i>6<i>7]" + "[(7-8)s:d|_7$<i>7<i>8]"
723 + "[(8-9)s:a|_8$<i>8<i>9]"
724 + "[(9-10)s:a|_9$<i>9<i>10]");
725 return fd;
726 }
Akron747986e2016-02-18 17:07:12 +0100727
Nils Diewaldbb33da22015-03-04 16:24:25 +0000728 private FieldDocument createFieldDoc0 () {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000729 FieldDocument fd = new FieldDocument();
Eliza Margaretha39662de2014-09-17 14:33:50 +0000730 fd.addString("ID", "doc-0");
Akrona7b936d2016-03-04 13:40:54 +0100731 fd.addTV("base", "ceccecdeec",
Akron6759b042016-04-28 01:25:00 +0200732 "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>10<i>10<b>0]"
733 + "[(1-2)s:e|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]"
734 + "[(3-4)s:c|s:d|_3$<i>3<i>4]"
735 + "[(4-5)s:e|_4$<i>4<i>5]" + "[(5-6)s:c|_5$<i>5<i>6]"
736 + "[(6-7)s:d|_6$<i>6<i>7]" + "[(7-8)s:e|_7$<i>7<i>8]"
Eliza Margaretha6f989202016-10-14 21:48:29 +0200737 + "[(8-9)s:e|_8$<i>8<i>9]"
738 + "[(9-10)s:c|_9$<i>9<i>10]");
Eliza Margaretha39662de2014-09-17 14:33:50 +0000739 return fd;
740 }
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000741
Nils Diewaldbb33da22015-03-04 16:24:25 +0000742 private FieldDocument createFieldDoc1 () {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000743 FieldDocument fd = new FieldDocument();
Eliza Margaretha39662de2014-09-17 14:33:50 +0000744 fd.addString("ID", "doc-1");
Akrona7b936d2016-03-04 13:40:54 +0100745 fd.addTV("base", "bbccdd",
Akron6759b042016-04-28 01:25:00 +0200746 "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0]]"
747 + "[(1-2)s:b|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]"
748 + "[(3-4)s:c|_3$<i>3<i>4]" + "[(4-5)s:d|_4$<i>4<i>5]"
749 + "[(5-6)s:d|_5$<i>5<i>6]");
Eliza Margaretha39662de2014-09-17 14:33:50 +0000750 return fd;
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000751 }
752
Nils Diewaldbb33da22015-03-04 16:24:25 +0000753 private FieldDocument createFieldDoc2 () {
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000754 FieldDocument fd = new FieldDocument();
Eliza Margaretha39662de2014-09-17 14:33:50 +0000755 fd.addString("ID", "doc-2");
Akrona7b936d2016-03-04 13:40:54 +0100756 fd.addTV("base", "beccea",
Akron6759b042016-04-28 01:25:00 +0200757 "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0]]"
758 + "[(1-2)s:e|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]"
759 + "[(3-4)s:c|_3$<i>3<i>4]" + "[(4-5)s:e|_4$<i>4<i>5]"
760 + "[(5-6)s:a|_5$<i>5<i>6]");
Akrona7b936d2016-03-04 13:40:54 +0100761 return fd;
762 }
763
Akrona7b936d2016-03-04 13:40:54 +0100764 private FieldDocument createFieldDoc3 () {
765 FieldDocument fd = new FieldDocument();
766 fd.addString("ID", "doc-3");
767 fd.addTV("base", "cab",
Akron6759b042016-04-28 01:25:00 +0200768 "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0]]"
769 + "[(1-2)s:a|_1$<i>1<i>2]" + "[(2-3)s:b|_2$<i>2<i>3]");
Akrona7b936d2016-03-04 13:40:54 +0100770 return fd;
771 }
772
Akrona7b936d2016-03-04 13:40:54 +0100773 private FieldDocument createFieldDoc4 () {
774 FieldDocument fd = new FieldDocument();
Akrond6611cd2018-01-05 19:45:35 +0100775 fd.addString("ID", "doc-4");
Akrona7b936d2016-03-04 13:40:54 +0100776 fd.addTV("base", "ce",
Akron6759b042016-04-28 01:25:00 +0200777 "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>2<i>2<b>0]]"
778 + "[(1-2)s:e|_1$<i>1<i>2]");
Eliza Margaretha39662de2014-09-17 14:33:50 +0000779 return fd;
Eliza Margaretha942dcf32015-01-22 15:13:00 +0000780 }
781
margarethae43c5e52018-03-20 15:24:53 +0100782 private FieldDocument createFieldDoc5 () {
Akrond6611cd2018-01-05 19:45:35 +0100783 FieldDocument fd = new FieldDocument();
784 fd.addString("ID", "doc-5");
785 fd.addTV("base",
786 "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
787 "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10|<>:base/s:t$<b>64<i>0<i>9<i>9<b>0]"
788 + "[(5-10)s:afffe|_1$<i>5<i>10]"
789 + "[(11-15)s:baum|_2$<i>11<i>15]"
790 + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
791 + "[(27-38)s:steingarten|_4$<i>27<i>38]"
792 + "[(39-44)s:franz|_5$<i>39<i>44]"
793 + "[(45-49)s:hans|_6$<i>45<i>49]"
794 + "[(50-54)s:haus|_7$<i>50<i>54]"
795 + "[(55-59)s:efeu|_8$<i>55<i>59]"
796 + "[(60-64)s:effe|_9$<i>60<i>64]");
797 return fd;
798 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000799
Akrond6611cd2018-01-05 19:45:35 +0100800 private Krill _newKrill (SpanQueryWrapper query) {
801 Krill ks = new Krill(query);
802 ks.getMeta().getContext().left.setToken(true).setLength(1);
803 ks.getMeta().getContext().right.setToken(true).setLength(1);
804 return ks;
805 };
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000806}