blob: 9acd416b9a4fd8fe979f34403523590a8cc5a095 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
Nils Diewaldf399a672013-11-18 17:55:22 +00003import java.util.*;
4import java.io.*;
5
6import org.apache.lucene.util.Version;
7import org.apache.lucene.util.BytesRef;
8import org.apache.lucene.util.Bits;
9
10import static org.junit.Assert.*;
Eliza Margaretha22898982014-11-04 17:10:21 +000011
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.junit.Test;
13import org.junit.Ignore;
14import org.junit.runner.RunWith;
15import org.junit.runners.JUnit4;
16
Nils Diewalda14ecd62015-02-26 21:00:20 +000017import de.ids_mannheim.korap.KrillIndex;
Nils Diewald392bcf32015-02-26 20:01:17 +000018import de.ids_mannheim.korap.response.Match;
Nils Diewald0339d462015-02-26 14:53:56 +000019import de.ids_mannheim.korap.KrillQuery;
Nils Diewald884dbcf2015-02-27 17:02:28 +000020import de.ids_mannheim.korap.response.Result;
Nils Diewaldf399a672013-11-18 17:55:22 +000021import de.ids_mannheim.korap.query.SpanElementQuery;
22import de.ids_mannheim.korap.query.SpanWithinQuery;
23import de.ids_mannheim.korap.query.SpanNextQuery;
24import de.ids_mannheim.korap.query.SpanClassQuery;
Eliza Margaretha22898982014-11-04 17:10:21 +000025import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
26import de.ids_mannheim.korap.util.QueryException;
Nils Diewaldf399a672013-11-18 17:55:22 +000027import de.ids_mannheim.korap.index.FieldDocument;
Nils Diewalde4986d72015-02-27 17:35:00 +000028import de.ids_mannheim.korap.index.MultiTermTokenStream;
Nils Diewaldf399a672013-11-18 17:55:22 +000029import org.apache.lucene.search.spans.SpanQuery;
30import org.apache.lucene.search.spans.SpanTermQuery;
31import org.apache.lucene.index.Term;
32
33import java.nio.ByteBuffer;
34
// mvn -Dtest=TestWithinIndex#indexExample1a test
36
Nils Diewald83c9b162015-02-03 21:05:07 +000037
38/**
39 * @author diewald
40 * @author margaretha
41 */
Nils Diewaldf399a672013-11-18 17:55:22 +000042@RunWith(JUnit4.class)
43public class TestWithinIndex {
44
45 // Todo: primary data as a non-indexed field separated.
46
47 @Test
48 public void indexExample1a () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +000049 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +000050
Nils Diewald83c9b162015-02-03 21:05:07 +000051 // <a>x<a>y<a>zhij</a>hij</a>hij</a>
52 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +000053 fd.addTV("base", "x y z h i j h i j h i j ",
54 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
55 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
56 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
57 "[(9-12)s:h]" + // 4
58 "[(12-15)s:i]" + // 5
59 "[(15-18)s:j]" + // 6
60 "[(18-21)s:h]" + // 7
61 "[(21-24)s:i]" + // 8
62 "[(24-27)s:j]" + // 9
63 "[(27-30)s:h]" + // 10
64 "[(30-33)s:i]" + // 11
65 "[(33-36)s:j]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +000066 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +000067
Nils Diewald83c9b162015-02-03 21:05:07 +000068 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +000069
Nils Diewald83c9b162015-02-03 21:05:07 +000070 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +000071 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +000072
Nils Diewaldbb33da22015-03-04 16:24:25 +000073 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
74 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +000075
Nils Diewald83c9b162015-02-03 21:05:07 +000076 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +000077
Nils Diewald83c9b162015-02-03 21:05:07 +000078 assertEquals("totalResults", kr.getTotalResults(), 6);
Nils Diewaldf399a672013-11-18 17:55:22 +000079
Nils Diewald83c9b162015-02-03 21:05:07 +000080 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
81 assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
82 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
83 assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos);
84 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
85 assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos);
86 assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
87 assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos);
88 assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
89 assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
90 assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
91 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +000092
Nils Diewald83c9b162015-02-03 21:05:07 +000093 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +000094 };
95
Nils Diewaldbb33da22015-03-04 16:24:25 +000096
Nils Diewaldf399a672013-11-18 17:55:22 +000097 @Test
98 public void indexExample1b () throws IOException {
Nils Diewald83c9b162015-02-03 21:05:07 +000099 // Cases 9, 12, 13
Nils Diewalda14ecd62015-02-26 21:00:20 +0000100 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000101
Nils Diewald83c9b162015-02-03 21:05:07 +0000102 // <a>x<a>y<a>zhij</a>hij</a>hij</a>
103 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000104 fd.addTV("base", "x y z h i j h i j h i j ",
105 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
106 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
107 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
108 "[(9-12)s:h]" + // 4
109 "[(12-15)s:i]" + // 5
110 "[(15-18)s:j]" + // 6
111 "[(18-21)s:h]" + // 7
112 "[(21-24)s:i]" + // 8
113 "[(24-27)s:j]" + // 9
114 "[(27-30)s:h]" + // 10
115 "[(30-33)s:i]" + // 11
116 "[(33-36)s:j]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000117 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000118
Nils Diewald83c9b162015-02-03 21:05:07 +0000119 // <a>x<a>y<a>zhij</a>hij</a>hij</a>
120 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121 fd.addTV("base", "x y z h i j h i j h i j ",
122 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
123 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
124 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
125 "[(9-12)s:h]" + // 4
126 "[(12-15)s:i]" + // 5
127 "[(15-18)s:j]" + // 6
128 "[(18-21)s:h]" + // 7
129 "[(21-24)s:i]" + // 8
130 "[(24-27)s:j]" + // 9
131 "[(27-30)s:h]" + // 10
132 "[(30-33)s:i]" + // 11
133 "[(33-36)s:j]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000134 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000135
136
Nils Diewald83c9b162015-02-03 21:05:07 +0000137 // Save documents
138 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139
Nils Diewald83c9b162015-02-03 21:05:07 +0000140 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000141 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +0000142
Nils Diewaldbb33da22015-03-04 16:24:25 +0000143 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
144 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000145
Nils Diewald83c9b162015-02-03 21:05:07 +0000146 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000147
Nils Diewald83c9b162015-02-03 21:05:07 +0000148 assertEquals("totalResults", kr.getTotalResults(), 12);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000149
Nils Diewald83c9b162015-02-03 21:05:07 +0000150 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
151 assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
152 assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID);
153 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
154 assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos);
155 assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID);
156 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
157 assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos);
158 assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID);
159 assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
160 assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos);
161 assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID);
162 assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
163 assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
164 assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID);
165 assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
166 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
167 assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID);
Nils Diewaldf399a672013-11-18 17:55:22 +0000168
Nils Diewald83c9b162015-02-03 21:05:07 +0000169 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
170 assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos);
171 assertEquals("Doc (6)", 1, kr.getMatch(6).internalDocID);
172 assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
173 assertEquals("EndPos (7)", 12, kr.getMatch(7).endPos);
174 assertEquals("Doc (7)", 1, kr.getMatch(7).internalDocID);
175 assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
176 assertEquals("EndPos (8)", 12, kr.getMatch(8).endPos);
177 assertEquals("Doc (8)", 1, kr.getMatch(8).internalDocID);
178 assertEquals("StartPos (9)", 1, kr.getMatch(9).startPos);
179 assertEquals("EndPos (9)", 9, kr.getMatch(9).endPos);
180 assertEquals("Doc (9)", 1, kr.getMatch(9).internalDocID);
181 assertEquals("StartPos (10)", 1, kr.getMatch(10).startPos);
182 assertEquals("EndPos (10)", 9, kr.getMatch(10).endPos);
183 assertEquals("Doc (10)", 1, kr.getMatch(10).internalDocID);
184 assertEquals("StartPos (11)", 2, kr.getMatch(11).startPos);
185 assertEquals("EndPos (11)", 6, kr.getMatch(11).endPos);
186 assertEquals("Doc (11)", 1, kr.getMatch(11).internalDocID);
Nils Diewaldf399a672013-11-18 17:55:22 +0000187
Nils Diewald83c9b162015-02-03 21:05:07 +0000188 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +0000189 for (Match km : kr.getMatches()){
190 System.out.println(km.getStartPos() +","+km.getEndPos()+" "
Nils Diewald83c9b162015-02-03 21:05:07 +0000191 +km.getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000192 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000193 */
194
195 assertEquals(2, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000196 };
197
198
199 @Test
200 public void indexExample1c () throws IOException {
Nils Diewald83c9b162015-02-03 21:05:07 +0000201 // Cases 9, 12, 13
Nils Diewalda14ecd62015-02-26 21:00:20 +0000202 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000203
Nils Diewald83c9b162015-02-03 21:05:07 +0000204 // <a>x<a>y<a>zhij</a>hij</a>hij</a>
205 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000206 fd.addTV("base", "x y z h i j h i j h i j ",
207 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
208 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
209 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
210 "[(9-12)s:h]" + // 4
211 "[(12-15)s:i]" + // 5
212 "[(15-18)s:j]" + // 6
213 "[(18-21)s:h]" + // 7
214 "[(21-24)s:i]" + // 8
215 "[(24-27)s:j]" + // 9
216 "[(27-30)s:h]" + // 10
217 "[(30-33)s:i]" + // 11
218 "[(33-36)s:j]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000219 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000220
Nils Diewald83c9b162015-02-03 21:05:07 +0000221 // <a>x<a>y<a>zabc</a>abc</a>abc</a>
222 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000223 fd.addTV("base", "x y z a b c a b c a b c ",
224 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
225 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
226 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
227 "[(9-12)s:a]" + // 4
228 "[(12-15)s:b]" + // 5
229 "[(15-18)s:c]" + // 6
230 "[(18-21)s:a]" + // 7
231 "[(21-24)s:b]" + // 8
232 "[(24-27)s:c]" + // 9
233 "[(27-30)s:a]" + // 10
234 "[(30-33)s:b]" + // 11
235 "[(33-36)s:c]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000236 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000237
Nils Diewald83c9b162015-02-03 21:05:07 +0000238 // Save documents
239 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000240
Nils Diewald83c9b162015-02-03 21:05:07 +0000241 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000242 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +0000243
Nils Diewaldbb33da22015-03-04 16:24:25 +0000244 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
245 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000246
Nils Diewald83c9b162015-02-03 21:05:07 +0000247 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000248
Nils Diewald83c9b162015-02-03 21:05:07 +0000249 assertEquals("totalResults", kr.getTotalResults(), 6);
Nils Diewaldf399a672013-11-18 17:55:22 +0000250
Nils Diewald83c9b162015-02-03 21:05:07 +0000251 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
252 assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
253 assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID);
254 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
255 assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos);
256 assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID);
257 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
258 assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos);
259 assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID);
260 assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
261 assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos);
262 assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID);
263 assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
264 assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
265 assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID);
266 assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
267 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
268 assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID);
Nils Diewaldf399a672013-11-18 17:55:22 +0000269
Nils Diewald83c9b162015-02-03 21:05:07 +0000270 assertEquals(2, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000271 };
272
273
274 @Test
Nils Diewald20607ab2014-03-20 23:28:36 +0000275 public void indexExample1d () throws IOException {
Nils Diewald83c9b162015-02-03 21:05:07 +0000276 // Cases 9, 12, 13
Nils Diewalda14ecd62015-02-26 21:00:20 +0000277 KrillIndex ki = new KrillIndex();
Nils Diewald20607ab2014-03-20 23:28:36 +0000278
Nils Diewald83c9b162015-02-03 21:05:07 +0000279 // <a>x<a>y<a>zhij</a>hij</a>hij</a>
280 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000281 fd.addTV("base", "x y z h i j h i j h i j ",
282 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
283 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
284 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
285 "[(9-12)s:h]" + // 4
286 "[(12-15)s:i]" + // 5
287 "[(15-18)s:j]" + // 6
288 "[(18-21)s:h]" + // 7
289 "[(21-24)s:i]" + // 8
290 "[(24-27)s:j]" + // 9
291 "[(27-30)s:h]" + // 10
292 "[(30-33)s:i]" + // 11
293 "[(33-36)s:j]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000294 ki.addDoc(fd);
Nils Diewald20607ab2014-03-20 23:28:36 +0000295
Nils Diewald83c9b162015-02-03 21:05:07 +0000296 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000297 fd.addTV("base", "x y z h ", "[(0-3)s:x]" + // 1
298 "[(3-6)s:y]" + // 2
299 "[(6-9)s:z]" + // 3
300 "[(9-12)s:h]"); // 4
Nils Diewald83c9b162015-02-03 21:05:07 +0000301 ki.addDoc(fd);
Nils Diewald20607ab2014-03-20 23:28:36 +0000302
Nils Diewald83c9b162015-02-03 21:05:07 +0000303 // <a>x<a>y<a>zabc</a>abc</a>abc</a>
304 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000305 fd.addTV("base", "x y z a b c a b c a b c ",
306 "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
307 "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
308 "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
309 "[(9-12)s:a]" + // 4
310 "[(12-15)s:b]" + // 5
311 "[(15-18)s:c]" + // 6
312 "[(18-21)s:a]" + // 7
313 "[(21-24)s:b]" + // 8
314 "[(24-27)s:c]" + // 9
315 "[(27-30)s:a]" + // 10
316 "[(30-33)s:b]" + // 11
317 "[(33-36)s:c]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000318 ki.addDoc(fd);
Nils Diewald20607ab2014-03-20 23:28:36 +0000319
Nils Diewald83c9b162015-02-03 21:05:07 +0000320 // Save documents
321 ki.commit();
Nils Diewald20607ab2014-03-20 23:28:36 +0000322
Nils Diewald83c9b162015-02-03 21:05:07 +0000323 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000324 Result kr;
Nils Diewald20607ab2014-03-20 23:28:36 +0000325
Nils Diewald83c9b162015-02-03 21:05:07 +0000326 sq = new SpanElementQuery("base", "a");
327 kr = ki.search(sq, (short) 15);
Nils Diewald20607ab2014-03-20 23:28:36 +0000328
Nils Diewaldbb33da22015-03-04 16:24:25 +0000329 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
330 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewald20607ab2014-03-20 23:28:36 +0000331
Nils Diewald83c9b162015-02-03 21:05:07 +0000332 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000333
Nils Diewald83c9b162015-02-03 21:05:07 +0000334 assertEquals("totalResults", kr.getTotalResults(), 6);
Nils Diewald20607ab2014-03-20 23:28:36 +0000335
Nils Diewald83c9b162015-02-03 21:05:07 +0000336 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
337 assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
338 assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID);
339 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
340 assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos);
341 assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID);
342 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
343 assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos);
344 assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID);
345 assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
346 assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos);
347 assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID);
348 assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
349 assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
350 assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID);
351 assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
352 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
353 assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000354
Nils Diewald83c9b162015-02-03 21:05:07 +0000355 assertEquals(3, ki.numberOf("documents"));
Nils Diewald20607ab2014-03-20 23:28:36 +0000356 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000357
358
Nils Diewald20607ab2014-03-20 23:28:36 +0000359 @Test
Nils Diewaldf399a672013-11-18 17:55:22 +0000360 public void indexExample2a () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000361 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000362
Nils Diewald83c9b162015-02-03 21:05:07 +0000363 // <a><a><a>h</a>hij</a>hij</a>
364 FieldDocument fd = new FieldDocument();
365 fd.addTV("base",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000366 // <a><a>hhij</a>hijh</a>ij</a>
367 "h h i j h i j h i j ",
368 "[s:h|_0#0-3|<>:a#0-24$<i>7|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1
369 "[s:h|_1#3-6]" + // 2
370 "[s:i|_2#6-9]" + // 3
371 "[s:j|_3#9-12]" + // 4
372 "[s:h|_4#12-15]" + // 5
373 "[s:i|_5#15-18]" + // 6
374 "[s:j|_6#18-21]" + // 7
375 "[s:h|_7#21-24]" + // 8
376 "[s:i|_8#24-27]" + // 9
377 "[s:j|_9#27-30]"); // 10
Nils Diewald83c9b162015-02-03 21:05:07 +0000378 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000379
Nils Diewald83c9b162015-02-03 21:05:07 +0000380 // Save documents
381 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000382
Nils Diewald83c9b162015-02-03 21:05:07 +0000383 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000384
Nils Diewald83c9b162015-02-03 21:05:07 +0000385 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000386 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +0000387
Nils Diewald83c9b162015-02-03 21:05:07 +0000388 sq = new SpanElementQuery("base", "a");
389 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000390
Nils Diewald83c9b162015-02-03 21:05:07 +0000391 assertEquals("totalResults", kr.getTotalResults(), 3);
392 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000393 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000394 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
395 assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos);
396 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000397 assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000398
399 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
400 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000401
Nils Diewald83c9b162015-02-03 21:05:07 +0000402 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000403
Nils Diewaldcd226862015-02-11 22:27:45 +0000404 assertEquals("totalResults", kr.getTotalResults(), 10);
405
Nils Diewald83c9b162015-02-03 21:05:07 +0000406 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000407 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000408 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000409 assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);
410
Nils Diewald83c9b162015-02-03 21:05:07 +0000411 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
412 assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos);
413 assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
414 assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos);
415 assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
416 assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
417 assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000418 assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos);
419
Nils Diewald83c9b162015-02-03 21:05:07 +0000420 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000421 assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000422 assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000423 assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000424 assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000425 assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos);
426 assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
427 assertEquals("EndPos (9)", 9, kr.getMatch(9).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000428 };
429
Nils Diewaldbb33da22015-03-04 16:24:25 +0000430
Nils Diewaldf399a672013-11-18 17:55:22 +0000431 @Test
432 public void indexExample2b () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000433 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000434
Nils Diewald83c9b162015-02-03 21:05:07 +0000435 // 6,9,12
436 // <a><a><a>h</a>hij</a>hij</a>h
437 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000438 fd.addTV("base", "h h i j h i j h i j h ",
439 "[(0-3)s:h|<>:a#0-21$<i>6|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1
440 "[(3-6)s:h]" + // 2
441 "[(6-9)s:i]" + // 3
442 "[(9-12)s:j]" + // 4
443 "[(12-15)s:h]" + // 5
444 "[(15-18)s:i]" + // 6
445 "[(18-21)s:j]" + // 7
446 "[(21-24)s:h]" + // 8
447 "[(24-27)s:i]" + // 9
448 "[(27-30)s:j]" + // 10
449 "[(30-33)s:h]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000450 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000451
Nils Diewald83c9b162015-02-03 21:05:07 +0000452 // Save documents
453 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000454
Nils Diewald83c9b162015-02-03 21:05:07 +0000455 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000456
Nils Diewald83c9b162015-02-03 21:05:07 +0000457 SpanQuery sq = new SpanElementQuery("base", "a");
Nils Diewald884dbcf2015-02-27 17:02:28 +0000458 Result kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000459
Nils Diewald83c9b162015-02-03 21:05:07 +0000460 assertEquals("totalResults", kr.getTotalResults(), 3);
461 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000462 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000463 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000464 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000465 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000466 assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000467
468 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
469 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000470
Nils Diewald83c9b162015-02-03 21:05:07 +0000471 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000472
Nils Diewald83c9b162015-02-03 21:05:07 +0000473 assertEquals("totalResults", kr.getTotalResults(), 9);
474 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000475 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000476 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000477 assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000478 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000479 assertEquals("EndPos (2)", 6, kr.getMatch(2).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000480 assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000481 assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000482 assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000483 assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000484 assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000485 assertEquals("EndPos (5)", 9, kr.getMatch(5).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000486 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000487 assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000488 assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000489 assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000490 assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000491 assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000492 };
493
494
495 @Test
496 public void indexExample2c () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000497 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000498
Nils Diewald83c9b162015-02-03 21:05:07 +0000499 // 2, 6, 9, 12
500 // <a><a><a>h</a>hij</a>hij</a>h<a>i</i>
501 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000502 fd.addTV("base", "h h i j h i j h i j h i ",
503 "[(0-3)s:h|<>:a#0-21$<i>7|<>:a#0-15$<i>4|<>:a#0-30$<i>10]" + // 1
504 "[(3-6)s:h]" + // 2
505 "[(6-9)s:i]" + // 3
506 "[(9-12)s:j]" + // 4
507 "[(12-15)s:h]" + // 5
508 "[(15-18)s:i]" + // 6
509 "[(18-21)s:j]" + // 7
510 "[(21-24)s:h]" + // 8
511 "[(24-27)s:i]" + // 9
512 "[(27-30)s:j]" + // 10
513 "[(30-33)s:h]" + // 11
514 "[(33-36)s:i|<>:a#33-36$<i>12]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000515 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000516
Nils Diewald83c9b162015-02-03 21:05:07 +0000517 // Save documents
518 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000519
Nils Diewald83c9b162015-02-03 21:05:07 +0000520 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000521
Nils Diewald83c9b162015-02-03 21:05:07 +0000522 SpanQuery sq = new SpanElementQuery("base", "a");
Nils Diewaldf399a672013-11-18 17:55:22 +0000523
Nils Diewald884dbcf2015-02-27 17:02:28 +0000524 Result kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000525
Nils Diewald83c9b162015-02-03 21:05:07 +0000526 assertEquals("totalResults", kr.getTotalResults(), 4);
527 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
528 assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
529 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
530 assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos);
531 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
532 assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos);
533 assertEquals("StartPos (3)", 11, kr.getMatch(3).startPos);
534 assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000535
Nils Diewaldbb33da22015-03-04 16:24:25 +0000536 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
537 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000538
Nils Diewald83c9b162015-02-03 21:05:07 +0000539 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000540
Nils Diewaldcd226862015-02-11 22:27:45 +0000541 assertEquals("totalResults", kr.getTotalResults(), 11);
Nils Diewald83c9b162015-02-03 21:05:07 +0000542 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
543 assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
544 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
545 assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
546 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000547 assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000548
Nils Diewald83c9b162015-02-03 21:05:07 +0000549 assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
550 assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos);
551 assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
552 assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
553 assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000554 assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos);
555
Nils Diewald83c9b162015-02-03 21:05:07 +0000556 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
557 assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos);
558 assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
559 assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos);
560 assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
561 assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000562 assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
563 assertEquals("EndPos (9)", 10, kr.getMatch(9).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000564 };
565
566
567 @Test
568 public void indexExample2d () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000569 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000570
Nils Diewald83c9b162015-02-03 21:05:07 +0000571 // 2, 6, 9, 12, 7
572 // <a><a><a>h</a>hij</a>hij</a>h<a>h</h>
573 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000574 fd.addTV("base", "h h i j h i j h i j h h ",
575 "[(0-3)s:h|_0#0-3|<>:a#0-18$<i>6|<>:a#0-15$<i>4|<>:a#0-27$<i>8]"
576 + // 1
577 "[(3-6)s:h|_1#3-6]" + // 2
578 "[(6-9)s:i|_2#6-9]" + // 3
579 "[(9-12)s:j|_3#9-12]" + // 4
580 "[(12-15)s:h|_4#12-15]" + // 5
581 "[(15-18)s:i|_5#15-18]" + // 6
582 "[(18-21)s:j|_6#18-21]" + // 7
583 "[(21-24)s:h|_7#21-24]" + // 8
584 "[(24-27)s:i|_8#24-27]" + // 9
585 "[(27-30)s:j|_9#27-30]" + // 10
586 "[(30-33)s:h|_10#30-33|<>:a#30-36$<i>12]" + // 11
587 "[(33-36)s:h|_11#33-36|<>:a#33-36$<i>12]"); // 12
Nils Diewald83c9b162015-02-03 21:05:07 +0000588 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000589
Nils Diewald83c9b162015-02-03 21:05:07 +0000590 // Save documents
591 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000592
Nils Diewald83c9b162015-02-03 21:05:07 +0000593 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000594
Nils Diewald83c9b162015-02-03 21:05:07 +0000595 SpanQuery sq = new SpanElementQuery("base", "a");
Nils Diewaldf399a672013-11-18 17:55:22 +0000596
Nils Diewald884dbcf2015-02-27 17:02:28 +0000597 Result kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000598
Nils Diewaldcd226862015-02-11 22:27:45 +0000599 assertEquals("totalResults", kr.getTotalResults(), 5);
600
Nils Diewald83c9b162015-02-03 21:05:07 +0000601 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
602 assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
603 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000604 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000605 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000606 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
607 assertEquals("StartPos (3)", 10, kr.getMatch(3).startPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000608 assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000609 assertEquals("StartPos (4)", 11, kr.getMatch(4).startPos);
610 assertEquals("EndPos (4)", 12, kr.getMatch(4).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000611
Nils Diewaldbb33da22015-03-04 16:24:25 +0000612 sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
613 new SpanTermQuery(new Term("base", "s:h")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000614
Nils Diewald83c9b162015-02-03 21:05:07 +0000615 kr = ki.search(sq, (short) 15);
Nils Diewaldf399a672013-11-18 17:55:22 +0000616
Nils Diewaldcd226862015-02-11 22:27:45 +0000617 assertEquals("totalResults", kr.getTotalResults(), 13);
Nils Diewald83c9b162015-02-03 21:05:07 +0000618 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
619 assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
620 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
621 assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
622 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000623 assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);
624
Nils Diewald83c9b162015-02-03 21:05:07 +0000625 assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000626 assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000627 assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000628 assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000629 assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000630 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
631
Nils Diewald83c9b162015-02-03 21:05:07 +0000632 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000633 assertEquals("EndPos (6)", 8, kr.getMatch(6).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000634 assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000635 assertEquals("EndPos (7)", 8, kr.getMatch(7).endPos);
Nils Diewald83c9b162015-02-03 21:05:07 +0000636 assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
Nils Diewaldcd226862015-02-11 22:27:45 +0000637 assertEquals("EndPos (8)", 8, kr.getMatch(8).endPos);
638 assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
639 assertEquals("EndPos (9)", 8, kr.getMatch(9).endPos);
640
641 assertEquals("StartPos (10)", 10, kr.getMatch(10).startPos);
642 assertEquals("EndPos (10)", 12, kr.getMatch(10).endPos);
643 assertEquals("StartPos (11)", 10, kr.getMatch(11).startPos);
644 assertEquals("EndPos (11)", 12, kr.getMatch(11).endPos);
645
646 assertEquals("StartPos (12)", 11, kr.getMatch(12).startPos);
647 assertEquals("EndPos (12)", 12, kr.getMatch(12).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000648 };
649
650
651 @Test
652 public void indexExample3 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000653 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000654
Nils Diewald83c9b162015-02-03 21:05:07 +0000655 // <a><a><a>u</a></a></a>
656 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000657 fd.addTV("base", "xyz",
658 "[(0-3)s:xyz|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:b#0-3$<i>0]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000659 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000660
Nils Diewald83c9b162015-02-03 21:05:07 +0000661 // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a>
662 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000663 fd.addTV("base", "x y z c d e c d e c d e ",
664 "[(0-3)s:x|<>:a#0-36$<i>12|<>:b#0-36$<i>12]"
665 + "[(3-6)s:y|<>:a#3-27$<i>9]"
666 + "[(6-9)s:z|<>:a#6-18$<i>6]" + "[(9-12)s:c]"
667 + "[(12-15)s:d]" + "[(15-18)s:e]" + "[(18-21)s:c]"
668 + "[(21-24)s:d]" + "[(24-27)s:e]" + "[(27-30)s:c]"
669 + "[(30-33)s:d]" + "[(33-36)s:e]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000670 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000671
Nils Diewald83c9b162015-02-03 21:05:07 +0000672 // xyz
673 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000674 fd.addTV("base", "x y z ", "[(0-3)s:x]" + "[(3-6)s:y]"
675 + "[(6-9)s:z]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000676 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000677
Nils Diewald83c9b162015-02-03 21:05:07 +0000678 // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a>
679 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000680 fd.addTV("base", "x y z k l m k l m k l m ",
681 "[(0-3)s:x|<>:a#0-3$<i>12]"
682 + "[(3-6)s:y|<>:a#3-6$<i>9|<>:b#3-6$<i>9]"
683 + "[(6-9)s:z|<>:a#6-9$<i>6]" + "[(9-12)s:k]"
684 + "[(12-15)s:l]" + "[(15-18)s:m]" + "[(18-21)s:k]"
685 + "[(21-24)s:l]" + "[(24-27)s:m]" + "[(27-30)s:k]"
686 + "[(30-33)s:l]" + "[(33-36)s:m]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000687 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000688
Nils Diewald83c9b162015-02-03 21:05:07 +0000689 // <a><a><a>h</a>hhij</a>hij</a>hij</a>
690 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000691 fd.addTV("base", "h i j h i j h i j ",
692 "[(0-3)s:h|<>:a#0-27$<i>6|<>:a#0-18$<i>3|<>:a#0-36$<i>9]"
693 + "[(3-6)s:h]" + "[(12-15)s:i]" + "[(15-18)s:j]"
694 + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]"
695 + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000696 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000697
Nils Diewald83c9b162015-02-03 21:05:07 +0000698 // xyz
699 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000700 fd.addTV("base", "a b c ", "[(0-3)s:a]" + "[(3-6)s:b]"
701 + "[(6-9)s:c]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000702 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000703
Nils Diewald83c9b162015-02-03 21:05:07 +0000704 // Save documents
705 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000706
Nils Diewald83c9b162015-02-03 21:05:07 +0000707 assertEquals(6, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000708
Nils Diewald83c9b162015-02-03 21:05:07 +0000709 SpanQuery sq = new SpanElementQuery("base", "a");
Nils Diewaldf399a672013-11-18 17:55:22 +0000710
Nils Diewald884dbcf2015-02-27 17:02:28 +0000711 Result kr = ki.search(sq, (short) 15);
Nils Diewaldf399a672013-11-18 17:55:22 +0000712
Nils Diewald83c9b162015-02-03 21:05:07 +0000713 assertEquals("totalResults", kr.getTotalResults(), 12);
714 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
715 assertEquals("EndPos (0)", 0, kr.getMatch(0).endPos);
716 assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
717 assertEquals("EndPos (1)", 0, kr.getMatch(1).endPos);
718 assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
719 assertEquals("EndPos (2)", 0, kr.getMatch(2).endPos);
720 assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
721 assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
722 assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
723 assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
724 assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
725 assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000726
Nils Diewald83c9b162015-02-03 21:05:07 +0000727 assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
728 assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos);
729 assertEquals("StartPos (7)", 1, kr.getMatch(7).startPos);
730 assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
731 assertEquals("StartPos (8)", 2, kr.getMatch(8).startPos);
732 assertEquals("EndPos (8)", 6, kr.getMatch(8).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000733
Nils Diewald83c9b162015-02-03 21:05:07 +0000734 assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
735 assertEquals("EndPos (9)", 3, kr.getMatch(9).endPos);
736 assertEquals("StartPos (10)", 0, kr.getMatch(10).startPos);
737 assertEquals("EndPos (10)", 6, kr.getMatch(10).endPos);
738 assertEquals("StartPos (11)", 0, kr.getMatch(11).startPos);
739 assertEquals("EndPos (11)", 9, kr.getMatch(11).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000740 };
741
Nils Diewaldbb33da22015-03-04 16:24:25 +0000742
Nils Diewaldf399a672013-11-18 17:55:22 +0000743 @Test
744 public void indexExample3Offsets () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000745 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000746
Nils Diewald83c9b162015-02-03 21:05:07 +0000747 // Er schrie: <s>"Das war ich!"</s>
748 FieldDocument fd = new FieldDocument();
749 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000750 fd.addTV("base", "Er schrie: \"Das war ich!\" und ging.",
751 "[(0-2)s:Er|_0#0-3]" + "[(3-9)s:schrie|_1#3-9]"
752 + "[(12-15)s:Das|_2#12-15|<>:sentence#11-25$<i>5]"
753 + "[(16-19)s:war|_3#16-19]" + "[(20-23)s:ich|_4#20-23]"
754 + "[(26-29)s:und|_5#26-29]"
755 + "[(30-34)s:ging|_6#30-34]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000756 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000757
Nils Diewald83c9b162015-02-03 21:05:07 +0000758 // Save documents
759 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000760
761 SpanQuery sq = new SpanClassQuery(new SpanElementQuery("base",
762 "sentence"), (byte) 3);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000763 Result kr;
Nils Diewald83c9b162015-02-03 21:05:07 +0000764 kr = ki.search(sq, 0, (short) 15, true, (short) 1, true, (short) 1);
765 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000766
767 assertEquals("... schrie: [\"{3:Das war ich}!\"] und ...",
768 kr.getMatch(0).getSnippetBrackets());
769 assertEquals(
770 "<span class=\"context-left\"><span class=\"more\"></span>schrie: </span><mark>&quot;<mark class=\"class-3 level-0\">Das war ich</mark>!&quot;</mark><span class=\"context-right\"> und<span class=\"more\"></span></span>",
771 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000772
Nils Diewald83c9b162015-02-03 21:05:07 +0000773 kr = ki.search(sq, 0, (short) 15, true, (short) 0, true, (short) 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000774 assertEquals("... [\"{3:Das war ich}!\"] ...", kr.getMatch(0)
775 .getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000776 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000777
Nils Diewald83c9b162015-02-03 21:05:07 +0000778 kr = ki.search(sq, 0, (short) 15, true, (short) 6, true, (short) 6);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000779 assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging.", kr
780 .getMatch(0).getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000781 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000782
Nils Diewald83c9b162015-02-03 21:05:07 +0000783 kr = ki.search(sq, 0, (short) 15, true, (short) 2, true, (short) 2);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000784 assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging ...", kr
785 .getMatch(0).getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000786 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000787
Nils Diewaldbb33da22015-03-04 16:24:25 +0000788 sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery(
789 "base", "sentence"), new SpanClassQuery(new SpanTermQuery(
790 new Term("base", "s:Das")), (byte) 2)), (byte) 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000791
Nils Diewald83c9b162015-02-03 21:05:07 +0000792 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000793 assertEquals("Er schrie: [\"{1:{2:Das} war ich}!\"] und ging.", kr
794 .getMatch(0).getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000795 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000796
Nils Diewaldbb33da22015-03-04 16:24:25 +0000797 sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery(
798 "base", "sentence"), new SpanClassQuery(new SpanTermQuery(
799 new Term("base", "s:war")), (byte) 2)), (byte) 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000800
Nils Diewald83c9b162015-02-03 21:05:07 +0000801 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000802 assertEquals("Er schrie: [\"{1:Das {2:war} ich}!\"] und ging.", kr
803 .getMatch(0).getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000804 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000805
Nils Diewaldbb33da22015-03-04 16:24:25 +0000806 sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery(
807 "base", "sentence"), new SpanClassQuery(new SpanTermQuery(
808 new Term("base", "s:ich")), (byte) 2)), (byte) 1);
809
Nils Diewald83c9b162015-02-03 21:05:07 +0000810 kr = ki.search(sq, (short) 15);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000811 assertEquals("Er schrie: [\"{1:Das war {2:ich}}!\"] und ging.", kr
812 .getMatch(0).getSnippetBrackets());
Nils Diewald83c9b162015-02-03 21:05:07 +0000813 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000814
Nils Diewaldbb33da22015-03-04 16:24:25 +0000815 sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery(
816 "base", "sentence"), new SpanClassQuery(new SpanTermQuery(
817 new Term("base", "s:und")), (byte) 2)), (byte) 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000818
Nils Diewald83c9b162015-02-03 21:05:07 +0000819 kr = ki.search(sq, (short) 15);
820 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000821
Nils Diewaldbb33da22015-03-04 16:24:25 +0000822 sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery(
823 "base", "sentence"), new SpanClassQuery(new SpanTermQuery(
824 new Term("base", "s:schrie")), (byte) 2)), (byte) 1);
Nils Diewaldf399a672013-11-18 17:55:22 +0000825
Nils Diewald83c9b162015-02-03 21:05:07 +0000826 kr = ki.search(sq, (short) 15);
827 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000828 };
829
Nils Diewaldbb33da22015-03-04 16:24:25 +0000830
Nils Diewaldf399a672013-11-18 17:55:22 +0000831 @Test
832 public void indexExample4 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000833 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000834
Nils Diewald83c9b162015-02-03 21:05:07 +0000835 // Case 1, 6, 7, 13
836 // xy<a><a>x</a>b<a>c</a></a>x
837 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000838 fd.addTV("base", "x y x b c x ", "[(0-3)s:x|_0#0-3]"
839 + "[(3-6)s:y|_1#3-6]"
840 + "[(6-9)s:x|_2#6-9|<>:a#6-15$<i>5|<>:a#6-9$<i>3]"
841 + "[(9-12)s:b|_3#9-12]"
842 + "[(12-15)s:c|_4#12-15|<>:a#12-15$<i>5]"
843 + "[(15-18)s:x|_5#15-18]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000844 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000845
Nils Diewald83c9b162015-02-03 21:05:07 +0000846 // Save documents
847 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000848
Nils Diewald83c9b162015-02-03 21:05:07 +0000849 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000850
Nils Diewaldbb33da22015-03-04 16:24:25 +0000851 SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
852 new SpanTermQuery(new Term("base", "s:x")));
Nils Diewaldf399a672013-11-18 17:55:22 +0000853
Nils Diewald884dbcf2015-02-27 17:02:28 +0000854 Result kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000855
Nils Diewald83c9b162015-02-03 21:05:07 +0000856 assertEquals("totalResults", kr.getTotalResults(), 2);
857 assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos);
858 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
859 assertEquals("StartPos (1)", 2, kr.getMatch(1).startPos);
860 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000861 };
862
Nils Diewaldbb33da22015-03-04 16:24:25 +0000863
Nils Diewaldf399a672013-11-18 17:55:22 +0000864 @Test
865 public void indexExample5 () throws IOException {
Nils Diewald83c9b162015-02-03 21:05:07 +0000866 // 1,2,3,6,9,10,12
Nils Diewalda14ecd62015-02-26 21:00:20 +0000867 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000868
Nils Diewald83c9b162015-02-03 21:05:07 +0000869 // hij<a>hi<a>h<a>ij</a></a>hi</a>
870 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000871 fd.addTV("base", "hijhihijhi",
872 "[(0-1)s:h|i:h|_0#0-1|-:a$<i>3|-:t$<i>10]"
873 + "[(1-2)s:i|i:i|_1#1-2]" + "[(2-3)s:j|i:j|_2#2-3]"
874 + "[(3-4)s:h|i:h|_3#3-4|<>:a#3-10$<i>10]"
875 + "[(4-5)s:i|i:i|_4#4-5]"
876 + "[(5-6)s:h|i:h|_5#5-6|<>:a#5-8$<i>8]"
877 + "[(6-7)s:i|i:i|_6#6-7|<>:a#6-8$<i>8]"
878 + "[(7-8)s:j|i:j|_7#7-8]" + "[(8-9)s:h|i:h|_8#8-9]"
879 + "[(9-10)s:i|i:i|_9#9-10]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000880 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000881
Nils Diewald83c9b162015-02-03 21:05:07 +0000882 // Save documents
883 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000884
Nils Diewald83c9b162015-02-03 21:05:07 +0000885 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000886
Nils Diewaldbb33da22015-03-04 16:24:25 +0000887 SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
888 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")),
889 new SpanTermQuery(new Term("base", "s:i"))));
Nils Diewaldf399a672013-11-18 17:55:22 +0000890
Nils Diewald884dbcf2015-02-27 17:02:28 +0000891 Result kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000892
Nils Diewald83c9b162015-02-03 21:05:07 +0000893 assertEquals("totalResults", kr.getTotalResults(), 4);
Nils Diewaldf399a672013-11-18 17:55:22 +0000894
Nils Diewald83c9b162015-02-03 21:05:07 +0000895 assertEquals("StartPos (0)", 3, kr.getMatch(0).startPos);
896 assertEquals("EndPos (0)", 10, kr.getMatch(0).endPos);
897 assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos);
898 assertEquals("EndPos (1)", 10, kr.getMatch(1).endPos);
899 assertEquals("StartPos (2)", 3, kr.getMatch(2).startPos);
900 assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos);
901 assertEquals("StartPos (3)", 5, kr.getMatch(3).startPos);
902 assertEquals("EndPos (3)", 8, kr.getMatch(3).endPos);
Nils Diewaldf399a672013-11-18 17:55:22 +0000903 };
904
Nils Diewaldbb33da22015-03-04 16:24:25 +0000905
Nils Diewaldf399a672013-11-18 17:55:22 +0000906 @Test
907 public void indexExample6 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000908 KrillIndex ki = new KrillIndex();
Nils Diewald83c9b162015-02-03 21:05:07 +0000909 // 2,5,8,12,13
910 // h<a><a>i</a>j</a><a>h</a>i j<a>h i</a>j
911 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000912 fd.addTV("base", "hijhi jh ij",
913 "[(0-1)s:h|i:h|_0#0-1|-:a$<i>4|-:t$<i>9]"
914 + "[(1-2)s:i|i:i|_1#1-2|<>:a#1-2$<i>2|<>:a#1-3$<i>3]"
915 + "[(2-3)s:j|i:j|_2#2-3]"
916 + "[(3-4)s:h|i:h|_3#3-4|<>:a#3-4$<i>4]"
917 + "[(4-5)s:i|i:i|_4#4-5]" + "[(6-7)s:j|i:j|_5#6-7]"
918 + "[(7-8)s:h|i:h|_6#7-8|<>:a#7-10$<i>8]"
919 + "[(9-10)s:i|i:i|_7#9-10]"
920 + "[(10-11)s:j|i:j|_8#10-11]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000921 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000922
Nils Diewald83c9b162015-02-03 21:05:07 +0000923 // Save documents
924 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000925
Nils Diewald83c9b162015-02-03 21:05:07 +0000926 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000927
Nils Diewaldbb33da22015-03-04 16:24:25 +0000928 SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
929 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")),
930 new SpanNextQuery(new SpanTermQuery(new Term("base",
931 "s:i")), new SpanTermQuery(new Term("base",
932 "s:j")))));
Nils Diewaldf399a672013-11-18 17:55:22 +0000933
Nils Diewald884dbcf2015-02-27 17:02:28 +0000934 Result kr = ki.search(sq, (short) 10);
Nils Diewald83c9b162015-02-03 21:05:07 +0000935 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000936 };
937
938
939 @Test
940 public void indexExample7 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000941 KrillIndex ki = new KrillIndex();
Nils Diewald83c9b162015-02-03 21:05:07 +0000942 // 4,5,11,13
943 // x<a>x h</a>i j h<a>i j</a>
944 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000945 fd.addTV("base", "xx hi j hi j",
946 "[(0-1)s:x|i:x|_0#0-1|-:a$<i>2|-:t$<i>8]"
947 + "[(1-2)s:x|i:x|_1#1-2|<>:a#1-4$<i>3]"
948 + "[(3-4)s:h|i:h|_2#3-4]" + "[(4-5)s:i|i:i|_3#4-5]"
949 + "[(6-7)s:j|i:j|_4#6-7]" + "[(8-9)s:h|i:h|_5#8-9]"
950 + "[(9-10)s:i|i:i|_6#9-10|<>:a#9-12$<i>8]"
951 + "[(11-12)s:j|i:j|_7#11-12]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000952 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000953
Nils Diewald83c9b162015-02-03 21:05:07 +0000954 // Save documents
955 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000956
Nils Diewald83c9b162015-02-03 21:05:07 +0000957 assertEquals(1, ki.numberOf("documents"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000958
Nils Diewaldbb33da22015-03-04 16:24:25 +0000959 SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"),
960 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")),
961 new SpanNextQuery(new SpanTermQuery(new Term("base",
962 "s:i")), new SpanTermQuery(new Term("base",
963 "s:j")))));
Nils Diewaldf399a672013-11-18 17:55:22 +0000964
Nils Diewald884dbcf2015-02-27 17:02:28 +0000965 Result kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000966
Nils Diewald83c9b162015-02-03 21:05:07 +0000967 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000968 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000969
Nils Diewald83c9b162015-02-03 21:05:07 +0000970
971 /** SpanElementQueries */
Nils Diewald20607ab2014-03-20 23:28:36 +0000972 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000973 public void indexExample8 () throws IOException {
974 KrillIndex ki = new KrillIndex();
Nils Diewald83c9b162015-02-03 21:05:07 +0000975 FieldDocument fd = new FieldDocument();
976 // <a>xx <e>hi j <e>hi j</e></e></a>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000977 fd.addTV("base", "xx hi j hi j", "[(0-1)s:x|i:x|_0#0-1|<>:a#1-12$<i>8]"
978 + "[(1-2)s:x|i:x|_1#1-2]"
979 + "[(3-4)s:h|i:h|_2#3-4|<>:e#3-12$<i>8]"
980 + "[(4-5)s:i|i:i|_3#4-5]" + "[(6-7)s:j|i:j|_4#6-7]"
981 + "[(8-9)s:h|i:h|_5#8-9|<>:e#8-9$<i>8]"
982 + "[(9-10)s:i|i:i|_6#9-10]" + "[(11-12)s:j|i:j|_7#11-12]");
Nils Diewald83c9b162015-02-03 21:05:07 +0000983 ki.addDoc(fd);
Nils Diewald20607ab2014-03-20 23:28:36 +0000984 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000985
Nils Diewald83c9b162015-02-03 21:05:07 +0000986
987 // contains(<s>, (es wird | wird es))
Nils Diewald7d320642014-11-12 17:39:42 +0000988 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000989 public void queryJSONpoly2 () throws QueryException, IOException {
990 String jsonPath = getClass().getResource("/queries/poly2.json")
991 .getFile();
992 String jsonPQuery = readFile(jsonPath);
Nils Diewald0339d462015-02-26 14:53:56 +0000993 SpanQueryWrapper sqwi = new KrillQuery("tokens").fromJson(jsonPQuery);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000994
Nils Diewald83c9b162015-02-03 21:05:07 +0000995 SpanWithinQuery sq = (SpanWithinQuery) sqwi.toQuery();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000996
Nils Diewalda14ecd62015-02-26 21:00:20 +0000997 KrillIndex ki = new KrillIndex();
Nils Diewald886d3212014-11-14 01:27:23 +0000998
Nils Diewaldbb33da22015-03-04 16:24:25 +0000999 ki.addDoc(getClass().getResourceAsStream("/wiki/DDD-08370.json.gz"),
1000 true);
1001 ki.addDoc(getClass().getResourceAsStream("/wiki/PPP-02924.json.gz"),
1002 true);
Nils Diewald83c9b162015-02-03 21:05:07 +00001003
1004 ki.commit();
Nils Diewald884dbcf2015-02-27 17:02:28 +00001005 Result kr = ki.search(sq, (short) 10);
Nils Diewald83c9b162015-02-03 21:05:07 +00001006 assertEquals(2, kr.getTotalResults());
1007 assertEquals(0, kr.getMatch(0).getLocalDocID());
1008 assertEquals(76, kr.getMatch(0).getStartPos());
1009 assertEquals(93, kr.getMatch(0).getEndPos());
1010 assertEquals(1, kr.getMatch(1).getLocalDocID());
1011 assertEquals(237, kr.getMatch(1).getStartPos());
1012 assertEquals(252, kr.getMatch(1).getEndPos());
1013
1014 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +00001015 for (Match km : kr.getMatches()){
1016 System.out.println(km.getStartPos() +","+km.getEndPos()+" "
Nils Diewald83c9b162015-02-03 21:05:07 +00001017 +km.getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +00001018 };
Nils Diewalde7a820b2015-02-12 21:34:50 +00001019 */
Nils Diewald83c9b162015-02-03 21:05:07 +00001020 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001021
1022
1023 private String readFile (String path) {
Nils Diewald83c9b162015-02-03 21:05:07 +00001024 StringBuilder sb = new StringBuilder();
1025 try {
1026 BufferedReader in = new BufferedReader(new FileReader(path));
1027 String str;
1028 while ((str = in.readLine()) != null) {
1029 sb.append(str);
1030 };
1031 in.close();
Nils Diewaldbb33da22015-03-04 16:24:25 +00001032 }
1033 catch (IOException e) {
Nils Diewald83c9b162015-02-03 21:05:07 +00001034 fail(e.getMessage());
1035 }
1036 return sb.toString();
Nils Diewald11e91862014-11-12 16:29:18 +00001037 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001038};