blob: 82724b014f6325e2ff5168371b2fc30d5ead6816 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
margaretha50c76332015-03-19 10:10:39 +01003import static org.junit.Assert.assertEquals;
4import static org.junit.Assert.fail;
Nils Diewaldf399a672013-11-18 17:55:22 +00005
margaretha50c76332015-03-19 10:10:39 +01006import java.io.IOException;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
margaretha50c76332015-03-19 10:10:39 +01008import org.apache.lucene.index.Term;
9import org.apache.lucene.search.spans.SpanOrQuery;
10import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.junit.Ignore;
margaretha50c76332015-03-19 10:10:39 +010013import org.junit.Test;
Nils Diewaldf399a672013-11-18 17:55:22 +000014import org.junit.runner.RunWith;
15import org.junit.runners.JUnit4;
16
Nils Diewald2d5f8102015-02-26 21:07:54 +000017import de.ids_mannheim.korap.KrillCollection;
margaretha50c76332015-03-19 10:10:39 +010018import de.ids_mannheim.korap.KrillIndex;
19import de.ids_mannheim.korap.query.QueryBuilder;
20import de.ids_mannheim.korap.query.SpanClassQuery;
21import de.ids_mannheim.korap.query.SpanElementQuery;
22import de.ids_mannheim.korap.query.SpanFocusQuery;
23import de.ids_mannheim.korap.query.SpanNextQuery;
24import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewald392bcf32015-02-26 20:01:17 +000025import de.ids_mannheim.korap.response.Match;
margaretha50c76332015-03-19 10:10:39 +010026import de.ids_mannheim.korap.response.Result;
Nils Diewaldf399a672013-11-18 17:55:22 +000027
// mvn -Dtest=TestMatchIndex#indexExample1 test
29
Nils Diewald85f9c422015-02-06 21:09:16 +000030// match is focus and split
Nils Diewaldf399a672013-11-18 17:55:22 +000031
32@RunWith(JUnit4.class)
33public class TestMatchIndex {
margaretha50c76332015-03-19 10:10:39 +010034 @Test
35 public void testEmbeddedClassQuery () throws IOException {
36 KrillIndex ki = new KrillIndex();
37
38 // abcabcabac
39 FieldDocument fd = new FieldDocument();
40 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
41 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
42 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
43 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
44 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
45 + "[(9-10)s:c|i:c|_9#9-10]");
46 ki.addDoc(fd);
47
48 ki.commit();
49
50 SpanQuery sq;
51 Result kr;
52
53 sq = new SpanFocusQuery(new SpanClassQuery(new SpanNextQuery(
54 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
55 (byte) 1), new SpanClassQuery(new SpanTermQuery(
56 new Term("base", "s:c")), (byte) 2)), (byte) 3),
57 (byte) 3);
58
59 kr = ki.search(sq, (short) 10);
60
61 assertEquals("totalResults", kr.getTotalResults(), 2);
62 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
63 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
64 assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr
65 .getMatch(0).getSnippetBrackets());
66 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
67 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
68 assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr
69 .getMatch(1).getSnippetBrackets());
70
71 assertEquals("Document count", 1, ki.numberOf("base", "documents"));
72 assertEquals("Token count", 10, ki.numberOf("base", "t"));
73
74 }
75
Nils Diewaldf399a672013-11-18 17:55:22 +000076
77 @Test
78 public void indexExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000079 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +000080
Nils Diewaldbb33da22015-03-04 16:24:25 +000081 // abcabcabac
82 FieldDocument fd = new FieldDocument();
83 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
84 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
85 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
86 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
87 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
88 + "[(9-10)s:c|i:c|_9#9-10]");
89 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +000090
Nils Diewaldbb33da22015-03-04 16:24:25 +000091 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +000092
Nils Diewaldbb33da22015-03-04 16:24:25 +000093 SpanQuery sq;
94 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +000095
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")),
97 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a"))));
98 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +000099
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100 assertEquals("totalResults", kr.getTotalResults(), 1);
101 assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
102 assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
103 assertEquals("SnippetBrackets (0)", "... bcabca[b{1:a}]c",
104 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000105
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 assertEquals(
107 "Test no 'more' context",
108 "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><mark>b<mark class=\"class-1 level-0\">a</mark></mark><span class=\"context-right\">c</span>",
109 kr.getMatch(0).getSnippetHTML());
110 sq = new SpanFocusQuery(new SpanNextQuery(new SpanTermQuery(new Term(
111 "base", "s:b")), new SpanClassQuery(new SpanTermQuery(new Term(
112 "base", "s:a")))));
113 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000114
Nils Diewaldbb33da22015-03-04 16:24:25 +0000115 assertEquals("totalResults", kr.getTotalResults(), 1);
116 assertEquals("StartPos (0)", 8, kr.getMatch(0).startPos);
117 assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
118 assertEquals("SnippetBrackets (0)", "... cabcab[{1:a}]c", kr
119 .getMatch(0).getSnippetBrackets());
120 sq = new SpanFocusQuery(new SpanNextQuery(new SpanClassQuery(
121 new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
122 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
123 (byte) 3)), (byte) 3);
Nils Diewaldf399a672013-11-18 17:55:22 +0000124
Nils Diewaldbb33da22015-03-04 16:24:25 +0000125 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000126
Nils Diewaldbb33da22015-03-04 16:24:25 +0000127 assertEquals("totalResults", kr.getTotalResults(), 3);
128 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
129 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
130 assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...", kr
131 .getMatch(0).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000132
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000133
Nils Diewaldbb33da22015-03-04 16:24:25 +0000134 assertEquals(
135 "<span class=\"context-left\">a</span><mark><mark class=\"class-3 level-0\">b</mark></mark><span class=\"context-right\">cabcab<span class=\"more\"></span></span>",
136 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000137
Nils Diewaldbb33da22015-03-04 16:24:25 +0000138 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
139 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
140 assertEquals("SnippetBrackets (1)", "abca[{3:b}]cabac", kr.getMatch(1)
141 .getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000142
Nils Diewaldbb33da22015-03-04 16:24:25 +0000143 assertEquals(
144 "<span class=\"context-left\">abca</span><mark><mark class=\"class-3 level-0\">b</mark></mark><span class=\"context-right\">cabac</span>",
145 kr.getMatch(1).getSnippetHTML());
146
147 assertEquals("StartPos (2)", 7, kr.getMatch(2).startPos);
148 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
149 assertEquals("SnippetBrackets (2)", "... bcabca[{3:b}]ac",
150 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000151
152
153
Nils Diewaldbb33da22015-03-04 16:24:25 +0000154 // abcabcabac
155 sq = new SpanFocusQuery(
156 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
157 new SpanClassQuery(new SpanNextQuery(new SpanTermQuery(
158 new Term("base", "s:b")), new SpanClassQuery(
159 new SpanTermQuery(new Term("base", "s:a")))),
160 (byte) 2)), (byte) 2);
Nils Diewaldf399a672013-11-18 17:55:22 +0000161
Nils Diewaldbb33da22015-03-04 16:24:25 +0000162 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000163
Nils Diewaldbb33da22015-03-04 16:24:25 +0000164 assertEquals("totalResults", kr.getTotalResults(), 1);
165 assertEquals("SnippetBrackets (0)", "... bcabca[{2:b{1:a}}]c", kr
166 .getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000167
Nils Diewaldbb33da22015-03-04 16:24:25 +0000168 assertEquals(
169 "SnippetHTML (0) 1",
170 "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark><span class=\"context-right\">c</span>",
171 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000172
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173 // Offset tokens
174 kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
175 assertEquals("totalResults", kr.getTotalResults(), 1);
176 assertEquals("SnippetBrackets (0)", "... ca[{2:b{1:a}}]c",
177 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000178
179
180
Nils Diewaldbb33da22015-03-04 16:24:25 +0000181 // Offset Characters
182 kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
183 assertEquals("totalResults", kr.getTotalResults(), 1);
184 assertEquals("SnippetBrackets (0)", "... a[{2:b{1:a}}] ...", kr
185 .getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000186
Nils Diewaldbb33da22015-03-04 16:24:25 +0000187 assertEquals(
188 "SnippetHTML (0) 2",
189 "<span class=\"context-left\"><span class=\"more\"></span>a</span><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark><span class=\"context-right\"><span class=\"more\"></span></span>",
190 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000191
Nils Diewaldf399a672013-11-18 17:55:22 +0000192
Nils Diewald1455e1e2014-08-01 16:12:43 +0000193
Nils Diewaldbb33da22015-03-04 16:24:25 +0000194 // Don't match the expected class!
195 sq = new SpanFocusQuery(new SpanNextQuery(new SpanClassQuery(
196 new SpanTermQuery(new Term("base", "s:b")), (byte) 1),
197 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")),
198 (byte) 2)), (byte) 3);
Nils Diewaldf399a672013-11-18 17:55:22 +0000199
Nils Diewaldbb33da22015-03-04 16:24:25 +0000200 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000201
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000203
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204 sq = new SpanFocusQuery(new SpanNextQuery(new SpanTermQuery(new Term(
205 "base", "s:a")), new SpanClassQuery(new SpanNextQuery(
206 new SpanTermQuery(new Term("base", "s:b")), new SpanTermQuery(
207 new Term("base", "s:c"))))));
208
209 kr = ki.search(sq, (short) 2);
210
211 assertEquals("totalResults", kr.getTotalResults(), 2);
212 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
213 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
214 assertEquals("SnippetBrackets (0)", "a[{1:bc}]abcaba ...",
215 kr.getMatch(0).getSnippetBrackets());
216 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
217 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
218 assertEquals("SnippetBrackets (1)", "abca[{1:bc}]abac", kr.getMatch(1)
219 .getSnippetBrackets());
220
221 assertEquals(1, ki.numberOf("base", "documents"));
222 assertEquals(10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000223 };
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000224
225
226 @Test
227 public void indexExample2 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000228 KrillIndex ki = new KrillIndex();
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000229
Nils Diewald5c375702015-02-09 20:58:24 +0000230 // abcabcabac
231 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000232 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
233 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
234 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
235 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
236 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
237 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000238 ki.addDoc(fd);
239 ki.commit();
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000240
Nils Diewald5c375702015-02-09 20:58:24 +0000241 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000242 Result kr;
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000243
Nils Diewald5c375702015-02-09 20:58:24 +0000244 // No contexts:
Nils Diewaldbb33da22015-03-04 16:24:25 +0000245 sq = new SpanOrQuery(new SpanTermQuery(new Term("base", "s:a")),
246 new SpanTermQuery(new Term("base", "s:c")));
Nils Diewald5c375702015-02-09 20:58:24 +0000247 kr = ki.search(sq, (short) 20);
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000248
Nils Diewald5c375702015-02-09 20:58:24 +0000249 assertEquals("totalResults", kr.getTotalResults(), 7);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250 assertEquals(
251 "SnippetBrackets (0)",
252 "<span class=\"context-left\"></span><mark>a</mark><span class=\"context-right\">bcabca<span class=\"more\"></span></span>",
253 kr.getMatch(0).getSnippetHTML());
254 assertEquals("SnippetBrackets (0)", "[a]bcabca ...", kr.getMatch(0)
255 .getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000256
Nils Diewaldbb33da22015-03-04 16:24:25 +0000257 assertEquals("SnippetBrackets (1)", "ab[c]abcaba ...", kr.getMatch(1)
258 .getSnippetBrackets());
259 assertEquals(
260 "SnippetBrackets (1)",
261 "<span class=\"context-left\">ab</span><mark>c</mark><span class=\"context-right\">abcaba<span class=\"more\"></span></span>",
262 kr.getMatch(1).getSnippetHTML());
263
264 assertEquals("SnippetBrackets (6)", "... abcaba[c]", kr.getMatch(6)
265 .getSnippetBrackets());
266 assertEquals(
267 "SnippetBrackets (6)",
268 "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><mark>c</mark><span class=\"context-right\"></span>",
269 kr.getMatch(6).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000270
Nils Diewald5c375702015-02-09 20:58:24 +0000271 kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000272
Nils Diewald5c375702015-02-09 20:58:24 +0000273 assertEquals("totalResults", kr.getTotalResults(), 7);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000274 assertEquals("SnippetBrackets (0)", "[a] ...", kr.getMatch(0)
275 .getSnippetBrackets());
276 assertEquals(
277 "SnippetHTML (0)",
278 "<span class=\"context-left\"></span><mark>a</mark><span class=\"context-right\"><span class=\"more\"></span></span>",
279 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000280
Nils Diewaldbb33da22015-03-04 16:24:25 +0000281 assertEquals("SnippetBrackets (1)", "... [c] ...", kr.getMatch(1)
282 .getSnippetBrackets());
283 assertEquals(
284 "SnippetHTML (1)",
285 "<span class=\"context-left\"><span class=\"more\"></span></span><mark>c</mark><span class=\"context-right\"><span class=\"more\"></span></span>",
286 kr.getMatch(1).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000287
Nils Diewaldbb33da22015-03-04 16:24:25 +0000288 assertEquals("SnippetBrackets (6)", "... [c]", kr.getMatch(6)
289 .getSnippetBrackets());
290 assertEquals(
291 "SnippetBrackets (6)",
292 "<span class=\"context-left\"><span class=\"more\"></span></span><mark>c</mark><span class=\"context-right\"></span>",
293 kr.getMatch(6).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000294 };
Nils Diewald3ef9a472013-12-02 16:06:09 +0000295
296
297 @Test
Nils Diewaldbe5943e2014-10-21 19:35:34 +0000298 public void indexExample3 () throws Exception {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000299 KrillIndex ki = new KrillIndex();
Nils Diewald3ef9a472013-12-02 16:06:09 +0000300
Nils Diewald5c375702015-02-09 20:58:24 +0000301 // abcabcabac
302 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000303 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
304 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
305 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
306 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
307 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
308 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000309 ki.addDoc(fd);
310 ki.commit();
Nils Diewald3ef9a472013-12-02 16:06:09 +0000311
Nils Diewald884dbcf2015-02-27 17:02:28 +0000312 Result kr;
Nils Diewald3ef9a472013-12-02 16:06:09 +0000313
Nils Diewald8904c1d2015-02-26 16:13:18 +0000314 QueryBuilder kq = new QueryBuilder("base");
Nils Diewald3ef9a472013-12-02 16:06:09 +0000315
Nils Diewaldbb33da22015-03-04 16:24:25 +0000316 SpanQuery sq = kq._(
317 1,
318 kq.seq(kq.seg("s:b")).append(kq.seg("s:a"))
319 .append(kq._(2, kq.seg("s:c")))).toQuery();
320
Nils Diewald5c375702015-02-09 20:58:24 +0000321 kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);
Nils Diewald3ef9a472013-12-02 16:06:09 +0000322
Nils Diewald5c375702015-02-09 20:58:24 +0000323 assertEquals("totalResults", kr.getTotalResults(), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000324 assertEquals("SnippetBrackets (0)", "... ca[{1:ba{2:c}}]",
325 kr.getMatch(0).getSnippetBrackets());
Nils Diewald3ef9a472013-12-02 16:06:09 +0000326 };
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000327
328
329 @Test
330 public void indexExampleExtend () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000331 KrillIndex ki = new KrillIndex();
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000332
Nils Diewald5c375702015-02-09 20:58:24 +0000333 // abcabcabac
334 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000335 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
336 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
337 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
338 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
339 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
340 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000341 ki.addDoc(fd);
342 ki.commit();
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000343
Nils Diewald5c375702015-02-09 20:58:24 +0000344 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000345 Result kr;
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000346
Nils Diewaldbb33da22015-03-04 16:24:25 +0000347 sq = new SpanFocusQuery(new SpanNextQuery(new SpanClassQuery(
348 new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
349 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
350 (byte) 3)), (byte) 3);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000351
Nils Diewald5c375702015-02-09 20:58:24 +0000352 kr = ki.search(sq, (short) 10);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000353
Nils Diewald5c375702015-02-09 20:58:24 +0000354 assertEquals("totalResults", kr.getTotalResults(), 3);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000355
Nils Diewald392bcf32015-02-26 20:01:17 +0000356 Match km = kr.getMatch(0);
Nils Diewald5c375702015-02-09 20:58:24 +0000357 assertEquals("StartPos (0)", 1, km.startPos);
358 assertEquals("EndPos (0)", 2, km.endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000359 assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...",
360 km.getSnippetBrackets());
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000361
Nils Diewaldbb33da22015-03-04 16:24:25 +0000362 sq = new SpanFocusQuery(new SpanFocusQuery(new SpanNextQuery(
363 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")),
364 (byte) 2), new SpanClassQuery(new SpanTermQuery(
365 new Term("base", "s:b")), (byte) 3)), (byte) 3),
366 (byte) 2);
367
Nils Diewald5c375702015-02-09 20:58:24 +0000368 kr = ki.search(sq, (short) 10);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000369
Nils Diewald5c375702015-02-09 20:58:24 +0000370 km = kr.getMatch(0);
371 assertEquals("StartPos (0)", 0, km.startPos);
372 assertEquals("EndPos (0)", 1, km.endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000373 assertEquals("SnippetBrackets (0)", "[{2:a}]bcabca ...",
374 km.getSnippetBrackets());
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000375
Nils Diewald5c375702015-02-09 20:58:24 +0000376 // TODO: Check ID
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000377 };
Nils Diewald44693e82014-11-05 18:00:12 +0000378
379
380 @Test
Nils Diewald85f9c422015-02-06 21:09:16 +0000381 public void indexExampleFocusWithSpan () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000382 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000383
Nils Diewald5c375702015-02-09 20:58:24 +0000384 // abcabcabac
385 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000386 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
387 + "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]"
388 + "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]"
389 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
390 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
391 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
392 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000393 ki.addDoc(fd);
394 ki.commit();
Nils Diewald44693e82014-11-05 18:00:12 +0000395
Nils Diewald5c375702015-02-09 20:58:24 +0000396 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000397 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000398
margaretha50c76332015-03-19 10:10:39 +0100399 // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
400 // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
401 // new Term("base", "s:b")), (byte) 3));
402 //
403 // kr = ki.search(sq, (short) 10);
404 // assertEquals(kr.getSerialQuery(),
405 // "spanContain({2: <base:s />}, {3: base:s:b})");
406 // assertEquals(kr.getMatch(0).getSnippetBrackets(),
407 // "a[{2:{3:b}cab}]cabac");
Nils Diewald44693e82014-11-05 18:00:12 +0000408
Nils Diewaldbb33da22015-03-04 16:24:25 +0000409 sq = new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(
410 new SpanElementQuery("base", "s"), (byte) 2),
411 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
412 (byte) 3)), (byte) 3);
Nils Diewald44693e82014-11-05 18:00:12 +0000413
Nils Diewald5c375702015-02-09 20:58:24 +0000414 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000415 assertEquals(kr.getSerialQuery(),
416 "focus(3: spanContain({2: <base:s />}, {3: base:s:b}))");
Nils Diewald5c375702015-02-09 20:58:24 +0000417 assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{3:b}]cabcab ...");
Nils Diewald44693e82014-11-05 18:00:12 +0000418 };
Nils Diewald5c375702015-02-09 20:58:24 +0000419
420
Nils Diewaldf075df02015-03-03 20:34:00 +0000421 @Ignore
Nils Diewald5c375702015-02-09 20:58:24 +0000422 public void indexExampleFocusWithSkip () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000423 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000424
Nils Diewald5c375702015-02-09 20:58:24 +0000425 // abcabcabac
426 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000427 fd.addTV("base", "abcabcabac",
428 // The payload should be ignored
429 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
430 + // |<>:p#0-10<i>9]" +
431 "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]"
432 + "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]"
433 + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
434 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]"
435 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]"
436 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000437 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000438 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000439 fd.addTV("base", "gbcgbcgbgc",
440 "[(0-1)s:g|i:g|_0#0-1|-:t$<i>10|<>:p#0-10$<i>9]"
441 + "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]"
442 + "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]"
443 + "[(3-4)s:g|i:g|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
444 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:g|i:g|_6#6-7]"
445 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:g|i:g|_8#8-9]"
446 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000447 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000448 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000449 fd.addTV("base", "gbcgbcgbgc", "[(0-1)s:g|i:g|_0#0-1|-:t$<i>10]"
450 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]"
451 + "[(3-4)s:g|i:g|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]"
452 + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:g|i:g|_6#6-7]"
453 + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:g|i:g|_8#8-9]"
454 + "[(9-10)s:c|i:c|_9#9-10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000455 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000456 fd = new FieldDocument();
457 // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
Nils Diewaldbb33da22015-03-04 16:24:25 +0000458 fd.addTV("base", "acabcabac",
459 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10|<>:p#0-9$<i>8]"
460 + "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]"
461 + "[(2-3)s:a|i:a|_2#2-3|<>:s#2-7$<i>7]"
462 + "[(3-4)s:b|i:b|_3#3-4]" + "[(4-5)s:c|i:c|_4#4-5]"
463 + "[(5-6)s:a|i:a|_5#5-6]" + "[(6-7)s:b|i:b|_6#6-7]"
464 + "[(7-8)s:a|i:a|_7#7-8]" + "[(8-9)s:c|i:c|_8#8-9]");
Nils Diewald5c375702015-02-09 20:58:24 +0000465 ki.addDoc(fd);
466 ki.commit();
467
468 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000469 Result kr;
Nils Diewald2d5f8102015-02-26 21:07:54 +0000470 KrillCollection kc = new KrillCollection(ki);
Nils Diewald5c375702015-02-09 20:58:24 +0000471
472 assertEquals("Documents", 4, kc.numberOf("documents"));
473
Nils Diewaldcd226862015-02-11 22:27:45 +0000474 // within(<p>, focus(3:within({2:<s>}, {3:a})))
Nils Diewaldbb33da22015-03-04 16:24:25 +0000475 sq = new SpanWithinQuery(new SpanElementQuery("base", "p"),
476 new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(
477 new SpanElementQuery("base", "s"), (byte) 2),
478 new SpanClassQuery(new SpanTermQuery(new Term("base",
479 "s:a")), (byte) 3)), (byte) 3));
Nils Diewald5c375702015-02-09 20:58:24 +0000480
481 fail("Skipping may go horribly wrong! (Known issue)");
Nils Diewaldcd226862015-02-11 22:27:45 +0000482
Nils Diewald5c375702015-02-09 20:58:24 +0000483 kr = kc.search(sq);
Nils Diewaldcd226862015-02-11 22:27:45 +0000484 // System.err.println(kr.getOverview());
485
486
Nils Diewaldbb33da22015-03-04 16:24:25 +0000487 assertEquals(
488 kr.getSerialQuery(),
489 "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))");
Nils Diewald5c375702015-02-09 20:58:24 +0000490 assertEquals(12, kr.getTotalResults());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000491 assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0)
492 .getSnippetBrackets());
493 assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1)
494 .getSnippetBrackets());
495 assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2)
496 .getSnippetBrackets());
Nils Diewald5c375702015-02-09 20:58:24 +0000497 };
498
Nils Diewaldf399a672013-11-18 17:55:22 +0000499};