blob: 42b575da69877e70db5d4b02c3f50e751c433fe1 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
margaretha50c76332015-03-19 10:10:39 +01003import static org.junit.Assert.assertEquals;
4import static org.junit.Assert.fail;
Nils Diewaldf399a672013-11-18 17:55:22 +00005
margaretha50c76332015-03-19 10:10:39 +01006import java.io.IOException;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
margaretha50c76332015-03-19 10:10:39 +01008import org.apache.lucene.index.Term;
9import org.apache.lucene.search.spans.SpanOrQuery;
10import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.junit.Ignore;
margaretha50c76332015-03-19 10:10:39 +010013import org.junit.Test;
Nils Diewaldf399a672013-11-18 17:55:22 +000014import org.junit.runner.RunWith;
15import org.junit.runners.JUnit4;
16
Nils Diewald2d5f8102015-02-26 21:07:54 +000017import de.ids_mannheim.korap.KrillCollection;
Akron60971692016-06-08 12:56:21 +020018import de.ids_mannheim.korap.Krill;
margaretha50c76332015-03-19 10:10:39 +010019import de.ids_mannheim.korap.KrillIndex;
20import de.ids_mannheim.korap.query.QueryBuilder;
21import de.ids_mannheim.korap.query.SpanClassQuery;
22import de.ids_mannheim.korap.query.SpanElementQuery;
23import de.ids_mannheim.korap.query.SpanFocusQuery;
24import de.ids_mannheim.korap.query.SpanNextQuery;
25import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewald392bcf32015-02-26 20:01:17 +000026import de.ids_mannheim.korap.response.Match;
margaretha50c76332015-03-19 10:10:39 +010027import de.ids_mannheim.korap.response.Result;
Akron60971692016-06-08 12:56:21 +020028import de.ids_mannheim.korap.response.SearchContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000029
// mvn -Dtest=TestMatchIndex#indexExample1 test
31
Nils Diewald85f9c422015-02-06 21:09:16 +000032// match is focus and split
Nils Diewaldf399a672013-11-18 17:55:22 +000033
34@RunWith(JUnit4.class)
35public class TestMatchIndex {
margaretha50c76332015-03-19 10:10:39 +010036 @Test
37 public void testEmbeddedClassQuery () throws IOException {
38 KrillIndex ki = new KrillIndex();
39
40 // abcabcabac
41 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +010042 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
43 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
44 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
45 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
46 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
47 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
margaretha50c76332015-03-19 10:10:39 +010048 ki.addDoc(fd);
49
50 ki.commit();
51
52 SpanQuery sq;
53 Result kr;
54
Eliza Margaretha6f989202016-10-14 21:48:29 +020055 sq = new SpanFocusQuery(
56 new SpanClassQuery(
57 new SpanNextQuery(
58 new SpanClassQuery(
59 new SpanTermQuery(
60 new Term("base", "s:b")),
61 (byte) 1),
62 new SpanClassQuery(
63 new SpanTermQuery(
64 new Term("base", "s:c")),
65 (byte) 2)),
66 (byte) 3),
margaretha50c76332015-03-19 10:10:39 +010067 (byte) 3);
68
69 kr = ki.search(sq, (short) 10);
70
71 assertEquals("totalResults", kr.getTotalResults(), 2);
72 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
73 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020074 assertEquals("SnippetBrackets (0)", "a[[{3:{1:b}{2:c}}]]abcaba ...",
75 kr.getMatch(0).getSnippetBrackets());
margaretha50c76332015-03-19 10:10:39 +010076 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
77 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020078 assertEquals("SnippetBrackets (1)", "abca[[{3:{1:b}{2:c}}]]abac",
79 kr.getMatch(1).getSnippetBrackets());
margaretha50c76332015-03-19 10:10:39 +010080
81 assertEquals("Document count", 1, ki.numberOf("base", "documents"));
82 assertEquals("Token count", 10, ki.numberOf("base", "t"));
83
84 }
85
Nils Diewaldf399a672013-11-18 17:55:22 +000086
87 @Test
88 public void indexExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000089 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +000090
Nils Diewaldbb33da22015-03-04 16:24:25 +000091 // abcabcabac
92 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +010093 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
94 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
95 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
96 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
Akron08f4ceb2016-08-03 23:53:32 +020097 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
margaretha71c66ee2015-12-11 14:39:55 +010098 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000100
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000102
Nils Diewaldbb33da22015-03-04 16:24:25 +0000103 SpanQuery sq;
104 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +0000105
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")),
107 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a"))));
108 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000109
Nils Diewaldbb33da22015-03-04 16:24:25 +0000110 assertEquals("totalResults", kr.getTotalResults(), 1);
111 assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
112 assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
Akronf05fde62016-08-03 23:46:17 +0200113
Eliza Margaretha6f989202016-10-14 21:48:29 +0200114 assertEquals("SnippetBrackets (0)", "... bcabca[[b{1:a}]]c",
115 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000116
Eliza Margaretha6f989202016-10-14 21:48:29 +0200117 assertEquals("Test no 'more' context",
Akronf05fde62016-08-03 23:46:17 +0200118 "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\"><mark>b<mark class=\"class-1 level-0\">a</mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000119 kr.getMatch(0).getSnippetHTML());
Akronf05fde62016-08-03 23:46:17 +0200120
121
Eliza Margaretha6f989202016-10-14 21:48:29 +0200122 sq = new SpanFocusQuery(new SpanNextQuery(
123 new SpanTermQuery(new Term("base", "s:b")), new SpanClassQuery(
124 new SpanTermQuery(new Term("base", "s:a")))));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000125 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000126
Nils Diewaldbb33da22015-03-04 16:24:25 +0000127 assertEquals("totalResults", kr.getTotalResults(), 1);
128 assertEquals("StartPos (0)", 8, kr.getMatch(0).startPos);
129 assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200130 assertEquals("SnippetBrackets (0)", "... cabcab[[{1:a}]]c",
131 kr.getMatch(0).getSnippetBrackets());
132 sq = new SpanFocusQuery(new SpanNextQuery(
133 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")),
134 (byte) 2),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200136 (byte) 3)),
137 (byte) 3);
Nils Diewaldf399a672013-11-18 17:55:22 +0000138
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000140
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 assertEquals("totalResults", kr.getTotalResults(), 3);
142 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
143 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200144 assertEquals("SnippetBrackets (0)", "a[[{3:b}]]cabcab ...",
145 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000146
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000147
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200149 "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-3 level-0\">b</mark></mark></span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000151
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
153 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200154 assertEquals("SnippetBrackets (1)", "abca[[{3:b}]]cabac",
155 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000156
Nils Diewaldbb33da22015-03-04 16:24:25 +0000157 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200158 "<span class=\"context-left\">abca</span><span class=\"match\"><mark><mark class=\"class-3 level-0\">b</mark></mark></span><span class=\"context-right\">cabac</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 kr.getMatch(1).getSnippetHTML());
160
161 assertEquals("StartPos (2)", 7, kr.getMatch(2).startPos);
162 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200163 assertEquals("SnippetBrackets (2)", "... bcabca[[{3:b}]]ac",
164 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000165
166
167
Nils Diewaldbb33da22015-03-04 16:24:25 +0000168 // abcabcabac
Eliza Margaretha6f989202016-10-14 21:48:29 +0200169 sq = new SpanFocusQuery(new SpanNextQuery(
170 new SpanTermQuery(new Term("base", "s:a")),
171 new SpanClassQuery(
172 new SpanNextQuery(new SpanTermQuery(
173 new Term("base", "s:b")),
174 new SpanClassQuery(new SpanTermQuery(
175 new Term("base", "s:a")))),
176 (byte) 2)),
177 (byte) 2);
Nils Diewaldf399a672013-11-18 17:55:22 +0000178
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000180
Nils Diewaldbb33da22015-03-04 16:24:25 +0000181 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200182 assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c",
183 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000184
Eliza Margaretha6f989202016-10-14 21:48:29 +0200185 assertEquals("SnippetHTML (0) 1",
Akronf05fde62016-08-03 23:46:17 +0200186 "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\"><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000187 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000188
Nils Diewaldbb33da22015-03-04 16:24:25 +0000189 // Offset tokens
190 kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
191 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200192 assertEquals("SnippetBrackets (0)", "... ca[[{2:b{1:a}}]]c",
193 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000194
195
196
Nils Diewaldbb33da22015-03-04 16:24:25 +0000197 // Offset Characters
198 kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
199 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200200 assertEquals("SnippetBrackets (0)", "... a[[{2:b{1:a}}]] ...",
201 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000202
Eliza Margaretha6f989202016-10-14 21:48:29 +0200203 assertEquals("SnippetHTML (0) 2",
Akronf05fde62016-08-03 23:46:17 +0200204 "<span class=\"context-left\"><span class=\"more\"></span>a</span><span class=\"match\"><mark><mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000205 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000206
Nils Diewaldf399a672013-11-18 17:55:22 +0000207
Nils Diewald1455e1e2014-08-01 16:12:43 +0000208
Nils Diewaldbb33da22015-03-04 16:24:25 +0000209 // Don't match the expected class!
Eliza Margaretha6f989202016-10-14 21:48:29 +0200210 sq = new SpanFocusQuery(new SpanNextQuery(
211 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
212 (byte) 1),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000213 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200214 (byte) 2)),
215 (byte) 3);
Nils Diewaldf399a672013-11-18 17:55:22 +0000216
Nils Diewaldbb33da22015-03-04 16:24:25 +0000217 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000218
Nils Diewaldbb33da22015-03-04 16:24:25 +0000219 assertEquals("totalResults", kr.getTotalResults(), 0);
Nils Diewaldf399a672013-11-18 17:55:22 +0000220
Eliza Margaretha6f989202016-10-14 21:48:29 +0200221 sq = new SpanFocusQuery(
222 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
223 new SpanClassQuery(new SpanNextQuery(
224 new SpanTermQuery(new Term("base",
225 "s:b")),
226 new SpanTermQuery(new Term("base", "s:c"))))));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000227
228 kr = ki.search(sq, (short) 2);
229
230 assertEquals("totalResults", kr.getTotalResults(), 2);
231 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
232 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200233 assertEquals("SnippetBrackets (0)", "a[[{1:bc}]]abcaba ...",
234 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000235 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
236 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200237 assertEquals("SnippetBrackets (1)", "abca[[{1:bc}]]abac",
238 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000239
240 assertEquals(1, ki.numberOf("base", "documents"));
241 assertEquals(10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000242 };
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000243
244
245 @Test
246 public void indexExample2 () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000247 KrillIndex ki = new KrillIndex();
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000248
Nils Diewald5c375702015-02-09 20:58:24 +0000249 // abcabcabac
250 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +0100251 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
252 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
253 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
254 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
255 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
256 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000257 ki.addDoc(fd);
258 ki.commit();
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000259
Nils Diewald5c375702015-02-09 20:58:24 +0000260 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000261 Result kr;
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000262
Nils Diewald5c375702015-02-09 20:58:24 +0000263 // No contexts:
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264 sq = new SpanOrQuery(new SpanTermQuery(new Term("base", "s:a")),
265 new SpanTermQuery(new Term("base", "s:c")));
Nils Diewald5c375702015-02-09 20:58:24 +0000266 kr = ki.search(sq, (short) 20);
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000267
Nils Diewald5c375702015-02-09 20:58:24 +0000268 assertEquals("totalResults", kr.getTotalResults(), 7);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200269 assertEquals("SnippetBrackets (0)",
Akronf05fde62016-08-03 23:46:17 +0200270 "<span class=\"context-left\"></span><span class=\"match\"><mark>a</mark></span><span class=\"context-right\">bcabca<span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000271 kr.getMatch(0).getSnippetHTML());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200272 assertEquals("SnippetBrackets (0)", "[[a]]bcabca ...",
273 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000274
Eliza Margaretha6f989202016-10-14 21:48:29 +0200275 assertEquals("SnippetBrackets (1)", "ab[[c]]abcaba ...",
276 kr.getMatch(1).getSnippetBrackets());
277 assertEquals("SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200278 "<span class=\"context-left\">ab</span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\">abcaba<span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000279 kr.getMatch(1).getSnippetHTML());
280
Eliza Margaretha6f989202016-10-14 21:48:29 +0200281 assertEquals("SnippetBrackets (6)", "... abcaba[[c]]",
282 kr.getMatch(6).getSnippetBrackets());
283 assertEquals("SnippetBrackets (6)",
Akronf05fde62016-08-03 23:46:17 +0200284 "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000285 kr.getMatch(6).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000286
Nils Diewald5c375702015-02-09 20:58:24 +0000287 kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000288
Nils Diewald5c375702015-02-09 20:58:24 +0000289 assertEquals("totalResults", kr.getTotalResults(), 7);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200290 assertEquals("SnippetBrackets (0)", "[[a]] ...",
291 kr.getMatch(0).getSnippetBrackets());
292 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +0200293 "<span class=\"context-left\"></span><span class=\"match\"><mark>a</mark></span><span class=\"context-right\"><span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000294 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000295
Eliza Margaretha6f989202016-10-14 21:48:29 +0200296 assertEquals("SnippetBrackets (1)", "... [[c]] ...",
297 kr.getMatch(1).getSnippetBrackets());
298 assertEquals("SnippetHTML (1)",
Akronf05fde62016-08-03 23:46:17 +0200299 "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\"><span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000300 kr.getMatch(1).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000301
Eliza Margaretha6f989202016-10-14 21:48:29 +0200302 assertEquals("SnippetBrackets (6)", "... [[c]]",
303 kr.getMatch(6).getSnippetBrackets());
304 assertEquals("SnippetBrackets (6)",
Akronf05fde62016-08-03 23:46:17 +0200305 "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000306 kr.getMatch(6).getSnippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000307 };
Nils Diewald3ef9a472013-12-02 16:06:09 +0000308
309
310 @Test
Nils Diewaldbe5943e2014-10-21 19:35:34 +0000311 public void indexExample3 () throws Exception {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000312 KrillIndex ki = new KrillIndex();
Nils Diewald3ef9a472013-12-02 16:06:09 +0000313
Nils Diewald5c375702015-02-09 20:58:24 +0000314 // abcabcabac
315 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +0100316 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
317 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
318 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
319 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
320 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
321 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000322 ki.addDoc(fd);
323 ki.commit();
Nils Diewald3ef9a472013-12-02 16:06:09 +0000324
Nils Diewald884dbcf2015-02-27 17:02:28 +0000325 Result kr;
Nils Diewald3ef9a472013-12-02 16:06:09 +0000326
Nils Diewald8904c1d2015-02-26 16:13:18 +0000327 QueryBuilder kq = new QueryBuilder("base");
Nils Diewald3ef9a472013-12-02 16:06:09 +0000328
Akron4f52a632018-02-09 19:02:40 +0100329 SpanQuery sq = kq.nr(1, kq.seq(kq.seg("s:b")).append(kq.seg("s:a"))
330 .append(kq.nr(2, kq.seg("s:c")))).toQuery();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000331
Nils Diewald5c375702015-02-09 20:58:24 +0000332 kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);
Nils Diewald3ef9a472013-12-02 16:06:09 +0000333
Nils Diewald5c375702015-02-09 20:58:24 +0000334 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200335 assertEquals("SnippetBrackets (0)", "... ca[[{1:ba{2:c}}]]",
336 kr.getMatch(0).getSnippetBrackets());
Nils Diewald3ef9a472013-12-02 16:06:09 +0000337 };
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000338
339
340 @Test
341 public void indexExampleExtend () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000342 KrillIndex ki = new KrillIndex();
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000343
Nils Diewald5c375702015-02-09 20:58:24 +0000344 // abcabcabac
345 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +0100346 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
347 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
348 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
349 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
350 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
351 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000352 ki.addDoc(fd);
353 ki.commit();
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000354
Nils Diewald5c375702015-02-09 20:58:24 +0000355 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000356 Result kr;
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000357
Eliza Margaretha6f989202016-10-14 21:48:29 +0200358 sq = new SpanFocusQuery(new SpanNextQuery(
359 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")),
360 (byte) 2),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200362 (byte) 3)),
363 (byte) 3);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000364
Nils Diewald5c375702015-02-09 20:58:24 +0000365 kr = ki.search(sq, (short) 10);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000366
Nils Diewald5c375702015-02-09 20:58:24 +0000367 assertEquals("totalResults", kr.getTotalResults(), 3);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000368
Nils Diewald392bcf32015-02-26 20:01:17 +0000369 Match km = kr.getMatch(0);
Nils Diewald5c375702015-02-09 20:58:24 +0000370 assertEquals("StartPos (0)", 1, km.startPos);
371 assertEquals("EndPos (0)", 2, km.endPos);
Akronf05fde62016-08-03 23:46:17 +0200372 assertEquals("SnippetBrackets (0)", "a[[{3:b}]]cabcab ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000373 km.getSnippetBrackets());
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000374
Eliza Margaretha6f989202016-10-14 21:48:29 +0200375 sq = new SpanFocusQuery(
376 new SpanFocusQuery(
377 new SpanNextQuery(
378 new SpanClassQuery(
379 new SpanTermQuery(
380 new Term("base", "s:a")),
381 (byte) 2),
382 new SpanClassQuery(
383 new SpanTermQuery(
384 new Term("base", "s:b")),
385 (byte) 3)),
386 (byte) 3),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000387 (byte) 2);
388
Nils Diewald5c375702015-02-09 20:58:24 +0000389 kr = ki.search(sq, (short) 10);
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000390
Nils Diewald5c375702015-02-09 20:58:24 +0000391 km = kr.getMatch(0);
392 assertEquals("StartPos (0)", 0, km.startPos);
393 assertEquals("EndPos (0)", 1, km.endPos);
Akronf05fde62016-08-03 23:46:17 +0200394 assertEquals("SnippetBrackets (0)", "[[{2:a}]]bcabca ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000395 km.getSnippetBrackets());
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000396
Nils Diewald5c375702015-02-09 20:58:24 +0000397 // TODO: Check ID
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000398 };
Nils Diewald44693e82014-11-05 18:00:12 +0000399
400
401 @Test
Nils Diewald85f9c422015-02-06 21:09:16 +0000402 public void indexExampleFocusWithSpan () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000403 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000404
Nils Diewald5c375702015-02-09 20:58:24 +0000405 // abcabcabac
406 FieldDocument fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +0100407 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
408 + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
409 + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
410 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
411 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
412 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
413 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000414 ki.addDoc(fd);
415 ki.commit();
Nils Diewald44693e82014-11-05 18:00:12 +0000416
Nils Diewald5c375702015-02-09 20:58:24 +0000417 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000418 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000419
margaretha50c76332015-03-19 10:10:39 +0100420 // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
421 // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
422 // new Term("base", "s:b")), (byte) 3));
423 //
424 // kr = ki.search(sq, (short) 10);
425 // assertEquals(kr.getSerialQuery(),
426 // "spanContain({2: <base:s />}, {3: base:s:b})");
427 // assertEquals(kr.getMatch(0).getSnippetBrackets(),
428 // "a[{2:{3:b}cab}]cabac");
Nils Diewald44693e82014-11-05 18:00:12 +0000429
Eliza Margaretha6f989202016-10-14 21:48:29 +0200430 sq = new SpanFocusQuery(new SpanWithinQuery(
431 new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000432 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200433 (byte) 3)),
434 (byte) 3);
Nils Diewald44693e82014-11-05 18:00:12 +0000435
Nils Diewald5c375702015-02-09 20:58:24 +0000436 kr = ki.search(sq, (short) 10);
Marc Kupietz613f4322025-09-24 13:59:24 +0200437 assertEquals(
438 "focus(3: spanContain({2: <base:s />}, {3: base:s:b}))",
439 kr.getSerialQuery());
Akron6cc7b7b2016-01-14 21:39:18 +0100440
Marc Kupietz613f4322025-09-24 13:59:24 +0200441 assertEquals(
442 "a[[{3:b}]]cabcab ...",
443 kr.getMatch(0).getSnippetBrackets());
Nils Diewald44693e82014-11-05 18:00:12 +0000444 };
Nils Diewald5c375702015-02-09 20:58:24 +0000445
446
Nils Diewaldf075df02015-03-03 20:34:00 +0000447 @Ignore
Nils Diewald5c375702015-02-09 20:58:24 +0000448 public void indexExampleFocusWithSkip () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000449 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000450
Nils Diewald5c375702015-02-09 20:58:24 +0000451 // abcabcabac
452 FieldDocument fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000453 fd.addTV("base", "abcabcabac",
Eliza Margaretha6f989202016-10-14 21:48:29 +0200454 // The payload should be ignored
455 "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]" + // |<>:p#0-10<i>9]" +
margaretha71c66ee2015-12-11 14:39:55 +0100456 "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
457 + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
458 + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
459 + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
460 + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
461 + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
462 + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
463 + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
464 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000465 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000466 fd = new FieldDocument();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000467 fd.addTV("base", "gbcgbcgbgc",
margaretha71c66ee2015-12-11 14:39:55 +0100468 "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]"
469 + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
470 + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
471 + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
472 + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
473 + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
474 + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
475 + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
476 + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
477 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000478 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000479 fd = new FieldDocument();
margaretha71c66ee2015-12-11 14:39:55 +0100480 fd.addTV("base", "gbcgbcgbgc", "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]"
481 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
482 + "[(3-4)s:g|i:g|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
483 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
484 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
485 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewald5c375702015-02-09 20:58:24 +0000486 ki.addDoc(fd);
Nils Diewaldcd226862015-02-11 22:27:45 +0000487 fd = new FieldDocument();
488 // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
Nils Diewaldbb33da22015-03-04 16:24:25 +0000489 fd.addTV("base", "acabcabac",
margaretha71c66ee2015-12-11 14:39:55 +0100490 "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]"
491 + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
492 + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
493 + "[(3-4)s:b|i:b|_3$<i>3<i>4]"
494 + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
495 + "[(5-6)s:a|i:a|_5$<i>5<i>6]"
496 + "[(6-7)s:b|i:b|_6$<i>6<i>7]"
497 + "[(7-8)s:a|i:a|_7$<i>7<i>8]"
498 + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
Nils Diewald5c375702015-02-09 20:58:24 +0000499 ki.addDoc(fd);
500 ki.commit();
501
502 SpanQuery sq;
Nils Diewald884dbcf2015-02-27 17:02:28 +0000503 Result kr;
Nils Diewald2d5f8102015-02-26 21:07:54 +0000504 KrillCollection kc = new KrillCollection(ki);
Nils Diewald5c375702015-02-09 20:58:24 +0000505
506 assertEquals("Documents", 4, kc.numberOf("documents"));
507
Nils Diewaldcd226862015-02-11 22:27:45 +0000508 // within(<p>, focus(3:within({2:<s>}, {3:a})))
Nils Diewaldbb33da22015-03-04 16:24:25 +0000509 sq = new SpanWithinQuery(new SpanElementQuery("base", "p"),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200510 new SpanFocusQuery(new SpanWithinQuery(
511 new SpanClassQuery(new SpanElementQuery("base", "s"),
512 (byte) 2),
513 new SpanClassQuery(
514 new SpanTermQuery(new Term("base", "s:a")),
515 (byte) 3)),
516 (byte) 3));
Nils Diewald5c375702015-02-09 20:58:24 +0000517
Akron60971692016-06-08 12:56:21 +0200518 // fail("Skipping may go horribly wrong! (Known issue)");
Nils Diewaldcd226862015-02-11 22:27:45 +0000519
Akron60971692016-06-08 12:56:21 +0200520 Krill ks = new Krill(sq);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200521 ks.getMeta().setStartIndex(0).setCount((short) 20)
Akron08f4ceb2016-08-03 23:53:32 +0200522 .setContext(new SearchContext(true, (short) 5, true, (short) 5))
523 // .setCollection(kc)
524 ;
Nils Diewaldcd226862015-02-11 22:27:45 +0000525
Akron60971692016-06-08 12:56:21 +0200526 kr = ks.apply(ki);
527 // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
Nils Diewaldcd226862015-02-11 22:27:45 +0000528
Marc Kupietz613f4322025-09-24 13:59:24 +0200529 assertEquals(
530 "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))",
531 kr.getSerialQuery());
Nils Diewald5c375702015-02-09 20:58:24 +0000532 assertEquals(12, kr.getTotalResults());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200533 assertEquals("[a{2:bc{3:a}b}cabac]",
534 kr.getMatch(0).getSnippetBrackets());
535 assertEquals("[ab{2:c{3:a}bcab}ac]",
536 kr.getMatch(1).getSnippetBrackets());
537 assertEquals("[ab{2:cabc{3:a}}bac]",
538 kr.getMatch(2).getSnippetBrackets());
Nils Diewald5c375702015-02-09 20:58:24 +0000539 };
540
Nils Diewaldf399a672013-11-18 17:55:22 +0000541};