blob: b3e030137db9086d5520eaf55573543f5c6b7775 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
margaretha4f995582015-12-14 14:14:34 +01003import static org.junit.Assert.assertEquals;
Nils Diewaldf399a672013-11-18 17:55:22 +00004
margaretha4f995582015-12-14 14:14:34 +01005import java.io.IOException;
Nils Diewaldf399a672013-11-18 17:55:22 +00006
margaretha4f995582015-12-14 14:14:34 +01007import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import org.junit.Test;
Nils Diewaldf399a672013-11-18 17:55:22 +000011import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
Nils Diewalda14ecd62015-02-26 21:00:20 +000014import de.ids_mannheim.korap.KrillIndex;
Nils Diewaldf399a672013-11-18 17:55:22 +000015import de.ids_mannheim.korap.query.SpanClassQuery;
margaretha4f995582015-12-14 14:14:34 +010016import de.ids_mannheim.korap.query.SpanNextQuery;
17import de.ids_mannheim.korap.response.Result;
Nils Diewaldf399a672013-11-18 17:55:22 +000018
// mvn -Dtest=TestClassIndex#indexExample1 test
20
21@RunWith(JUnit4.class)
22public class TestClassIndex {
23
24 @Test
25 public void indexExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000026 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +000027
Nils Diewaldbb33da22015-03-04 16:24:25 +000028 // abcabcabac
29 FieldDocument fd = new FieldDocument();
margaretha4f995582015-12-14 14:14:34 +010030 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
31 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
32 + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
33 + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
34 + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
35 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +000036 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +000037
Nils Diewaldbb33da22015-03-04 16:24:25 +000038 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +000039
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 SpanQuery sq;
41 Result kr;
Nils Diewaldf399a672013-11-18 17:55:22 +000042
Nils Diewaldbb33da22015-03-04 16:24:25 +000043 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")),
44 new SpanTermQuery(new Term("base", "s:a")));
45 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +000046
Nils Diewaldbb33da22015-03-04 16:24:25 +000047 assertEquals("totalResults", kr.getTotalResults(), 1);
48 assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
49 assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020050 assertEquals("SnippetBrackets (0)", "... bcabca[[ba]]c",
51 kr.getMatch(0).getSnippetBrackets());
52 assertEquals("SnippetHTML (0)",
Nils Diewaldbb33da22015-03-04 16:24:25 +000053 "<span class=\"context-left\"><span class=\"more\">"
Akronf05fde62016-08-03 23:46:17 +020054 + "</span>bcabca</span><span class=\"match\"><mark>ba</mark></span><span class=\"context-right"
Eliza Margaretha6f989202016-10-14 21:48:29 +020055 + "\">c</span>",
56 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000057
Nils Diewaldbb33da22015-03-04 16:24:25 +000058 sq = new SpanTermQuery(new Term("base", "s:b"));
59 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +000060
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 assertEquals("totalResults", kr.getTotalResults(), 3);
62 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
63 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020064 assertEquals("SnippetBrackets (0)", "a[[b]]cabcab ...",
65 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +000066
67
Eliza Margaretha6f989202016-10-14 21:48:29 +020068 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +020069 "<span class=\"context-left\">a</span><span class=\"match\"><mark>"
70 + "b</mark></span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +000071 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000072
Nils Diewaldbb33da22015-03-04 16:24:25 +000073 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
74 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020075 assertEquals("SnippetBrackets (1)", "abca[[b]]cabac",
76 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +000077 assertEquals("StartPos (2)", 7, kr.getMatch(2).startPos);
78 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020079 assertEquals("SnippetBrackets (2)", "... bcabca[[b]]ac",
80 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +000081
Nils Diewaldbb33da22015-03-04 16:24:25 +000082 sq = new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")));
83 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +000084
Nils Diewaldbb33da22015-03-04 16:24:25 +000085 assertEquals("totalResults", kr.getTotalResults(), 3);
86 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
87 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020088 assertEquals("snippetBrackets (0)", "a[[{1:b}]]cabcab ...",
89 kr.getMatch(0).getSnippetBrackets());
90 assertEquals("snippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +020091 "<span class=\"context-left\">a</span><span class=\"match\"><mark>"
92 + "<mark class=\"class-1 level-0\">b</mark></mark></span><span class=\"context-right\">cabcab<span "
Eliza Margaretha6f989202016-10-14 21:48:29 +020093 + "class=\"more\"></span></span>",
94 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000095
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
97 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +020098 assertEquals("snippetBrackets (1)", "abca[[{1:b}]]cabac",
99 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100
101 assertEquals("StartPos (2)", 7, kr.getMatch(2).startPos);
102 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200103 assertEquals("snippetBrackets (2)", "... bcabca[[{1:b}]]ac",
104 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000105
106
Nils Diewaldbb33da22015-03-04 16:24:25 +0000107 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
108 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
109 (byte) 1));
Nils Diewaldf399a672013-11-18 17:55:22 +0000110
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000112
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 assertEquals("totalResults", kr.getTotalResults(), 3);
114 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
115 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200116 assertEquals("SnippetBrackets (0)", "[[a{1:b}]]cabcab ...",
117 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000118
Eliza Margaretha6f989202016-10-14 21:48:29 +0200119 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +0200120 "<span class=\"context-left\"></span><span class=\"match\"><mark>a<mark class=\"class-1 level-0\">b</mark></mark></span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000122
Nils Diewaldbb33da22015-03-04 16:24:25 +0000123 assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos);
124 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200125 assertEquals("SnippetBrackets (1)", "abc[[a{1:b}]]cabac",
126 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000127 assertEquals("StartPos (2)", 6, kr.getMatch(2).startPos);
128 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200129 assertEquals("SnippetBrackets (2)", "abcabc[[a{1:b}]]ac",
130 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000131
132
Nils Diewaldbb33da22015-03-04 16:24:25 +0000133 // abcabcabac
Eliza Margaretha6f989202016-10-14 21:48:29 +0200134 sq = new SpanNextQuery(
135 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")),
136 (byte) 2),
137 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
138 (byte) 3));
Nils Diewaldf399a672013-11-18 17:55:22 +0000139
Nils Diewaldbb33da22015-03-04 16:24:25 +0000140 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000141
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
143 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200144 assertEquals("SnippetBrackets (0)", "[[{2:a}{3:b}]]cabcab ...",
145 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000146 assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos);
147 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200148 assertEquals("SnippetBrackets (1)", "abc[[{2:a}{3:b}]]cabac",
149 kr.getMatch(1).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000150
Nils Diewaldbb33da22015-03-04 16:24:25 +0000151 assertEquals("StartPos (2)", 6, kr.getMatch(2).startPos);
152 assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200153 assertEquals("SnippetBrackets (2)", "abcabc[[{2:a}{3:b}]]ac",
154 kr.getMatch(2).getSnippetBrackets());
Nils Diewaldf399a672013-11-18 17:55:22 +0000155
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156 // abcabcabac
157 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200158 new SpanClassQuery(new SpanNextQuery(
159 new SpanTermQuery(new Term("base", "s:b")),
160 new SpanClassQuery(
161 new SpanTermQuery(new Term("base", "s:a")))),
162 (byte) 2));
Nils Diewaldf399a672013-11-18 17:55:22 +0000163
Nils Diewaldbb33da22015-03-04 16:24:25 +0000164 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000165
Nils Diewaldbb33da22015-03-04 16:24:25 +0000166 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200167 assertEquals("SnippetBrackets (0)", "abcabc[[a{2:b{1:a}}]]c",
168 kr.getMatch(0).getSnippetBrackets());
169 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +0200170 "<span class=\"context-left\">abcabc</span><span class=\"match\"><mark>a<mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000171 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000172
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173 // Offset tokens
174 kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
175 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200176 assertEquals("SnippetBrackets (0)", "... bc[[a{2:b{1:a}}]]c",
177 kr.getMatch(0).getSnippetBrackets());
178 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +0200179 "<span class=\"context-left\"><span class=\"more\"></span>bc</span><span class=\"match\"><mark>a<mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000180 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000181
Nils Diewaldbb33da22015-03-04 16:24:25 +0000182 // Offset Characters
183 kr = ki.search(sq, 0, (short) 10, false, (short) 2, false, (short) 2);
184 assertEquals("totalResults", kr.getTotalResults(), 1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200185 assertEquals("SnippetBrackets (0)", "... bc[[a{2:b{1:a}}]]c",
186 kr.getMatch(0).getSnippetBrackets());
187 assertEquals("SnippetHTML (0)",
Akronf05fde62016-08-03 23:46:17 +0200188 "<span class=\"context-left\"><span class=\"more\"></span>bc</span><span class=\"match\"><mark>a<mark class=\"class-2 level-0\">b<mark class=\"class-1 level-1\">a</mark></mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000189 kr.getMatch(0).getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000190
191
Nils Diewaldbb33da22015-03-04 16:24:25 +0000192 // System.err.println(kr.toJSON());
Nils Diewaldf399a672013-11-18 17:55:22 +0000193
Eliza Margaretha6f989202016-10-14 21:48:29 +0200194 sq = new SpanNextQuery(
195 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")),
196 (byte) 1),
197 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")),
198 (byte) 2));
Nils Diewaldf399a672013-11-18 17:55:22 +0000199
Nils Diewaldbb33da22015-03-04 16:24:25 +0000200 kr = ki.search(sq, (short) 10);
Nils Diewaldf399a672013-11-18 17:55:22 +0000201
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202 assertEquals("totalResults", kr.getTotalResults(), 2);
203 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
204 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
205 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
206 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
207
208 assertEquals("Document count", 1, ki.numberOf("base", "documents"));
209 assertEquals("Token count", 10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000210
211
Nils Diewaldbb33da22015-03-04 16:24:25 +0000212 sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200213 new SpanClassQuery(new SpanNextQuery(
214 new SpanTermQuery(new Term("base", "s:b")),
215 new SpanTermQuery(new Term("base", "s:c")))));
Nils Diewaldf399a672013-11-18 17:55:22 +0000216
Nils Diewaldbb33da22015-03-04 16:24:25 +0000217 kr = ki.search(sq, (short) 2);
Nils Diewaldf399a672013-11-18 17:55:22 +0000218
Nils Diewaldbb33da22015-03-04 16:24:25 +0000219 assertEquals("totalResults", kr.getTotalResults(), 2);
220 assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
221 assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
222 assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos);
223 assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
224
225 assertEquals(1, ki.numberOf("base", "documents"));
226 assertEquals(10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000227 };
228
229
230 @Test
231 public void indexExample2 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000232 KrillIndex ki = new KrillIndex();
Nils Diewaldf399a672013-11-18 17:55:22 +0000233
Nils Diewaldbb33da22015-03-04 16:24:25 +0000234 // abcabcabac
235 FieldDocument fd = new FieldDocument();
margaretha4f995582015-12-14 14:14:34 +0100236 fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
237 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
238 + "[(3-4)s:a|i:a|_3$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7]"
239 + "[(4-5)s:b|i:b|_4$<i>4<i>5]" + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
240 + "[(6-7)s:a|i:a|_6$<i>6<i>7]" + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
Eliza Margaretha6f989202016-10-14 21:48:29 +0200241 + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
242 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 ki.addDoc(fd);
Nils Diewaldf399a672013-11-18 17:55:22 +0000244
Nils Diewaldbb33da22015-03-04 16:24:25 +0000245 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +0000246
Nils Diewaldbb33da22015-03-04 16:24:25 +0000247 SpanQuery sq;
248 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000249 /*
Akron08f4ceb2016-08-03 23:53:32 +0200250 sq = new SpanNextQuery(
251 new SpanTermQuery(new Term("base", "s:c")),
252 new SpanElementQuery("base", "x")
253 );
254
255 kr = ki.search(sq, (short) 10);
256 assertEquals("ab[cabca]bac", kr.getMatch(0).getSnippetBrackets());
257 */
258 /*
259 System.err.println();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000260 sq = new SpanNextQuery(
261 new SpanElementQuery("base", "x"),
262 new SpanTermQuery(new Term("base", "s:b"))
263 );
264
265 kr = ki.search(sq, (short) 10);
266 assertEquals("abc[abcab}ac]", kr.getMatch(0).getSnippetBrackets());
267 System.err.println();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200268
Nils Diewaldbb33da22015-03-04 16:24:25 +0000269 */
270
271 /*
272 sq = new SpanWithinQuery(
273 new SpanElementQuery("base", "x"),
274 new SpanClassQuery(
275 new SpanTermQuery(new Term("base", "s:a"))
276 )
277 );
Eliza Margaretha6f989202016-10-14 21:48:29 +0200278
Nils Diewaldbb33da22015-03-04 16:24:25 +0000279 // new SpanTermQuery(new Term("base", "s:a")),
280 // new SpanClassQuery(
281 // )
282 // );
Eliza Margaretha6f989202016-10-14 21:48:29 +0200283
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000285
286 }
287};