blob: 46f8f41c0e3a0be8c302cbf1513e56c78afbbe60 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001import java.util.*;
2import java.io.*;
3
4import org.apache.lucene.util.Version;
5import org.apache.lucene.util.BytesRef;
6import org.apache.lucene.util.Bits;
7
8import static org.junit.Assert.*;
9import org.junit.Test;
10import org.junit.Ignore;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
14import de.ids_mannheim.korap.KorapIndex;
15import de.ids_mannheim.korap.KorapQuery;
16import de.ids_mannheim.korap.KorapResult;
17import de.ids_mannheim.korap.query.SpanNextQuery;
Nils Diewaldfdb94d82014-02-13 03:30:06 +000018import de.ids_mannheim.korap.query.SpanMatchModifyClassQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019import de.ids_mannheim.korap.query.SpanClassQuery;
20import de.ids_mannheim.korap.index.FieldDocument;
21import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
22
23import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldf3b30ae2013-11-27 17:42:37 +000024import org.apache.lucene.search.spans.SpanOrQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000025import org.apache.lucene.search.spans.SpanTermQuery;
26import org.apache.lucene.index.Term;
27
// mvn -Dtest=TestMatchIndex#indexExample1 test
29
30// match is shrink and split
31
32@RunWith(JUnit4.class)
33public class TestMatchIndex {
34
35 @Test
36 public void indexExample1 () throws IOException {
37 KorapIndex ki = new KorapIndex();
38
39 // abcabcabac
40 FieldDocument fd = new FieldDocument();
41 fd.addTV("base",
42 "abcabcabac",
43 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
44 "[(1-2)s:b|i:b|_1#1-2]" +
45 "[(2-3)s:c|i:c|_2#2-3]" +
46 "[(3-4)s:a|i:a|_3#3-4]" +
47 "[(4-5)s:b|i:b|_4#4-5]" +
48 "[(5-6)s:c|i:c|_5#5-6]" +
49 "[(6-7)s:a|i:a|_6#6-7]" +
50 "[(7-8)s:b|i:b|_7#7-8]" +
51 "[(8-9)s:a|i:a|_8#8-9]" +
52 "[(9-10)s:c|i:c|_9#9-10]");
53 ki.addDoc(fd);
54
55 ki.commit();
56
57 SpanQuery sq;
58 KorapResult kr;
59
60 sq = new SpanNextQuery(
61 new SpanTermQuery(new Term("base", "s:b")),
62 new SpanClassQuery(
63 new SpanTermQuery(new Term("base", "s:a"))
64 )
65 );
66 kr = ki.search(sq, (short) 10);
67
68 assertEquals("totalResults", 1, kr.totalResults());
69 assertEquals("StartPos (0)", 7, kr.match(0).startPos);
70 assertEquals("EndPos (0)", 9, kr.match(0).endPos);
71 assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
72
Nils Diewald3caa00d2013-12-13 02:24:04 +000073 assertEquals("Test no 'more' context", "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\">c</span>", kr.match(0).snippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000074
Nils Diewaldfdb94d82014-02-13 03:30:06 +000075 sq = new SpanMatchModifyClassQuery(
Nils Diewaldf399a672013-11-18 17:55:22 +000076 new SpanNextQuery(
77 new SpanTermQuery(new Term("base", "s:b")),
78 new SpanClassQuery(
79 new SpanTermQuery(new Term("base", "s:a"))
80 )
81 )
82 );
83 kr = ki.search(sq, (short) 10);
84
85 assertEquals("totalResults", 1, kr.totalResults());
86 assertEquals("StartPos (0)", 8, kr.match(0).startPos);
87 assertEquals("EndPos (0)", 9, kr.match(0).endPos);
88 assertEquals("SnippetBrackets (0)", "... cabcab[a]c", kr.match(0).snippetBrackets());
89
Nils Diewaldfdb94d82014-02-13 03:30:06 +000090 sq = new SpanMatchModifyClassQuery(
Nils Diewaldf399a672013-11-18 17:55:22 +000091 new SpanNextQuery(
92 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
93 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
94 ), (byte) 3
95 );
96
97 kr = ki.search(sq, (short) 10);
98
99 assertEquals("totalResults", 3, kr.totalResults());
100 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
101 assertEquals("EndPos (0)", 2, kr.match(0).endPos);
102 assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", kr.match(0).snippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000103
Nils Diewald3caa00d2013-12-13 02:24:04 +0000104 assertEquals("<span class=\"context-left\">a</span><span class=\"match\">b</span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>", kr.match(0).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000105
Nils Diewaldf399a672013-11-18 17:55:22 +0000106 assertEquals("StartPos (1)", 4, kr.match(1).startPos);
107 assertEquals("EndPos (1)", 5, kr.match(1).endPos);
108 assertEquals("SnippetBrackets (1)", "abca[b]cabac", kr.match(1).snippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000109
Nils Diewald3caa00d2013-12-13 02:24:04 +0000110 assertEquals("<span class=\"context-left\">abca</span><span class=\"match\">b</span><span class=\"context-right\">cabac</span>", kr.match(1).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000111
Nils Diewaldf399a672013-11-18 17:55:22 +0000112 assertEquals("StartPos (2)", 7, kr.match(2).startPos);
113 assertEquals("EndPos (2)", 8, kr.match(2).endPos);
114 assertEquals("SnippetBrackets (2)", "... bcabca[b]ac", kr.match(2).snippetBrackets());
115
116
117
118 // abcabcabac
Nils Diewaldfdb94d82014-02-13 03:30:06 +0000119 sq = new SpanMatchModifyClassQuery(
Nils Diewaldf399a672013-11-18 17:55:22 +0000120 new SpanNextQuery(
121 new SpanTermQuery(new Term("base", "s:a")),
122 new SpanClassQuery(
123 new SpanNextQuery(
124 new SpanTermQuery(new Term("base", "s:b")),
125 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")))
126 ), (byte) 2
127 )), (byte) 2);
128
129 kr = ki.search(sq, (short) 10);
130
131 // System.err.println(kr.toJSON());
132
133 assertEquals("totalResults", 1, kr.totalResults());
134 assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000135
Nils Diewald3caa00d2013-12-13 02:24:04 +0000136 assertEquals("SnippetHTML (0) 1", "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\">c</span>", kr.match(0).snippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +0000137
138 // Offset tokens
139 kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
140 assertEquals("totalResults", 1, kr.totalResults());
141 assertEquals("SnippetBrackets (0)", "... ca[b{a}]c", kr.match(0).snippetBrackets());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000142
143
144
Nils Diewaldf399a672013-11-18 17:55:22 +0000145 // Offset Characters
146 kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
147 assertEquals("totalResults", 1, kr.totalResults());
148 assertEquals("SnippetBrackets (0)", "... a[b{a}] ...", kr.match(0).snippetBrackets());
149
Nils Diewald3caa00d2013-12-13 02:24:04 +0000150 assertEquals("SnippetHTML (0) 2", "<span class=\"context-left\"><span class=\"more\"></span>a</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.match(0).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000151
Nils Diewaldf399a672013-11-18 17:55:22 +0000152 // System.err.println(kr.toJSON());
153
Nils Diewaldfdb94d82014-02-13 03:30:06 +0000154 sq = new SpanMatchModifyClassQuery(
Nils Diewaldf399a672013-11-18 17:55:22 +0000155 new SpanNextQuery(
156 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 1),
157 new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")), (byte) 2)
158 ), (byte) 3
159 );
160
161 kr = ki.search(sq, (short) 10);
162
163 assertEquals("totalResults", 2, kr.totalResults());
164 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
165 assertEquals("EndPos (0)", 3, kr.match(0).endPos);
166 assertEquals("SnippetBrackets (0)", "a[{1:b}{2:c}]abcaba ...", kr.match(0).snippetBrackets());
167 assertEquals("StartPos (1)", 4, kr.match(1).startPos);
168 assertEquals("EndPos (1)", 6, kr.match(1).endPos);
169 assertEquals("SnippetBrackets (1)", "abca[{1:b}{2:c}]abac", kr.match(1).snippetBrackets());
170
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000171 assertEquals("Document count", 1, ki.numberOf("base", "documents"));
172 assertEquals("Token count", 10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000173
174
Nils Diewaldfdb94d82014-02-13 03:30:06 +0000175 sq = new SpanMatchModifyClassQuery(
Nils Diewaldf399a672013-11-18 17:55:22 +0000176 new SpanNextQuery(
177 new SpanTermQuery(new Term("base", "s:a")),
178 new SpanClassQuery(
179 new SpanNextQuery(
180 new SpanTermQuery(new Term("base", "s:b")),
181 new SpanTermQuery(new Term("base", "s:c"))
182 )
183 )
184 )
185 );
186
187 kr = ki.search(sq, (short) 2);
188
189 assertEquals("totalResults", 2, kr.totalResults());
190 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
191 assertEquals("EndPos (0)", 3, kr.match(0).endPos);
192 assertEquals("SnippetBrackets (0)", "a[bc]abcaba ...", kr.match(0).snippetBrackets());
193 assertEquals("StartPos (1)", 4, kr.match(1).startPos);
194 assertEquals("EndPos (1)", 6, kr.match(1).endPos);
195 assertEquals("SnippetBrackets (1)", "abca[bc]abac", kr.match(1).snippetBrackets());
196
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000197 assertEquals(1, ki.numberOf("base", "documents"));
198 assertEquals(10, ki.numberOf("base", "t"));
Nils Diewaldf399a672013-11-18 17:55:22 +0000199 };
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000200
201
202 @Test
203 public void indexExample2 () throws IOException {
204 KorapIndex ki = new KorapIndex();
205
206 // abcabcabac
207 FieldDocument fd = new FieldDocument();
208 fd.addTV("base",
209 "abcabcabac",
210 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
211 "[(1-2)s:b|i:b|_1#1-2]" +
212 "[(2-3)s:c|i:c|_2#2-3]" +
213 "[(3-4)s:a|i:a|_3#3-4]" +
214 "[(4-5)s:b|i:b|_4#4-5]" +
215 "[(5-6)s:c|i:c|_5#5-6]" +
216 "[(6-7)s:a|i:a|_6#6-7]" +
217 "[(7-8)s:b|i:b|_7#7-8]" +
218 "[(8-9)s:a|i:a|_8#8-9]" +
219 "[(9-10)s:c|i:c|_9#9-10]");
220 ki.addDoc(fd);
221
222 ki.commit();
223
224 SpanQuery sq;
225 KorapResult kr;
226
227 // No contexts:
228 sq = new SpanOrQuery(
229 new SpanTermQuery(new Term("base", "s:a")),
230 new SpanTermQuery(new Term("base", "s:c"))
231 );
232 kr = ki.search(sq, (short) 20);
233
234 assertEquals("totalResults", 7, kr.totalResults());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000235 assertEquals("SnippetBrackets (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\">bcabca<span class=\"more\"></span></span>", kr.match(0).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000236 assertEquals("SnippetBrackets (0)", "[a]bcabca ...", kr.match(0).snippetBrackets());
237
238 assertEquals("SnippetBrackets (1)", "ab[c]abcaba ...", kr.match(1).snippetBrackets());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000239 assertEquals("SnippetBrackets (1)", "<span class=\"context-left\">ab</span><span class=\"match\">c</span><span class=\"context-right\">abcaba<span class=\"more\"></span></span>", kr.match(1).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000240
241 assertEquals("SnippetBrackets (6)", "... abcaba[c]", kr.match(6).snippetBrackets());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000242 assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.match(6).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000243
244
245 kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);
246
247 assertEquals("totalResults", 7, kr.totalResults());
248 assertEquals("SnippetBrackets (0)", "[a] ...", kr.match(0).snippetBrackets());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000249 assertEquals("SnippetHTML (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.match(0).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000250
251 assertEquals("SnippetBrackets (1)", "... [c] ...", kr.match(1).snippetBrackets());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000252 assertEquals("SnippetHTML (1)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.match(1).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000253
254 assertEquals("SnippetBrackets (6)", "... [c]", kr.match(6).snippetBrackets());
Nils Diewald3caa00d2013-12-13 02:24:04 +0000255 assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.match(6).snippetHTML());
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000256 };
Nils Diewald3ef9a472013-12-02 16:06:09 +0000257
258
259 @Test
260 public void indexExample3 () throws IOException {
261 KorapIndex ki = new KorapIndex();
262
263 // abcabcabac
264 FieldDocument fd = new FieldDocument();
265 fd.addTV("base",
266 "abcabcabac",
267 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
268 "[(1-2)s:b|i:b|_1#1-2]" +
269 "[(2-3)s:c|i:c|_2#2-3]" +
270 "[(3-4)s:a|i:a|_3#3-4]" +
271 "[(4-5)s:b|i:b|_4#4-5]" +
272 "[(5-6)s:c|i:c|_5#5-6]" +
273 "[(6-7)s:a|i:a|_6#6-7]" +
274 "[(7-8)s:b|i:b|_7#7-8]" +
275 "[(8-9)s:a|i:a|_8#8-9]" +
276 "[(9-10)s:c|i:c|_9#9-10]");
277 ki.addDoc(fd);
278
279 ki.commit();
280
281 KorapResult kr;
282
283 KorapQuery kq = new KorapQuery("base");
284
285 SpanQuery sq = kq._(1,kq.seq(kq.seg("s:b")).append(kq.seg("s:a")).append(kq._(2,kq.seg("s:c")))).toQuery();
286
287 kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);
288
289 assertEquals("totalResults", 1, kr.totalResults());
290 assertEquals("SnippetBrackets (0)", "... ca[{1:ba{2:c}}]", kr.match(0).snippetBrackets());
291 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000292};