blob: 39d239212bd31b93d1fe83e42dbd5bb1358a33eb [file] [log] [blame]
Eliza Margarethaa2603fa2014-01-22 10:59:25 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
Nils Diewalda14ecd62015-02-26 21:00:20 +000014import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethad4693462014-03-17 13:16:18 +000015import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000016import de.ids_mannheim.korap.query.SpanDistanceQuery;
17import de.ids_mannheim.korap.query.SpanElementQuery;
18import de.ids_mannheim.korap.query.SpanSegmentQuery;
margaretha71c66ee2015-12-11 14:39:55 +010019import de.ids_mannheim.korap.response.Result;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000020
21@RunWith(JUnit4.class)
Eliza Margarethadb292872014-02-03 09:36:43 +000022public class TestDistanceIndex {
Nils Diewald884dbcf2015-02-27 17:02:28 +000023 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +000024 KrillIndex ki;
25
26
27 private FieldDocument createFieldDoc0 () {
28 FieldDocument fd = new FieldDocument();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000029 fd.addString("ID", "doc-0");
margaretha71c66ee2015-12-11 14:39:55 +010030 fd.addTV("base", "text", "[(0-1)s:b|s:c|_1$<i>0<i>1]"
31 + "[(1-2)s:b|_2$<i>1<i>2]" + "[(2-3)s:c|_3$<i>2<i>3]"
32 + "[(3-4)s:c|_4$<i>3<i>4]" + "[(4-5)s:d|_5$<i>4<i>5]"
33 + "[(5-6)s:d|_6$<i>5<i>6]");
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000034 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 }
36
37
38 private FieldDocument createFieldDoc1 () {
39 FieldDocument fd = new FieldDocument();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000040 fd.addString("ID", "doc-1");
margaretha71c66ee2015-12-11 14:39:55 +010041 fd.addTV("base", "text", "[(0-1)s:c|_1$<i>0<i>1]"
42 + "[(1-2)s:e|_2$<i>1<i>2]"
margaretha4f995582015-12-14 14:14:34 +010043 + "[(2-3)s:c|_3$<i>2<i>3|<>:y$<b>64<i>2<i>4<i>4<b>0]"
44 + "[(3-4)s:c|_4$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
45 + "[(4-5)s:d|_5$<i>4<i>5|<>:y$<b>64<i>4<i>6<i>6<b>0]"
46 + "[(5-6)s:c|_6$<i>5<i>6|<>:y$<b>64<i>5<i>8<i>8<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010047 + "[(6-7)s:d|_7$<i>6<i>7]"
margaretha4f995582015-12-14 14:14:34 +010048 + "[(7-8)s:e|_8$<i>7<i>8|<>:x$<b>64<i>7<i>9<i>9<b>0]"
49 + "[(8-9)s:e|_9$<i>8<i>9|<>:x$<b>64<i>8<i>10<i>10<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010050 + "[(9-10)s:d|_10$<i>9<i>10]");
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000051 return fd;
52 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000053
54
55 private FieldDocument createFieldDoc2 () {
56 FieldDocument fd = new FieldDocument();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000057 fd.addString("ID", "doc-2");
margaretha71c66ee2015-12-11 14:39:55 +010058 fd.addTV("base", "text", "[(0-1)s:b|_1$<i>0<i>1]"
59 + "[(1-2)s:b|_2$<i>1<i>2]" + "[(2-3)s:d|_3$<i>2<i>3]"
60 + "[(3-4)s:e|_4$<i>3<i>4]" + "[(4-5)s:d|_5$<i>4<i>5]"
61 + "[(5-6)s:e|_6$<i>5<i>6]");
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000062 return fd;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000063 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000064
65
66 private SpanQuery createQuery (String x, String y, int min, int max,
67 boolean isOrdered) {
68 SpanQuery sq = new SpanDistanceQuery(new SpanTermQuery(new Term("base",
69 x)), new SpanTermQuery(new Term("base", y)),
70 new DistanceConstraint(min, max, isOrdered, false), true);
71 return sq;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000072 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000073
74
75 private SpanQuery createElementQuery (String x, String y, int min, int max,
76 boolean isOrdered) {
77 SpanQuery sq = new SpanDistanceQuery(new SpanElementQuery("base", x),
78 new SpanElementQuery("base", y), new DistanceConstraint(min,
79 max, isOrdered, false), true);
80 return sq;
81 }
82
83
84 /**
85 * - Intersection
86 * - Multiple occurrences in the same doc
87 * - hasMoreFirstSpans = false for the current secondspan
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000088 * */
89 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000090 public void testCase1 () throws IOException {
91 ki = new KrillIndex();
92 ki.addDoc(createFieldDoc0());
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000093 ki.commit();
94 SpanQuery sq;
95 // ---- Distance 0 to 1
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 sq = createQuery("s:b", "s:c", 0, 1, true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000097 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 // System.out.println(sq);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +000099 assertEquals(kr.getTotalResults(), 2);
100 assertEquals(0, kr.getMatch(0).startPos);
101 assertEquals(1, kr.getMatch(0).endPos);
102 assertEquals(1, kr.getMatch(1).startPos);
103 assertEquals(3, kr.getMatch(1).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000104
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000105 // ---- Distance 2 to 2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 sq = createQuery("s:b", "s:c", 2, 2, true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000107 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000108
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000109 assertEquals(kr.getTotalResults(), 2);
110 assertEquals(0, kr.getMatch(0).startPos);
111 assertEquals(3, kr.getMatch(0).endPos);
112 assertEquals(1, kr.getMatch(1).startPos);
113 assertEquals(4, kr.getMatch(1).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000114
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000115 // ---- Distance 2 to 3
Nils Diewaldbb33da22015-03-04 16:24:25 +0000116 sq = createQuery("s:b", "s:c", 2, 3, true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000117 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000119 assertEquals(kr.getTotalResults(), 3);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000120
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000121 ki.close();
122 }
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000123
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124
125 /**
126 * - Check candidate list:
127 * - CandidateList should not contain firstspans that are too far
128 * from
129 * the current secondspan
130 * - Add new candidates
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000131 * */
132 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000133 public void testCase2 () throws IOException {
134 ki = new KrillIndex();
135 ki.addDoc(createFieldDoc1());
136 ki.commit();
137
138 // ---- Distance 1 to 3
139 // Candidate list for the current secondspan, is empty
140 SpanQuery sq = createQuery("s:c", "s:d", 1, 3, true);
141 kr = ki.search(sq, (short) 10);
142
143 assertEquals((long) 4, kr.getTotalResults());
144 assertEquals(2, kr.getMatch(0).startPos);
145 assertEquals(5, kr.getMatch(0).endPos);
146 assertEquals(3, kr.getMatch(2).startPos);
147 assertEquals(7, kr.getMatch(2).endPos);
148
149 ki.addDoc(createFieldDoc0());
150 ki.commit();
151
152 // ---- Distance 3 to 3
153 // Candidate list is empty, but there are secondspans in the other doc
154 sq = createQuery("s:c", "s:d", 3, 3, true);
155 kr = ki.search(sq, (short) 10);
156 assertEquals((long) 2, kr.getTotalResults());
157
158 ki.close();
159 }
160
161
162 /**
163 * - Ensure the same document
164 * - Multiple matches in multiple documents and atomic indices
165 * */
166 @Test
167 public void testCase3 () throws IOException {
168 ki = new KrillIndex();
169 ki.addDoc(createFieldDoc0());
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000170 ki.commit();
171 ki.addDoc(createFieldDoc2());
172 ki.addDoc(createFieldDoc1());
173 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000174
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000175 SpanQuery sq;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000176 sq = createQuery("s:c", "s:d", 3, 3, true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000177 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000178
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000179 assertEquals(kr.getTotalResults(), 2);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000180 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000181
182
183 /**
184 * - Firstspan.next() is in the other doc, but there is
185 * still a secondspans in the same doc
186 * - hasMoreFirstSpan and secondspans.next() are true,
187 * but ensureSameDoc() = false
188 * */
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000189 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 public void testCase4 () throws IOException {
191 ki = new KrillIndex();
192 ki.addDoc(createFieldDoc0());
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000193 ki.commit();
194 ki.addDoc(createFieldDoc2());
195 ki.addDoc(createFieldDoc1());
196 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000197
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000198 // ---- Distance 1 to 2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000199 SpanQuery sq = createQuery("s:b", "s:c", 1, 2, true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000200 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000201
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000202 assertEquals(kr.getTotalResults(), 3);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000203 assertEquals(0, kr.getMatch(0).startPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204 assertEquals(3, kr.getMatch(0).endPos);
205 assertEquals(1, kr.getMatch(1).startPos);
206 assertEquals(3, kr.getMatch(1).endPos);
207 assertEquals(1, kr.getMatch(2).startPos);
208 assertEquals(4, kr.getMatch(2).endPos);
209 ki.close();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000210 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000211
212
213 /** ElementQueries */
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000214 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000215 public void testCase5 () throws IOException {
216 ki = new KrillIndex();
217 ki.addDoc(createFieldDoc1());
218 ki.commit();
219
220 // Intersection ---- Distance 0:0
221 SpanQuery sq = createElementQuery("x", "y", 0, 0, true);
222 kr = ki.search(sq, (short) 10);
223
224 assertEquals(kr.getTotalResults(), 4);
225 assertEquals(2, kr.getMatch(0).startPos);
226 assertEquals(7, kr.getMatch(0).endPos);
227 assertEquals(3, kr.getMatch(1).startPos);
228 assertEquals(7, kr.getMatch(1).endPos);
229 assertEquals(3, kr.getMatch(2).startPos);
230 assertEquals(8, kr.getMatch(2).endPos);
231
232 // Next to ---- Distance 1:1
233 sq = createElementQuery("y", "x", 1, 1, true);
234 kr = ki.search(sq, (short) 10);
235
236 assertEquals(kr.getTotalResults(), 1);
237 assertEquals(5, kr.getMatch(0).startPos);
238 assertEquals(10, kr.getMatch(0).endPos);
239
240 // ---- Distance 1:2
241 sq = createElementQuery("y", "x", 1, 2, true);
242 kr = ki.search(sq, (short) 10);
243
244 assertEquals(kr.getTotalResults(), 2);
245 assertEquals(4, kr.getMatch(0).startPos);
246 assertEquals(9, kr.getMatch(0).endPos);
247 assertEquals(5, kr.getMatch(1).startPos);
248 assertEquals(10, kr.getMatch(1).endPos);
249
250 // The same element type ---- Distance 1:2
251 sq = createElementQuery("x", "x", 1, 2, true);
252 kr = ki.search(sq, (short) 10);
253
254 assertEquals(kr.getTotalResults(), 2);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000255 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000256
257
258 /** Skip to */
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000259 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000260 public void testCase6 () throws IOException {
261 ki = new KrillIndex();
262 ki.addDoc(createFieldDoc2());
263 ki.addDoc(createFieldDoc1());
264 ki.commit();
265
266 SpanQuery firstClause = createQuery("s:d", "s:e", 3, 4, true);
267 kr = ki.search(firstClause, (short) 10);
268
269 assertEquals(kr.getTotalResults(), 3);
270 assertEquals(0, kr.getMatch(0).getLocalDocID());
271 assertEquals(2, kr.getMatch(0).startPos);
272 assertEquals(6, kr.getMatch(0).endPos);
273 assertEquals(1, kr.getMatch(1).getLocalDocID());
274 assertEquals(4, kr.getMatch(1).startPos);
275 assertEquals(8, kr.getMatch(1).endPos);
276 assertEquals(4, kr.getMatch(2).startPos);
277 assertEquals(9, kr.getMatch(2).endPos);
278
279 // The secondspans is skipped to doc# of the current firstspans
280 SpanQuery sq = new SpanSegmentQuery(createQuery("s:d", "s:e", 3, 4,
281 true), createElementQuery("y", "x", 1, 2, true));
282 kr = ki.search(sq, (short) 10);
283
284 assertEquals(kr.getTotalResults(), 1);
285 assertEquals(4, kr.getMatch(0).startPos);
286 assertEquals(9, kr.getMatch(0).endPos);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000287 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000288
289
Eliza Margaretha8f9934c2014-02-06 13:04:32 +0000290 /** Same tokens */
291 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000292 public void testCase7 () throws IOException {
293 ki = new KrillIndex();
294 ki.addDoc(createFieldDoc1());
295 ki.commit();
296
297 SpanQuery sq = createQuery("s:c", "s:c", 1, 2, true);
298 kr = ki.search(sq, (short) 10);
299
300 assertEquals(kr.getTotalResults(), 3);
Eliza Margaretha8f9934c2014-02-06 13:04:32 +0000301 assertEquals(0, kr.getMatch(0).startPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000302 assertEquals(3, kr.getMatch(0).endPos);
303 assertEquals(2, kr.getMatch(1).startPos);
304 assertEquals(4, kr.getMatch(1).endPos);
305 assertEquals(3, kr.getMatch(2).startPos);
306 assertEquals(6, kr.getMatch(2).endPos);
307
308 ki.addDoc(createFieldDoc2());
309 ki.commit();
310
311 // with order
312 sq = createQuery("s:e", "s:e", 1, 1, true);
313 kr = ki.search(sq, (short) 10);
314
315 assertEquals(kr.getTotalResults(), 1);
316
317 // without order
318 sq = createQuery("s:e", "s:e", 1, 1, false);
319 kr = ki.search(sq, (short) 10);
320
321 assertEquals(kr.getTotalResults(), 2);
322 }
323
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000324}