package de.ids_mannheim.korap.index;

import static org.junit.Assert.assertEquals;

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.query.DistanceConstraint;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanSegmentQuery;

21@RunWith(JUnit4.class)
Eliza Margarethadb292872014-02-03 09:36:43 +000022public class TestDistanceIndex {
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000023 KorapResult kr;
24 KorapIndex ki;
25
Eliza Margaretha9738c392014-02-03 17:04:53 +000026 private FieldDocument createFieldDoc0() {
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000027 FieldDocument fd = new FieldDocument();
28 fd.addString("ID", "doc-0");
29 fd.addTV("base",
30 "text",
31 "[(0-1)s:b|s:c|_1#0-1]" +
32 "[(1-2)s:b|_2#1-2]" +
33 "[(2-3)s:c|_3#2-3]" +
34 "[(3-4)s:c|_4#3-4]" +
35 "[(4-5)s:d|_5#4-5]" +
36 "[(5-6)s:d|_6#5-6]");
37 return fd;
38 }
39
Eliza Margaretha9738c392014-02-03 17:04:53 +000040 private FieldDocument createFieldDoc1(){
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000041 FieldDocument fd = new FieldDocument();
42 fd.addString("ID", "doc-1");
43 fd.addTV("base",
44 "text",
45 "[(0-1)s:c|_1#0-1]" +
46 "[(1-2)s:e|_2#1-2]" +
47 "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]" +
48 "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]" +
49 "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]" +
50 "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" +
51 "[(6-7)s:d|_7#6-7]" +
52 "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" +
53 "[(8-9)s:e|_9#8-9|<>:x#8-10$<i>10]" +
54 "[(9-10)s:d|_10#9-10]");
55 return fd;
56 }
57
Eliza Margaretha9738c392014-02-03 17:04:53 +000058 private FieldDocument createFieldDoc2() {
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000059 FieldDocument fd = new FieldDocument();
60 fd.addString("ID", "doc-2");
61 fd.addTV("base",
62 "text",
63 "[(0-1)s:b|_1#0-1]" +
64 "[(1-2)s:b|_2#1-2]" +
65 "[(2-3)s:d|_3#2-3]" +
66 "[(3-4)s:e|_4#3-4]" +
67 "[(4-5)s:d|_5#4-5]" +
68 "[(5-6)s:e|_6#5-6]");
69 return fd;
70 }
71
Eliza Margarethad4693462014-03-17 13:16:18 +000072 private SpanQuery createQuery(String x, String y, int min, int max, boolean isOrdered){
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000073 SpanQuery sq = new SpanDistanceQuery(
74 new SpanTermQuery(new Term("base",x)),
75 new SpanTermQuery(new Term("base",y)),
Eliza Margarethad4693462014-03-17 13:16:18 +000076 new DistanceConstraint(min, max, isOrdered, false),
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000077 true
78 );
79 return sq;
80 }
81
Eliza Margaretha9738c392014-02-03 17:04:53 +000082 private SpanQuery createElementQuery(String x, String y, int min, int max, boolean isOrdered){
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000083 SpanQuery sq = new SpanDistanceQuery(
84 new SpanElementQuery("base",x),
85 new SpanElementQuery("base",y),
Eliza Margarethad4693462014-03-17 13:16:18 +000086 new DistanceConstraint(min, max, isOrdered, false),
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000087 true
88 );
89 return sq;
90 }
91
92 /** - Intersection
93 * - Multiple occurrences in the same doc
94 * - hasMoreFirstSpans = false for the current secondspan
95 * */
96 @Test
97 public void testCase1() throws IOException{
98 ki = new KorapIndex();
99 ki.addDoc(createFieldDoc0());
100 ki.commit();
101 SpanQuery sq;
102 // ---- Distance 0 to 1
Eliza Margarethadb292872014-02-03 09:36:43 +0000103 sq = createQuery("s:b","s:c",0,1,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000104 kr = ki.search(sq, (short) 10);
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000105// System.out.println(sq);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000106 assertEquals(kr.getTotalResults(), 2);
107 assertEquals(0, kr.getMatch(0).startPos);
108 assertEquals(1, kr.getMatch(0).endPos);
109 assertEquals(1, kr.getMatch(1).startPos);
110 assertEquals(3, kr.getMatch(1).endPos);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000111
112 // ---- Distance 2 to 2
Eliza Margarethadb292872014-02-03 09:36:43 +0000113 sq = createQuery("s:b","s:c",2,2,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000114 kr = ki.search(sq, (short) 10);
115
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000116 assertEquals(kr.getTotalResults(), 2);
117 assertEquals(0, kr.getMatch(0).startPos);
118 assertEquals(3, kr.getMatch(0).endPos);
119 assertEquals(1, kr.getMatch(1).startPos);
120 assertEquals(4, kr.getMatch(1).endPos);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000121
122 // ---- Distance 2 to 3
Eliza Margarethadb292872014-02-03 09:36:43 +0000123 sq = createQuery("s:b","s:c",2,3,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000124 kr = ki.search(sq, (short) 10);
125
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000126 assertEquals(kr.getTotalResults(), 3);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000127
128 ki.close();
129 }
130
131 /** - Check candidate list:
132 * - CandidateList should not contain firstspans that are too far from
133 * the current secondspan
134 * - Add new candidates
135 * */
136 @Test
137 public void testCase2() throws IOException{
138 ki = new KorapIndex();
139 ki.addDoc(createFieldDoc1());
140 ki.commit();
141
142 // ---- Distance 1 to 3
143 // Candidate list for the current secondspan, is empty
Eliza Margarethadb292872014-02-03 09:36:43 +0000144 SpanQuery sq = createQuery("s:c","s:d",1,3,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000145 kr = ki.search(sq, (short) 10);
146
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000147 assertEquals((long) 4, kr.getTotalResults());
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000148 assertEquals(2, kr.getMatch(0).startPos);
149 assertEquals(5, kr.getMatch(0).endPos);
150 assertEquals(3, kr.getMatch(2).startPos);
151 assertEquals(7, kr.getMatch(2).endPos);
152
153 ki.addDoc(createFieldDoc0());
154 ki.commit();
155
156 // ---- Distance 3 to 3
157 // Candidate list is empty, but there are secondspans in the other doc
Eliza Margarethadb292872014-02-03 09:36:43 +0000158 sq = createQuery("s:c","s:d",3,3,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000159 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000160 assertEquals((long) 2, kr.getTotalResults());
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000161
162 ki.close();
163 }
164
165 /** - Ensure the same document
166 * - Multiple matches in multiple documents and atomic indices
167 * */
168 @Test
169 public void testCase3() throws IOException{
170 ki = new KorapIndex();
171 ki.addDoc(createFieldDoc0());
172 ki.commit();
173 ki.addDoc(createFieldDoc2());
174 ki.addDoc(createFieldDoc1());
175 ki.commit();
176
177 SpanQuery sq;
Eliza Margarethadb292872014-02-03 09:36:43 +0000178 sq = createQuery("s:c","s:d",3,3,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000179 kr = ki.search(sq, (short) 10);
180
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000181 assertEquals(kr.getTotalResults(), 2);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000182 }
183
184 /** - Firstspan.next() is in the other doc, but there is
185 * still a secondspans in the same doc
186 * - hasMoreFirstSpan and secondspans.next() are true,
187 * but ensureSameDoc() = false
188 * */
189 @Test
190 public void testCase4() throws IOException{
191 ki = new KorapIndex();
192 ki.addDoc(createFieldDoc0());
193 ki.commit();
194 ki.addDoc(createFieldDoc2());
195 ki.addDoc(createFieldDoc1());
196 ki.commit();
197
198 // ---- Distance 1 to 2
Eliza Margarethadb292872014-02-03 09:36:43 +0000199 SpanQuery sq = createQuery("s:b","s:c",1,2,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000200 kr = ki.search(sq, (short) 10);
201
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000202 assertEquals(kr.getTotalResults(), 3);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000203 assertEquals(0, kr.getMatch(0).startPos);
204 assertEquals(3, kr.getMatch(0).endPos);
205 assertEquals(1, kr.getMatch(1).startPos);
206 assertEquals(3, kr.getMatch(1).endPos);
207 assertEquals(1, kr.getMatch(2).startPos);
208 assertEquals(4, kr.getMatch(2).endPos);
209 ki.close();
210 }
211
212 /** ElementQueries */
213 @Test
214 public void testCase5() throws IOException{
215 ki = new KorapIndex();
216 ki.addDoc(createFieldDoc1());
217 ki.commit();
218
219 // Intersection ---- Distance 0:0
Eliza Margarethadb292872014-02-03 09:36:43 +0000220 SpanQuery sq = createElementQuery("x","y",0,0,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000221 kr = ki.search(sq, (short) 10);
222
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000223 assertEquals(kr.getTotalResults(), 4);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000224 assertEquals(2, kr.getMatch(0).startPos);
225 assertEquals(7, kr.getMatch(0).endPos);
226 assertEquals(3, kr.getMatch(1).startPos);
227 assertEquals(7, kr.getMatch(1).endPos);
228 assertEquals(3, kr.getMatch(2).startPos);
229 assertEquals(8, kr.getMatch(2).endPos);
230
231 // Next to ---- Distance 1:1
Eliza Margarethadb292872014-02-03 09:36:43 +0000232 sq = createElementQuery("y","x",1,1,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000233 kr = ki.search(sq, (short) 10);
234
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000235 assertEquals(kr.getTotalResults(), 1);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000236 assertEquals(5, kr.getMatch(0).startPos);
237 assertEquals(10, kr.getMatch(0).endPos);
238
239 // ---- Distance 1:2
Eliza Margarethadb292872014-02-03 09:36:43 +0000240 sq = createElementQuery("y","x",1,2,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000241 kr = ki.search(sq, (short) 10);
242
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000243 assertEquals(kr.getTotalResults(), 2);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000244 assertEquals(4, kr.getMatch(0).startPos);
245 assertEquals(9, kr.getMatch(0).endPos);
246 assertEquals(5, kr.getMatch(1).startPos);
247 assertEquals(10, kr.getMatch(1).endPos);
248
249 // The same element type ---- Distance 1:2
Eliza Margarethadb292872014-02-03 09:36:43 +0000250 sq = createElementQuery("x","x",1,2,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000251 kr = ki.search(sq, (short) 10);
252
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000253 assertEquals(kr.getTotalResults(), 2);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000254 }
255
256 /** Skip to */
257 @Test
258 public void testCase6() throws IOException{
259 ki = new KorapIndex();
260 ki.addDoc(createFieldDoc2());
261 ki.addDoc(createFieldDoc1());
262 ki.commit();
263
Eliza Margarethadb292872014-02-03 09:36:43 +0000264 SpanQuery firstClause = createQuery("s:d", "s:e", 3, 4,true);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000265 kr = ki.search(firstClause, (short) 10);
266
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000267 assertEquals(kr.getTotalResults(), 3);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000268 assertEquals(0, kr.getMatch(0).getLocalDocID());
269 assertEquals(2, kr.getMatch(0).startPos);
270 assertEquals(6, kr.getMatch(0).endPos);
271 assertEquals(1, kr.getMatch(1).getLocalDocID());
272 assertEquals(4, kr.getMatch(1).startPos);
273 assertEquals(8, kr.getMatch(1).endPos);
274 assertEquals(4, kr.getMatch(2).startPos);
275 assertEquals(9, kr.getMatch(2).endPos);
276
277 // The secondspans is skipped to doc# of the current firstspans
278 SpanQuery sq = new SpanSegmentQuery(
Eliza Margarethadb292872014-02-03 09:36:43 +0000279 createQuery("s:d","s:e",3,4,true),
280 createElementQuery("y","x",1,2,true)
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000281 );
282 kr = ki.search(sq, (short) 10);
283
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000284 assertEquals(kr.getTotalResults(), 1);
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000285 assertEquals(4, kr.getMatch(0).startPos);
286 assertEquals(9, kr.getMatch(0).endPos);
287 }
288
Eliza Margaretha8f9934c2014-02-06 13:04:32 +0000289 /** Same tokens */
290 @Test
291 public void testCase7() throws IOException{
292 ki = new KorapIndex();
293 ki.addDoc(createFieldDoc1());
294 ki.commit();
295
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000296 SpanQuery sq = createQuery("s:c", "s:c", 1, 2,true);
297 kr = ki.search(sq, (short) 10);
Eliza Margaretha8f9934c2014-02-06 13:04:32 +0000298
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000299 assertEquals(kr.getTotalResults(), 3);
Eliza Margaretha8f9934c2014-02-06 13:04:32 +0000300 assertEquals(0, kr.getMatch(0).startPos);
301 assertEquals(3, kr.getMatch(0).endPos);
302 assertEquals(2, kr.getMatch(1).startPos);
303 assertEquals(4, kr.getMatch(1).endPos);
304 assertEquals(3, kr.getMatch(2).startPos);
305 assertEquals(6, kr.getMatch(2).endPos);
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000306
307 ki.addDoc(createFieldDoc2());
308 ki.commit();
309
310 // with order
311 sq = createQuery("s:e", "s:e", 1, 1,true);
312 kr = ki.search(sq, (short) 10);
313
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000314 assertEquals(kr.getTotalResults(), 1);
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000315
316 // without order
317 sq = createQuery("s:e", "s:e", 1, 1,false);
318 kr = ki.search(sq, (short) 10);
319
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000320 assertEquals(kr.getTotalResults(), 2);
Eliza Margaretha198e4ef2014-02-10 13:50:50 +0000321 }
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000322
323}