blob: b57e8884edf5a96b0756ec948d47e1af948b42c5 [file] [log] [blame]
Eliza Margarethae18d62e2014-02-11 11:30:48 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.*;
4
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.apache.lucene.index.Term;
10import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
12import org.junit.Test;
13import org.junit.runner.RunWith;
14import org.junit.runners.JUnit4;
15
16import de.ids_mannheim.korap.KorapIndex;
17import de.ids_mannheim.korap.KorapResult;
18import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margaretha5f606922014-02-18 15:33:49 +000019import de.ids_mannheim.korap.query.SpanDistanceQuery;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000020import de.ids_mannheim.korap.query.SpanElementQuery;
21import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
22import de.ids_mannheim.korap.query.SpanNextQuery;
23
24@RunWith(JUnit4.class)
25public class TestMultipleDistanceIndex {
26
27 private KorapIndex ki;
28 private KorapResult kr;
29
30 public SpanQuery createQuery(String x, String y, List<DistanceConstraint>
31 constraints, boolean isOrdered){
32
33 SpanQuery sx = new SpanTermQuery(new Term("base",x));
34 SpanQuery sy = new SpanTermQuery(new Term("base",y));
35
36 return new SpanMultipleDistanceQuery(sx, sy, constraints, isOrdered, true);
37 }
38
Eliza Margarethad4693462014-03-17 13:16:18 +000039 public DistanceConstraint createConstraint(String unit, int min, int max,
40 boolean isOrdered, boolean exclusion){
41
Eliza Margarethae18d62e2014-02-11 11:30:48 +000042 if (unit.equals("w")){
Eliza Margarethad4693462014-03-17 13:16:18 +000043 return new DistanceConstraint(min, max,isOrdered,exclusion);
Eliza Margaretha5f606922014-02-18 15:33:49 +000044 }
45 return new DistanceConstraint(new SpanElementQuery("base", unit),
Eliza Margarethad4693462014-03-17 13:16:18 +000046 min, max, isOrdered, exclusion);
Eliza Margarethae18d62e2014-02-11 11:30:48 +000047 }
48
49 private FieldDocument createFieldDoc0() {
50 FieldDocument fd = new FieldDocument();
51 fd.addString("ID", "doc-0");
52 fd.addTV("base",
53 "text",
54 "[(0-1)s:b|_1#0-1|<>:s#0-2$<i>2|<>:p#0-4$<i>4]" +
55 "[(1-2)s:b|s:c|_2#1-2]" +
56 "[(2-3)s:c|_3#2-3|<>:s#2-3$<i>4]" +
57 "[(3-4)s:b|_4#3-4]" +
58 "[(4-5)s:c|_5#4-5|<>:s#4-6$<i>6|<>:p#4-6$<i>6]" +
59 "[(5-6)s:e|_6#5-6]");
60 return fd;
61 }
62
63 private FieldDocument createFieldDoc1() {
64 FieldDocument fd = new FieldDocument();
65 fd.addString("ID", "doc-1");
66 fd.addTV("base",
67 "text",
68 "[(0-1)s:c|_1#0-1|<>:s#0-2$<i>2|<>:p#0-4$<i>4]" +
69 "[(1-2)s:c|s:e|_2#1-2]" +
70 "[(2-3)s:e|_3#2-3|<>:s#2-3$<i>4]" +
71 "[(3-4)s:c|_4#3-4]" +
72 "[(4-5)s:e|_5#4-5|<>:s#4-6$<i>6|<>:p#4-6$<i>6]" +
73 "[(5-6)s:c|_6#5-6]");
74 return fd;
75 }
76
Eliza Margarethae18d62e2014-02-11 11:30:48 +000077 private FieldDocument createFieldDoc2() {
78 FieldDocument fd = new FieldDocument();
79 fd.addString("ID", "doc-2");
80 fd.addTV("base",
81 "text",
82 "[(0-1)s:b|_1#0-1|<>:s#0-2$<i>2|<>:p#0-4$<i>4]" +
83 "[(1-2)s:b|s:e|_2#1-2]" +
84 "[(2-3)s:e|_3#2-3|<>:s#2-3$<i>4]" +
85 "[(3-4)s:b|s:c|_4#3-4]" +
86 "[(4-5)s:e|_5#4-5|<>:s#4-6$<i>6|<>:p#4-6$<i>6]" +
Eliza Margarethae335beb2014-02-27 12:56:14 +000087 "[(5-6)s:d|_6#5-6]" +
88 "[(6-7)s:b|_7#6-7|<>:s#6-7$<i>7|<>:p#6-7$<i>7]" );
Eliza Margarethae18d62e2014-02-11 11:30:48 +000089 return fd;
90 }
91
92 private FieldDocument createFieldDoc3() {
93 FieldDocument fd = new FieldDocument();
94 fd.addString("ID", "doc-0");
95 fd.addTV("base",
96 "text",
97 "[(0-1)s:b|_1#0-1|<>:s#0-2$<i>2|<>:p#0-4$<i>4]" +
98 "[(1-2)s:b|s:c|_2#1-2]" +
Eliza Margaretha5f606922014-02-18 15:33:49 +000099 "[(2-3)s:c|_3#2-3|<>:s#2-3$<i>5]" +
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000100 "[(3-4)s:b|_4#3-4]" +
Eliza Margaretha5f606922014-02-18 15:33:49 +0000101 "[(4-5)s:b|_5#4-5]" +
102 "[(5-6)s:b|_6#5-6]" + // gap
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000103 "[(6-7)s:c|_7#6-7|<>:s#6-7$<i>7|<>:p#6-7$<i>7]" );
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000104 return fd;
105 }
106
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000107 /** Unordered, same sentence
108 * */
109 @Test
110 public void testCase1() throws IOException {
111 ki = new KorapIndex();
112 ki.addDoc(createFieldDoc0());
113 ki.commit();
114
115 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000116 constraints.add(createConstraint("w", 0, 2, false, false));
117 constraints.add(createConstraint("s", 0, 0, false, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000118
119 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000120 mdq = createQuery("s:b", "s:c", constraints,false);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000121 kr = ki.search(mdq, (short) 10);
Nils Diewald0f5a2792014-02-13 17:20:36 +0000122 // System.out.println(mdq);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000123
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000124 assertEquals((long) 3, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000125 assertEquals(0, kr.getMatch(0).getStartPos());
126 assertEquals(2, kr.getMatch(0).getEndPos());
127 assertEquals(1, kr.getMatch(1).getStartPos());
128 assertEquals(2, kr.getMatch(1).getEndPos());
129 assertEquals(2, kr.getMatch(2).getStartPos());
130 assertEquals(4, kr.getMatch(2).getEndPos());
131 }
132
133 /** Ordered
134 * Unordered
135 * Two constraints
136 * Three constraints
137 * */
138 @Test
139 public void testCase2() throws IOException {
140 ki = new KorapIndex();
141 ki.addDoc(createFieldDoc0());
142 ki.commit();
143
Eliza Margarethad4693462014-03-17 13:16:18 +0000144 // Ordered - two constraints
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000145 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000146 constraints.add(createConstraint("w", 0, 2, true, false));
147 constraints.add(createConstraint("s", 1, 1, true, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000148
Eliza Margarethad4693462014-03-17 13:16:18 +0000149 SpanQuery mdq;
150 mdq = createQuery("s:b", "s:c", constraints,true);
151 kr = ki.search(mdq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000152 assertEquals((long) 3, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000153 assertEquals(0, kr.getMatch(0).getStartPos());
154 assertEquals(3, kr.getMatch(0).getEndPos());
155 assertEquals(1, kr.getMatch(1).getStartPos());
156 assertEquals(3, kr.getMatch(1).getEndPos());
157 assertEquals(3, kr.getMatch(2).getStartPos());
Eliza Margarethad4693462014-03-17 13:16:18 +0000158 assertEquals(5, kr.getMatch(2).getEndPos());
159
160 // Three constraints
161 constraints.add(createConstraint("p", 0, 0, true, false));
162 mdq = createQuery("s:b", "s:c", constraints,true);
163 kr = ki.search(mdq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000164 assertEquals((long) 2, kr.getTotalResults());
Eliza Margarethad4693462014-03-17 13:16:18 +0000165
166
167 // Unordered - two constraints
168 constraints.clear();
169 constraints.add(createConstraint("w", 0, 2, false, false));
170 constraints.add(createConstraint("s", 1, 1, false, false));
171
172 mdq = createQuery("s:c", "s:b", constraints,false);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000173 kr = ki.search(mdq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000174 assertEquals((long) 4, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000175 assertEquals(1, kr.getMatch(2).getStartPos());
176 assertEquals(4, kr.getMatch(2).getEndPos());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000177
Eliza Margarethad4693462014-03-17 13:16:18 +0000178 // Three constraints
179 constraints.add(createConstraint("p", 0, 0, false, false));
180 mdq = createQuery("s:b", "s:c", constraints,false);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000181 kr = ki.search(mdq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000182 assertEquals((long) 3, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000183
184 }
185
186 /** Multiple documents
187 * Ensure same doc (inner term span)
188 * */
189 @Test
190 public void testCase3() throws IOException {
191 ki = new KorapIndex();
192 ki.addDoc(createFieldDoc0());
193 ki.addDoc(createFieldDoc1());
194 ki.addDoc(createFieldDoc2());
195 ki.commit();
196
197 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000198 constraints.add(createConstraint("w", 1, 2, false, false));
199 constraints.add(createConstraint("s", 1, 2, false, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000200
201 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000202 mdq = createQuery("s:b", "s:e", constraints,false);
Eliza Margaretha5f606922014-02-18 15:33:49 +0000203 kr = ki.search(mdq, (short) 10);
204
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000205 assertEquals((long) 5, kr.getTotalResults());
Eliza Margaretha5f606922014-02-18 15:33:49 +0000206 assertEquals(3, kr.getMatch(0).getStartPos());
207 assertEquals(6, kr.getMatch(0).getEndPos());
208 assertEquals(2, kr.getMatch(1).getLocalDocID());
209 assertEquals(1, kr.getMatch(2).getStartPos());
210 assertEquals(4, kr.getMatch(2).getEndPos());
211 assertEquals(3, kr.getMatch(3).getStartPos());
212 assertEquals(5, kr.getMatch(3).getEndPos());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000213 assertEquals(4, kr.getMatch(4).getStartPos());
214 assertEquals(7, kr.getMatch(4).getEndPos());
215
216// System.out.print(kr.getTotalResults()+"\n");
217// for (int i=0; i< kr.getTotalResults(); i++){
218// System.out.println(
219// kr.match(i).getLocalDocID()+" "+
220// kr.match(i).startPos + " " +
221// kr.match(i).endPos
222// );
223// }
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000224
225 }
226
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000227 /** Skip to
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000228 * */
229 @Test
230 public void testCase4() throws IOException {
231 ki = new KorapIndex();
232 ki.addDoc(createFieldDoc0());
233 ki.addDoc(createFieldDoc3());
234 ki.addDoc(createFieldDoc1());
235 ki.addDoc(createFieldDoc2());
236 ki.commit();
237
238 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000239 constraints.add(createConstraint("w", 1, 2, false, false));
240 constraints.add(createConstraint("s", 1, 2, false, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000241
242 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000243 mdq = createQuery("s:b", "s:c", constraints,false);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000244
245 SpanQuery sq = new SpanNextQuery(mdq,
246 new SpanTermQuery(new Term("base","s:e")));
247 kr = ki.search(sq, (short) 10);
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000248
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000249 assertEquals((long) 2, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000250 assertEquals(3, kr.getMatch(0).getStartPos());
251 assertEquals(6, kr.getMatch(0).getEndPos());
252 assertEquals(3, kr.getMatch(1).getLocalDocID());
253 assertEquals(1, kr.getMatch(1).getStartPos());
254 assertEquals(5, kr.getMatch(1).getEndPos());
255
256 }
257
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000258 /** Same tokens: ordered and unordered yield the same results
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000259 * */
260 @Test
261 public void testCase5() throws IOException {
262 ki = new KorapIndex();
263 ki.addDoc(createFieldDoc0());
264 ki.addDoc(createFieldDoc1());
265 ki.commit();
266
267 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000268 constraints.add(createConstraint("w", 1, 2, false, false));
269 constraints.add(createConstraint("s", 1, 2, false, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000270
271 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000272 mdq = createQuery("s:c", "s:c", constraints,false);
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000273 kr = ki.search(mdq, (short) 10);
274
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000275 assertEquals((long) 4, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000276 assertEquals(1, kr.getMatch(0).getStartPos());
277 assertEquals(3, kr.getMatch(0).getEndPos());
278 assertEquals(2, kr.getMatch(1).getStartPos());
279 assertEquals(5, kr.getMatch(1).getEndPos());
280 assertEquals(1, kr.getMatch(2).getLocalDocID());
281 assertEquals(1, kr.getMatch(2).getStartPos());
282 assertEquals(4, kr.getMatch(2).getEndPos());
283 assertEquals(3, kr.getMatch(3).getStartPos());
284 assertEquals(6, kr.getMatch(3).getEndPos());
285
286 }
287
Eliza Margaretha01929182014-02-19 11:48:59 +0000288 /** Exclusion
289 * Gaps
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000290 * */
291 @Test
292 public void testCase6() throws IOException {
293 ki = new KorapIndex();
Eliza Margaretha5f606922014-02-18 15:33:49 +0000294 ki.addDoc(createFieldDoc3());
Eliza Margaretha01929182014-02-19 11:48:59 +0000295 ki.commit();
296
297 // First constraint - token exclusion
Eliza Margaretha5f606922014-02-18 15:33:49 +0000298 SpanQuery sx = new SpanTermQuery(new Term("base","s:b"));
299 SpanQuery sy = new SpanTermQuery(new Term("base","s:c"));
Eliza Margarethad4693462014-03-17 13:16:18 +0000300
301 DistanceConstraint dc1 = createConstraint("w", 0, 1, false, true);
302 SpanDistanceQuery sq = new SpanDistanceQuery(sx, sy, dc1, true);
Eliza Margaretha5f606922014-02-18 15:33:49 +0000303
304 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000305 assertEquals((long) 1, kr.getTotalResults());
Eliza Margaretha01929182014-02-19 11:48:59 +0000306 // 4-5
Eliza Margaretha5f606922014-02-18 15:33:49 +0000307
Eliza Margaretha01929182014-02-19 11:48:59 +0000308 // Second constraint - element distance
Eliza Margarethad4693462014-03-17 13:16:18 +0000309 DistanceConstraint dc2 = createConstraint("s", 1, 1, false, false);
310 sq = new SpanDistanceQuery(sx, sy, dc2, true);
Eliza Margaretha01929182014-02-19 11:48:59 +0000311 kr = ki.search(sq, (short) 10);
312 // 0-3, 1-3, 1-4, 1-5, 3-7, 4-7
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000313 assertEquals((long) 6, kr.getTotalResults());
Eliza Margaretha5f606922014-02-18 15:33:49 +0000314
Eliza Margarethad4693462014-03-17 13:16:18 +0000315
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000316 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000317 constraints.add(dc1);
318 constraints.add(dc2);
Eliza Margaretha01929182014-02-19 11:48:59 +0000319
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000320 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000321 mdq = createQuery("s:b", "s:c", constraints,false);
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000322 kr = ki.search(mdq, (short) 10);
323
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000324 assertEquals((long) 2, kr.getTotalResults());
Eliza Margaretha01929182014-02-19 11:48:59 +0000325 assertEquals(1, kr.getMatch(0).getStartPos());
326 assertEquals(5, kr.getMatch(0).getEndPos());
327 assertEquals(4, kr.getMatch(1).getStartPos());
328 assertEquals(7, kr.getMatch(1).getEndPos());
329 }
Eliza Margaretha5f606922014-02-18 15:33:49 +0000330
Eliza Margaretha01929182014-02-19 11:48:59 +0000331
332 /** Exclusion, multiple documents
Eliza Margaretha01929182014-02-19 11:48:59 +0000333 * */
Eliza Margarethae335beb2014-02-27 12:56:14 +0000334 @Test
Eliza Margaretha01929182014-02-19 11:48:59 +0000335 public void testCase7() throws IOException {
Eliza Margarethae335beb2014-02-27 12:56:14 +0000336 ki = new KorapIndex();
Eliza Margaretha01929182014-02-19 11:48:59 +0000337 ki.addDoc(createFieldDoc2());
338 ki.commit();
339
Eliza Margarethae335beb2014-02-27 12:56:14 +0000340 SpanQuery sx = new SpanTermQuery(new Term("base","s:b"));
341 SpanQuery sy = new SpanTermQuery(new Term("base","s:c"));
342 // Second constraint
Eliza Margarethad4693462014-03-17 13:16:18 +0000343 SpanDistanceQuery sq = new SpanDistanceQuery(sx,sy,
344 createConstraint("s", 0, 0, false, true),
345 true);
346 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000347 assertEquals((long) 3, kr.getTotalResults());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000348 // 0-1, 1-2, 6-7
349
Eliza Margarethad4693462014-03-17 13:16:18 +0000350 // Exclusion within the same sentence
Eliza Margaretha01929182014-02-19 11:48:59 +0000351 List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
Eliza Margarethad4693462014-03-17 13:16:18 +0000352 constraints.add(createConstraint("w", 0, 2,false,true));
353 constraints.add(createConstraint("s", 0, 0,false,true));
354
Eliza Margaretha01929182014-02-19 11:48:59 +0000355 SpanQuery mdq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000356 mdq = createQuery("s:b", "s:c", constraints,false);
357 kr = ki.search(mdq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000358 assertEquals((long) 2, kr.getTotalResults());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000359 assertEquals(0, kr.getMatch(0).getStartPos());
360 assertEquals(1, kr.getMatch(0).getEndPos());
361 assertEquals(6, kr.getMatch(1).getStartPos());
362 assertEquals(7, kr.getMatch(1).getEndPos());
363
Eliza Margarethad4693462014-03-17 13:16:18 +0000364
Eliza Margarethae335beb2014-02-27 12:56:14 +0000365 // Third constraint
Eliza Margarethad4693462014-03-17 13:16:18 +0000366 sq = new SpanDistanceQuery(sx, sy,
367 createConstraint("p", 0, 0, false, true),
368 true);
369 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000370 assertEquals((long) 1, kr.getTotalResults());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000371 // 6-7
372
Eliza Margarethad4693462014-03-17 13:16:18 +0000373 constraints.add(createConstraint("p", 0, 0, false, true));
374 mdq = createQuery("s:b", "s:c", constraints,false);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000375 kr = ki.search(mdq, (short) 10);
376
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000377 assertEquals((long) 1, kr.getTotalResults());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000378 assertEquals(6, kr.getMatch(0).getStartPos());
379 assertEquals(7, kr.getMatch(0).getEndPos());
380
381 }
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000382}
383