blob: 1cb78c99a9456616b1479d29374c7e7e3b188de6 [file] [log] [blame]
Eliza Margaretha9738c392014-02-03 17:04:53 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
Nils Diewalda14ecd62015-02-26 21:00:20 +000014import de.ids_mannheim.korap.KrillIndex;
Nils Diewald884dbcf2015-02-27 17:02:28 +000015import de.ids_mannheim.korap.response.Result;
Eliza Margarethad4693462014-03-17 13:16:18 +000016import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margaretha9738c392014-02-03 17:04:53 +000017import de.ids_mannheim.korap.query.SpanDistanceQuery;
18import de.ids_mannheim.korap.query.SpanElementQuery;
19import de.ids_mannheim.korap.query.SpanNextQuery;
Eliza Margaretha9738c392014-02-03 17:04:53 +000020
21@RunWith(JUnit4.class)
Nils Diewaldbb33da22015-03-04 16:24:25 +000022public class TestUnorderedDistanceIndex {
23
Nils Diewalda14ecd62015-02-26 21:00:20 +000024 private KrillIndex ki;
Nils Diewaldbb33da22015-03-04 16:24:25 +000025 private Result kr;
Eliza Margaretha9738c392014-02-03 17:04:53 +000026
Nils Diewaldbb33da22015-03-04 16:24:25 +000027
28 private FieldDocument createFieldDoc0 () {
29 FieldDocument fd = new FieldDocument();
Eliza Margaretha9738c392014-02-03 17:04:53 +000030 fd.addString("ID", "doc-0");
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 fd.addTV("base", "text", "[(0-1)s:c|_1#0-1]" + "[(1-2)s:e|_2#1-2]"
32 + "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]"
33 + "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]"
34 + "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]"
35 + "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" + "[(6-7)s:d|_7#6-7]"
36 + "[(7-8)s:f|_8#7-8|<>:x#7-9$<i>9]"
37 + "[(8-9)s:e|_9#8-9|<>:x#8-10$<i>10]" + "[(9-10)s:d|_10#9-10]");
Eliza Margaretha9738c392014-02-03 17:04:53 +000038 return fd;
39 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000040
41
42 private FieldDocument createFieldDoc1 () {
43 FieldDocument fd = new FieldDocument();
Eliza Margaretha9738c392014-02-03 17:04:53 +000044 fd.addString("ID", "doc-1");
Nils Diewaldbb33da22015-03-04 16:24:25 +000045 fd.addTV("base", "text", "[(0-1)s:d|_1#0-1]" + "[(1-2)s:c|_2#1-2]"
46 + "[(2-3)s:e|_3#2-3]" + "[(3-4)s:e|_4#3-4]"
47 + "[(4-5)s:d|_5#4-5]" + "[(5-6)s:e|_6#5-6]"
48 + "[(6-7)s:e|_7#6-7]" + "[(7-8)s:c|_8#7-8]"
49 + "[(8-9)s:e|_9#8-9]" + "[(9-10)s:d|_10#9-10]");
Eliza Margaretha9738c392014-02-03 17:04:53 +000050 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +000051 }
52
53
54 private FieldDocument createFieldDoc2 () {
55 FieldDocument fd = new FieldDocument();
Eliza Margaretha9738c392014-02-03 17:04:53 +000056 fd.addString("ID", "doc-2");
Nils Diewaldbb33da22015-03-04 16:24:25 +000057 fd.addTV("base", "text", "[(0-1)s:f|_1#0-1]" + "[(1-2)s:c|_2#1-2]"
58 + "[(2-3)s:e|_3#2-3]" + "[(3-4)s:e|_4#3-4]"
59 + "[(4-5)s:d|_5#4-5]" + "[(5-6)s:f|_6#5-6]"
60 + "[(6-7)s:f|_7#6-7]");
Eliza Margaretha9738c392014-02-03 17:04:53 +000061 return fd;
Eliza Margaretha9738c392014-02-03 17:04:53 +000062 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000063
64
65 private SpanQuery createQuery (String x, String y, int min, int max,
66 boolean isOrdered) {
67 SpanQuery sq = new SpanDistanceQuery(new SpanTermQuery(new Term("base",
68 x)), new SpanTermQuery(new Term("base", y)),
69 new DistanceConstraint(min, max, isOrdered, false), true);
70 return sq;
Eliza Margaretha9738c392014-02-03 17:04:53 +000071 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000072
73
74 private SpanQuery createElementQuery (String x, String y, int min, int max,
75 boolean isOrdered) {
76 SpanQuery sq = new SpanDistanceQuery(new SpanElementQuery("base", x),
77 new SpanElementQuery("base", y), new DistanceConstraint(min,
78 max, isOrdered, false), true);
79 return sq;
80 }
81
82
83 /**
84 * One document, multiple occurrences
85 * The first first and second spans are too far from each other
86 * One of the spans ends first
87 * One of the candidate list is empty
Eliza Margaretha9738c392014-02-03 17:04:53 +000088 * */
Eliza Margarethaadedcb62014-02-03 17:21:17 +000089 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000090 public void testCase1 () throws IOException {
91 //System.out.println("testcase 1");
92 ki = new KrillIndex();
93 ki.addDoc(createFieldDoc0());
94 ki.commit();
95
96 SpanQuery sq = createQuery("s:c", "s:d", 0, 3, false);
97 kr = ki.search(sq, (short) 10);
98
99 assertEquals(kr.getTotalResults(), 5);
Eliza Margarethab0449d02014-02-04 11:54:41 +0000100 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101
102
103 /**
104 * Multiple documents
105 * Ensure same doc
106 * Both candidate lists are empty, but there is a span left in the
107 * doc
108 * Both candidate lists are empty, but there are more matches in
109 * the doc
Eliza Margarethab0449d02014-02-04 11:54:41 +0000110 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111 * @throws IOException
Eliza Margarethab0449d02014-02-04 11:54:41 +0000112 * */
113 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000114 public void testCase2 () throws IOException {
115 //System.out.println("testcase 2");
116 ki = new KrillIndex();
117 ki.addDoc(createFieldDoc0());
118 ki.addDoc(createFieldDoc1());
119 ki.commit();
120
121 SpanQuery sq = createQuery("s:c", "s:d", 1, 2, false);
122 kr = ki.search(sq, (short) 10);
123
124 assertEquals(kr.getTotalResults(), 6);
Eliza Margarethaadedcb62014-02-03 17:21:17 +0000125 }
Eliza Margaretha6651fc32014-02-18 14:57:47 +0000126
Eliza Margarethaecf8b592014-09-30 17:08:09 +0000127
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 /**
129 * Multiple documents
130 * Ensure same Doc
131 *
132 * @throws IOException
133 * */
134 @Test
135 public void testCase3 () throws IOException {
136 //System.out.println("testcase 3");
137 ki = new KrillIndex();
138 ki.addDoc(createFieldDoc0());
139 ki.addDoc(createFieldDoc1());
140 ki.addDoc(createFieldDoc2());
141 ki.commit();
142
143 SpanQuery sq = createQuery("s:e", "s:f", 1, 2, false);
144 kr = ki.search(sq, (short) 10);
145
146 assertEquals(kr.getTotalResults(), 3);
147 assertEquals(0, kr.getMatch(0).getLocalDocID());
148 assertEquals(7, kr.getMatch(0).getStartPos());
149 assertEquals(9, kr.getMatch(0).getEndPos());
150 assertEquals(2, kr.getMatch(1).getLocalDocID());
151 assertEquals(0, kr.getMatch(1).getStartPos());
152 assertEquals(3, kr.getMatch(1).getEndPos());
153 }
154
155
156 /** Skip to */
157 @Test
158 public void testCase4 () throws IOException {
159 //System.out.println("testcase 4");
160 ki = new KrillIndex();
161 ki.addDoc(createFieldDoc0());
162 ki.addDoc(createFieldDoc1());
163 ki.addDoc(createFieldDoc2());
164 ki.commit();
165
166 SpanQuery sq = new SpanNextQuery(
167 createQuery("s:d", "s:e", 1, 2, false), new SpanTermQuery(
168 new Term("base", "s:f")));
169
170 kr = ki.search(sq, (short) 10);
171 assertEquals(kr.getTotalResults(), 2);
172 assertEquals(2, kr.getMatch(0).getLocalDocID());
173 assertEquals(2, kr.getMatch(0).getStartPos());
174 assertEquals(6, kr.getMatch(0).getEndPos());
175 assertEquals(3, kr.getMatch(1).getStartPos());
176 assertEquals(6, kr.getMatch(1).getEndPos());
177 }
178
179
180 /** ElementQueries */
181 @Test
182 public void testCase5 () throws IOException {
183 ki = new KrillIndex();
184 ki.addDoc(createFieldDoc0());
185 ki.commit();
186
187 // Intersection ---- Distance 0:0
188 //System.out.println("Intersection ---- Distance 0:0");
189 SpanQuery sq = createElementQuery("x", "y", 0, 0, false);
190 kr = ki.search(sq, (short) 10);
191
192 assertEquals(kr.getTotalResults(), 4);
193 assertEquals(2, kr.getMatch(0).startPos);
194 assertEquals(7, kr.getMatch(0).endPos);
195 assertEquals(3, kr.getMatch(1).startPos);
196 assertEquals(7, kr.getMatch(1).endPos);
197 assertEquals(3, kr.getMatch(2).startPos);
198 assertEquals(8, kr.getMatch(2).endPos);
199
200 // Next to ---- Distance 1:1
201 //System.out.println("Next to ---- Distance 1:1");
202 sq = createElementQuery("x", "y", 1, 1, false);
203 kr = ki.search(sq, (short) 10);
204
205 assertEquals(kr.getTotalResults(), 1);
206 assertEquals(5, kr.getMatch(0).startPos);
207 assertEquals(10, kr.getMatch(0).endPos);
208
209 // ---- Distance 1:2
210 //System.out.println("---- Distance 1:2");
211 sq = createElementQuery("x", "y", 1, 2, false);
212 kr = ki.search(sq, (short) 10);
213
214 assertEquals(kr.getTotalResults(), 2);
215 assertEquals(4, kr.getMatch(0).startPos);
216 assertEquals(9, kr.getMatch(0).endPos);
217 assertEquals(5, kr.getMatch(1).startPos);
218 assertEquals(10, kr.getMatch(1).endPos);
219
220 }
221
222
223 /**
224 * The same element type
225 *
226 * WARNING:
227 * This kind of query is not appropriate for an unordered distance
228 * span query.
229 * Instead, it must be an ordered distance span query. Such an
230 * unordered distance
231 * span query yields "redundant results" because matches are
232 * searched for each
233 * child span.
234 * */
235 @Test
236 public void testCase6 () throws IOException {
237 ki = new KrillIndex();
238 ki.addDoc(createFieldDoc0());
239 ki.commit();
240
241 //---- Distance 1:2
242 SpanQuery sq = createElementQuery("x", "x", 1, 2, false);
243 kr = ki.search(sq, (short) 10);
244
245 assertEquals(kr.getTotalResults(), 4);
246 }
247
248
249 /**
250 * Nested distance queries
251 * */
252 @Test
253 public void testCase7 () throws IOException {
254 //System.out.println("testcase 7");
255 ki = new KrillIndex();
256 ki.addDoc(createFieldDoc0());
257 ki.addDoc(createFieldDoc1());
258 ki.commit();
259
260 SpanQuery sq = createQuery("s:c", "s:d", 1, 2, false);
261 SpanQuery sq2 = new SpanDistanceQuery(sq, new SpanTermQuery(new Term(
262 "base", "s:e")), new DistanceConstraint(1, 2, true, false),
263 true);
264 kr = ki.search(sq2, (short) 10);
265 assertEquals(kr.getTotalResults(), 3);
266 assertEquals(5, kr.getMatch(0).getStartPos());
267 assertEquals(9, kr.getMatch(0).getEndPos());
268 assertEquals(1, kr.getMatch(1).getLocalDocID());
269 assertEquals(0, kr.getMatch(1).getStartPos());
270 assertEquals(3, kr.getMatch(1).getEndPos());
271 assertEquals(0, kr.getMatch(2).getStartPos());
272 assertEquals(4, kr.getMatch(2).getEndPos());
273 }
274
275
276 /**
277 * Multiple NextSpans in the same first span position
278 * */
279 @Test
280 public void testCase8 () throws IOException {
281 ki = new KrillIndex();
282 ki.addDoc(createFieldDoc1());
283 ki.commit();
284 SpanQuery sq = new SpanNextQuery(new SpanTermQuery(new Term("base",
285 "s:d")), createQuery("s:c", "s:e", 1, 2, false));
286 kr = ki.search(sq, (short) 10);
287
288 assertEquals(kr.getTotalResults(), 3);
289 assertEquals(0, kr.getMatch(1).getStartPos());
290 assertEquals(4, kr.getMatch(1).getEndPos());
291
292 }
293
Eliza Margaretha9738c392014-02-03 17:04:53 +0000294}