blob: c08535ed95bbff831459e93524fb31a8099c1bb4 [file] [log] [blame]
Eliza Margarethae18d62e2014-02-11 11:30:48 +00001package de.ids_mannheim.korap.index;
2
Akrond6f5f592018-06-19 15:58:16 +02003import static de.ids_mannheim.korap.TestSimple.*;
margaretha71c66ee2015-12-11 14:39:55 +01004import static org.junit.Assert.assertEquals;
Eliza Margarethae18d62e2014-02-11 11:30:48 +00005
6import java.io.IOException;
margaretha9880aec2017-08-31 16:58:07 +02007import java.nio.file.Paths;
Eliza Margarethae18d62e2014-02-11 11:30:48 +00008import java.util.ArrayList;
9import java.util.List;
10
11import org.apache.lucene.index.Term;
margarethaf14f3802017-08-30 16:34:01 +020012import org.apache.lucene.search.WildcardQuery;
13import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000014import org.apache.lucene.search.spans.SpanQuery;
15import org.apache.lucene.search.spans.SpanTermQuery;
margaretha9880aec2017-08-31 16:58:07 +020016import org.apache.lucene.store.MMapDirectory;
17import org.junit.Ignore;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000018import org.junit.Test;
19import org.junit.runner.RunWith;
20import org.junit.runners.JUnit4;
21
margaretha953fd012017-09-04 16:33:39 +020022import com.fasterxml.jackson.databind.JsonNode;
23import com.fasterxml.jackson.databind.ObjectMapper;
24
25import de.ids_mannheim.korap.Krill;
26import de.ids_mannheim.korap.KrillCollection;
Nils Diewalda14ecd62015-02-26 21:00:20 +000027import de.ids_mannheim.korap.KrillIndex;
margaretha953fd012017-09-04 16:33:39 +020028import de.ids_mannheim.korap.KrillQuery;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000029import de.ids_mannheim.korap.query.DistanceConstraint;
margaretha9880aec2017-08-31 16:58:07 +020030import de.ids_mannheim.korap.query.SpanClassQuery;
Eliza Margaretha5f606922014-02-18 15:33:49 +000031import de.ids_mannheim.korap.query.SpanDistanceQuery;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000032import de.ids_mannheim.korap.query.SpanElementQuery;
33import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
34import de.ids_mannheim.korap.query.SpanNextQuery;
margaretha9880aec2017-08-31 16:58:07 +020035import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
margaretha59b905f2017-09-05 11:15:48 +020036import de.ids_mannheim.korap.response.Match;
margaretha71c66ee2015-12-11 14:39:55 +010037import de.ids_mannheim.korap.response.Result;
margaretha9880aec2017-08-31 16:58:07 +020038import de.ids_mannheim.korap.util.QueryException;
Eliza Margarethae18d62e2014-02-11 11:30:48 +000039
40@RunWith(JUnit4.class)
41public class TestMultipleDistanceIndex {
Eliza Margarethae18d62e2014-02-11 11:30:48 +000042
Nils Diewaldbb33da22015-03-04 16:24:25 +000043 private KrillIndex ki;
44 private Result kr;
45
Nils Diewaldbb33da22015-03-04 16:24:25 +000046 public SpanQuery createQuery (String x, String y,
47 List<DistanceConstraint> constraints, boolean isOrdered) {
48
49 SpanQuery sx = new SpanTermQuery(new Term("base", x));
50 SpanQuery sy = new SpanTermQuery(new Term("base", y));
51
52 return new SpanMultipleDistanceQuery(sx, sy, constraints, isOrdered,
53 true);
54 }
55
56
margaretha3d811102017-09-06 14:20:42 +020057 public static DistanceConstraint createConstraint (String unit, int min, int max,
Nils Diewaldbb33da22015-03-04 16:24:25 +000058 boolean isOrdered, boolean exclusion) {
margaretha9880aec2017-08-31 16:58:07 +020059 return createConstraint("base", unit, min, max, isOrdered, exclusion);
60 }
61
62
margaretha3d811102017-09-06 14:20:42 +020063 public static DistanceConstraint createConstraint (String field, String unit,
margaretha9880aec2017-08-31 16:58:07 +020064 int min, int max, boolean isOrdered, boolean exclusion) {
Nils Diewaldbb33da22015-03-04 16:24:25 +000065
66 if (unit.equals("w")) {
67 return new DistanceConstraint(min, max, isOrdered, exclusion);
68 }
margaretha9880aec2017-08-31 16:58:07 +020069 return new DistanceConstraint(new SpanElementQuery(field, unit), min,
Nils Diewaldbb33da22015-03-04 16:24:25 +000070 max, isOrdered, exclusion);
71 }
72
73
74 private FieldDocument createFieldDoc0 () {
75 FieldDocument fd = new FieldDocument();
Eliza Margarethae18d62e2014-02-11 11:30:48 +000076 fd.addString("ID", "doc-0");
Eliza Margaretha6f989202016-10-14 21:48:29 +020077 fd.addTV("base", "text",
margaretha4f995582015-12-14 14:14:34 +010078 "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
79 + "[(1-2)s:b|s:c|_2$<i>1<i>2]"
80 + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]"
81 + "[(3-4)s:b|_4$<i>3<i>4]"
82 + "[(4-5)s:c|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]"
83 + "[(5-6)s:e|_6$<i>5<i>6]");
Eliza Margarethae18d62e2014-02-11 11:30:48 +000084 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +000085 }
86
87
88 private FieldDocument createFieldDoc1 () {
89 FieldDocument fd = new FieldDocument();
Eliza Margarethae18d62e2014-02-11 11:30:48 +000090 fd.addString("ID", "doc-1");
Eliza Margaretha6f989202016-10-14 21:48:29 +020091 fd.addTV("base", "text",
margaretha4f995582015-12-14 14:14:34 +010092 "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
93 + "[(1-2)s:c|s:e|_2$<i>1<i>2]"
94 + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]"
95 + "[(3-4)s:c|_4$<i>3<i>4]"
96 + "[(4-5)s:e|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]"
97 + "[(5-6)s:c|_6$<i>5<i>6]");
Eliza Margarethae18d62e2014-02-11 11:30:48 +000098 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 }
100
101
102 private FieldDocument createFieldDoc2 () {
103 FieldDocument fd = new FieldDocument();
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000104 fd.addString("ID", "doc-2");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200105 fd.addTV("base", "text",
margaretha4f995582015-12-14 14:14:34 +0100106 "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
107 + "[(1-2)s:b|s:e|_2$<i>1<i>2]"
108 + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]"
109 + "[(3-4)s:b|s:c|_4$<i>3<i>4]"
110 + "[(4-5)s:e|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]"
111 + "[(5-6)s:d|_6$<i>5<i>6]"
112 + "[(6-7)s:b|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]");
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000113 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000114 }
115
116
117 private FieldDocument createFieldDoc3 () {
118 FieldDocument fd = new FieldDocument();
margaretha329e1672017-06-20 15:04:24 +0200119 fd.addString("ID", "doc-3");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000120 fd.addTV("base", "text",
margaretha4f995582015-12-14 14:14:34 +0100121 "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
122 + "[(1-2)s:b|s:c|_2$<i>1<i>2]"
123 + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>5<b>0]"
124 + "[(3-4)s:b|_4$<i>3<i>4]" + "[(4-5)s:b|_5$<i>4<i>5]"
125 + "[(5-6)s:b|_6$<i>5<i>6]" + // gap
126 "[(6-7)s:c|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]");
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000127 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 }
margarethaf14f3802017-08-30 16:34:01 +0200129
130
margaretha329e1672017-06-20 15:04:24 +0200131 private FieldDocument createFieldDoc4 () {
132 FieldDocument fd = new FieldDocument();
133 fd.addString("ID", "doc-4");
134 fd.addTV("base", "text",
135 "[(0-1)s:Zum|_1$<i>0<i>1|<>:s$<b>64<i>0<i>9<i>9<b>0]"
136 + "[(1-2)s:Begin|_2$<i>1<i>2]"
137 + "[(2-3)s:der|_3$<i>2<i>3]"
margarethaf14f3802017-08-30 16:34:01 +0200138 + "[(3-4)s:Veranstaltung|_4$<i>3<i>4]"
margaretha329e1672017-06-20 15:04:24 +0200139 + "[(4-5)s:ruft|_5$<i>4<i>5]"
margarethaf14f3802017-08-30 16:34:01 +0200140 + "[(5-6)s:der|_6$<i>5<i>6]"
margaretha329e1672017-06-20 15:04:24 +0200141 + "[(6-7)s:Moderator|_7$<i>6<i>7]"
142 + "[(7-8)s:die|_8$<i>7<i>8]"
143 + "[(8-9)s:Gäste|_9$<i>8<i>9]");
144 return fd;
145 }
margaretha9880aec2017-08-31 16:58:07 +0200146
147
margarethaf14f3802017-08-30 16:34:01 +0200148 private FieldDocument createFieldDoc5 () {
149 FieldDocument fd = new FieldDocument();
150 fd.addString("ID", "doc-5");
margaretha9880aec2017-08-31 16:58:07 +0200151 fd.addTV("tokens", "text",
margarethaf14f3802017-08-30 16:34:01 +0200152 "[(0-1)s:Meine|_1$<i>0<i>1|<>:s$<b>64<i>0<i>9<i>9<b>0]"
margaretha9880aec2017-08-31 16:58:07 +0200153 + "[(1-2)l:Erfahrung|_2$<i>1<i>2]"
margarethaf14f3802017-08-30 16:34:01 +0200154 + "[(2-3)s:Meiner|_3$<i>2<i>3]"
margaretha9880aec2017-08-31 16:58:07 +0200155 + "[(3-4)l:Erfahrung|_4$<i>3<i>4]"
margarethaf14f3802017-08-30 16:34:01 +0200156 + "[(4-5)s:Mein|_5$<i>4<i>5]"
margaretha9880aec2017-08-31 16:58:07 +0200157 + "[(5-6)l:Erfahrung|_6$<i>5<i>6]"
margarethaf14f3802017-08-30 16:34:01 +0200158 + "[(6-7)s:Meinem|_7$<i>6<i>7]"
margaretha9880aec2017-08-31 16:58:07 +0200159 + "[(7-8)l:Erfahrung|_8$<i>7<i>8]"
margarethaf14f3802017-08-30 16:34:01 +0200160 + "[(8-9)s:Meinen|_9$<i>8<i>9]");
161 return fd;
162 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163
margaretha9880aec2017-08-31 16:58:07 +0200164
165 private FieldDocument createFieldDoc6 () {
166 FieldDocument fd = new FieldDocument();
Akrone68b9a12017-08-30 20:07:16 +0200167 fd.addString("ID", "doc-6");
margaretha9880aec2017-08-31 16:58:07 +0200168 fd.addTV("tokens", "text",
Akrone68b9a12017-08-30 20:07:16 +0200169 "[(0-1)s:Meine|_1$<i>0<i>1|<>:s$<b>64<i>0<i>5<i>5<b>0]"
170 + "[(1-2)s:Meiner|_2$<i>1<i>2]"
171 + "[(2-3)s:Mein|_3$<i>2<i>3]"
172 + "[(3-4)s:Meinem|_4$<i>3<i>4]"
173 + "[(4-5)s:Meinen|_5$<i>4<i>5]");
174 return fd;
175 }
176
margaretha9880aec2017-08-31 16:58:07 +0200177
178 private FieldDocument createFieldDoc7 () {
179 FieldDocument fd = new FieldDocument();
Akrone68b9a12017-08-30 20:07:16 +0200180 fd.addString("ID", "doc-7");
margaretha9880aec2017-08-31 16:58:07 +0200181 fd.addTV("tokens", "text",
182 "[(0-1)l:Erfahrung|_1$<i>0<i>1|<>:s$<b>64<i>0<i>4<i>4<b>0]"
183 + "[(1-2)l:Erfahrung|_2$<i>1<i>2]"
184 + "[(2-3)l:Erfahrung|_3$<i>2<i>3]"
185 + "[(3-4)l:Erfahrung|_4$<i>3<i>4]");
Akrone68b9a12017-08-30 20:07:16 +0200186 return fd;
187 }
188
margaretha9880aec2017-08-31 16:58:07 +0200189
190 private FieldDocument createFieldDoc8 () {
191 FieldDocument fd = new FieldDocument();
Akrone68b9a12017-08-30 20:07:16 +0200192 fd.addString("ID", "doc-8");
margaretha9880aec2017-08-31 16:58:07 +0200193 fd.addTV("tokens", "text",
Akrone68b9a12017-08-30 20:07:16 +0200194 "[(0-1)s:Meine|_1$<i>0<i>1|<>:s$<b>64<i>0<i>9<i>9<b>0]"
margaretha9880aec2017-08-31 16:58:07 +0200195 + "[(1-2)l:Erfahrung|_2$<i>1<i>2]"
Akrone68b9a12017-08-30 20:07:16 +0200196 + "[(2-3)s:Meiner|_3$<i>2<i>3]"
margaretha9880aec2017-08-31 16:58:07 +0200197 + "[(3-4)l:Erfahrung|_4$<i>3<i>4]"
Akrone68b9a12017-08-30 20:07:16 +0200198 + "[(4-5)s:Mein|_5$<i>4<i>5]"
margaretha9880aec2017-08-31 16:58:07 +0200199 + "[(5-6)l:Erfahrung|_6$<i>5<i>6]"
Akrone68b9a12017-08-30 20:07:16 +0200200 + "[(6-7)s:Meinem|_7$<i>6<i>7]"
margaretha9880aec2017-08-31 16:58:07 +0200201 + "[(7-8)l:Erfahrung|_8$<i>7<i>8]"
Akrone68b9a12017-08-30 20:07:16 +0200202 + "[(8-9)s:Meinen|_9$<i>8<i>9]");
203 return fd;
204 }
205
margarethaf14f3802017-08-30 16:34:01 +0200206
margaretha9880aec2017-08-31 16:58:07 +0200207 @Test
208 public void testQueryWithWildCard () throws IOException {
209 // meine* /+w1:2,s0 &Erfahrung
210 ki = new KrillIndex();
211 ki.addDoc(createFieldDoc5());
212 ki.commit();
margarethaf14f3802017-08-30 16:34:01 +0200213
margaretha9880aec2017-08-31 16:58:07 +0200214 // Check simple rewriting
215 WildcardQuery wcquery =
216 new WildcardQuery(new Term("tokens", "s:Meine*"));
217 SpanMultiTermQueryWrapper<WildcardQuery> mtq =
218 new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
Akrone68b9a12017-08-30 20:07:16 +0200219
margaretha9880aec2017-08-31 16:58:07 +0200220 assertEquals(wcquery.toString(), "tokens:s:Meine*");
Akronb4ba4302017-08-30 17:25:27 +0200221
margaretha9880aec2017-08-31 16:58:07 +0200222 kr = ki.search(mtq, (short) 10);
223 assertEquals(4, kr.getMatches().size());
224 assertEquals(0, kr.getMatch(0).getStartPos());
225 assertEquals(1, kr.getMatch(0).getEndPos());
Akron5782e592017-08-30 19:58:47 +0200226
margaretha9880aec2017-08-31 16:58:07 +0200227 // Check rewriting in multidistance query
228 SpanQuery sq = new SpanTermQuery(new Term("tokens", "l:Erfahrung"));
229 kr = ki.search(sq, (short) 10);
230 assertEquals(4, kr.getMatches().size());
Akrone68b9a12017-08-30 20:07:16 +0200231
margaretha9880aec2017-08-31 16:58:07 +0200232 List<DistanceConstraint> constraints =
233 new ArrayList<DistanceConstraint>();
Akron5782e592017-08-30 19:58:47 +0200234 constraints.add(createConstraint("w", 1, 2, true, false));
margaretha9880aec2017-08-31 16:58:07 +0200235 constraints.add(createConstraint("tokens", "s", 0, 0, true, false));
Akron5782e592017-08-30 19:58:47 +0200236
margaretha9880aec2017-08-31 16:58:07 +0200237 SpanQuery mdsq =
238 new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
239 assertEquals(mdsq.toString(),
240 "spanMultipleDistance(SpanMultiTermQueryWrapper(tokens:s:Meine*), "
241 + "tokens:l:Erfahrung, [(w[1:2], ordered, notExcluded), (s[0:0], "
242 + "ordered, notExcluded)])");
Akrone68b9a12017-08-30 20:07:16 +0200243
margaretha9880aec2017-08-31 16:58:07 +0200244 kr = ki.search(mdsq, (short) 10);
245 assertEquals(3, kr.getMatches().size());
246 assertEquals(0, kr.getMatch(0).getStartPos());
247 assertEquals(2, kr.getMatch(0).getEndPos());
248
249 // Check skipping with multiple documents
250 ki.addDoc(createFieldDoc6());
251 ki.addDoc(createFieldDoc7());
252 ki.addDoc(createFieldDoc8());
253 ki.commit();
254 kr = ki.search(mdsq, (short) 10);
255 assertEquals(6, kr.getMatches().size());
256 }
257
Akrond3e077f2017-09-04 18:58:12 +0200258
259 @Test
260 public void queryJSONwildcardNoFoundry () throws QueryException, IOException {
261 // meine*
262 ki = new KrillIndex();
263 ki.addDoc(createFieldDoc5());
264 ki.commit();
265
266 // treat merging gracefully
Akrond6f5f592018-06-19 15:58:16 +0200267 SpanQueryWrapper sqw = getJsonQuery(
Akrond3e077f2017-09-04 18:58:12 +0200268 getClass().getResource("/queries/bugs/cosmas_wildcards_missingfoundry.jsonld")
269 .getFile());
270 SpanQuery sq = sqw.toQuery();
271 assertEquals(sq.toString(),"SpanMultiTermQueryWrapper(tokens:l:Erfahr*)");
272
273 kr = ki.search(sq, (short) 10);
274 assertEquals(4, kr.getMatches().size());
275 assertEquals(1, kr.getMatch(0).getStartPos());
276 assertEquals(2, kr.getMatch(0).getEndPos());
277 };
278
margaretha9880aec2017-08-31 16:58:07 +0200279
280 @Test
margaretha329e1672017-06-20 15:04:24 +0200281 public void testUnorderedTokenDistance () throws IOException {
282 ki = new KrillIndex();
283 ki.addDoc(createFieldDoc4());
284 ki.commit();
285
margarethaf14f3802017-08-30 16:34:01 +0200286 List<DistanceConstraint> constraints =
287 new ArrayList<DistanceConstraint>();
margaretha329e1672017-06-20 15:04:24 +0200288 constraints.add(createConstraint("w", 0, 5, true, false));
289 constraints.add(createConstraint("s", 0, 0, true, false));
290
291 SpanQuery mdq;
292 mdq = createQuery("s:Begin", "s:Moderator", constraints, false);
293 kr = ki.search(mdq, (short) 10);
294 assertEquals(1, kr.getMatch(0).getStartPos());
295 assertEquals(7, kr.getMatch(0).getEndPos());
margarethaf14f3802017-08-30 16:34:01 +0200296
297 SpanQuery sq = new SpanDistanceQuery(mdq,
margaretha329e1672017-06-20 15:04:24 +0200298 new SpanTermQuery(new Term("base", "s:ruft")),
299 new DistanceConstraint(0, 0, false, false), true);
margarethaf14f3802017-08-30 16:34:01 +0200300
margaretha329e1672017-06-20 15:04:24 +0200301 kr = ki.search(sq, (short) 10);
302 assertEquals(1, kr.getMatch(0).getStartPos());
303 assertEquals(7, kr.getMatch(0).getEndPos());
304 }
margarethaf14f3802017-08-30 16:34:01 +0200305
Nils Diewaldbb33da22015-03-04 16:24:25 +0000306
307 /**
308 * Unordered, same sentence
Eliza Margaretha6f989202016-10-14 21:48:29 +0200309 */
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000310 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000311 public void testCase1 () throws IOException {
312 ki = new KrillIndex();
313 ki.addDoc(createFieldDoc0());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000314 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000315
margarethaf14f3802017-08-30 16:34:01 +0200316 List<DistanceConstraint> constraints =
317 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000318 constraints.add(createConstraint("w", 0, 2, false, false));
319 constraints.add(createConstraint("s", 0, 0, false, false));
320
321 SpanQuery mdq;
322 mdq = createQuery("s:b", "s:c", constraints, false);
323 kr = ki.search(mdq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000324
325 assertEquals((long) 3, kr.getTotalResults());
326 assertEquals(0, kr.getMatch(0).getStartPos());
327 assertEquals(2, kr.getMatch(0).getEndPos());
328 assertEquals(1, kr.getMatch(1).getStartPos());
329 assertEquals(2, kr.getMatch(1).getEndPos());
330 assertEquals(2, kr.getMatch(2).getStartPos());
331 assertEquals(4, kr.getMatch(2).getEndPos());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000332 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000333
334
335 /**
336 * Ordered
337 * Unordered
338 * Two constraints
339 * Three constraints
Eliza Margaretha6f989202016-10-14 21:48:29 +0200340 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000341 @Test
342 public void testCase2 () throws IOException {
343 ki = new KrillIndex();
344 ki.addDoc(createFieldDoc0());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000345 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000346
Eliza Margarethad4693462014-03-17 13:16:18 +0000347 // Ordered - two constraints
margarethaf14f3802017-08-30 16:34:01 +0200348 List<DistanceConstraint> constraints =
349 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000350 constraints.add(createConstraint("w", 0, 2, true, false));
351 constraints.add(createConstraint("s", 1, 1, true, false));
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000352
Nils Diewaldbb33da22015-03-04 16:24:25 +0000353 SpanQuery mdq;
354 mdq = createQuery("s:b", "s:c", constraints, true);
355 kr = ki.search(mdq, (short) 10);
356 assertEquals((long) 3, kr.getTotalResults());
357 assertEquals(0, kr.getMatch(0).getStartPos());
358 assertEquals(3, kr.getMatch(0).getEndPos());
359 assertEquals(1, kr.getMatch(1).getStartPos());
360 assertEquals(3, kr.getMatch(1).getEndPos());
361 assertEquals(3, kr.getMatch(2).getStartPos());
362 assertEquals(5, kr.getMatch(2).getEndPos());
Eliza Margaretha5f606922014-02-18 15:33:49 +0000363
Nils Diewaldbb33da22015-03-04 16:24:25 +0000364 // Three constraints
365 constraints.add(createConstraint("p", 0, 0, true, false));
366 mdq = createQuery("s:b", "s:c", constraints, true);
367 kr = ki.search(mdq, (short) 10);
368 assertEquals((long) 2, kr.getTotalResults());
369
370
371 // Unordered - two constraints
372 constraints.clear();
373 constraints.add(createConstraint("w", 0, 2, false, false));
374 constraints.add(createConstraint("s", 1, 1, false, false));
375
376 mdq = createQuery("s:c", "s:b", constraints, false);
377 kr = ki.search(mdq, (short) 10);
378 assertEquals((long) 4, kr.getTotalResults());
379 assertEquals(1, kr.getMatch(2).getStartPos());
380 assertEquals(4, kr.getMatch(2).getEndPos());
381
382 // Three constraints
383 constraints.add(createConstraint("p", 0, 0, false, false));
384 mdq = createQuery("s:b", "s:c", constraints, false);
385 kr = ki.search(mdq, (short) 10);
386 assertEquals((long) 3, kr.getTotalResults());
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000387
388 }
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000389
Nils Diewaldbb33da22015-03-04 16:24:25 +0000390
391 /**
392 * Multiple documents
393 * Ensure same doc (inner term span)
Eliza Margaretha6f989202016-10-14 21:48:29 +0200394 */
Eliza Margarethacdb769b2014-02-11 17:24:13 +0000395 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000396 public void testCase3 () throws IOException {
397 ki = new KrillIndex();
398 ki.addDoc(createFieldDoc0());
399 ki.addDoc(createFieldDoc1());
400 ki.addDoc(createFieldDoc2());
401 ki.commit();
402
margarethaf14f3802017-08-30 16:34:01 +0200403 List<DistanceConstraint> constraints =
404 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000405 constraints.add(createConstraint("w", 1, 2, false, false));
406 constraints.add(createConstraint("s", 1, 2, false, false));
407
408 SpanQuery mdq;
409 mdq = createQuery("s:b", "s:e", constraints, false);
410 kr = ki.search(mdq, (short) 10);
411
412 assertEquals((long) 5, kr.getTotalResults());
413 assertEquals(3, kr.getMatch(0).getStartPos());
414 assertEquals(6, kr.getMatch(0).getEndPos());
415 assertEquals(2, kr.getMatch(1).getLocalDocID());
416 assertEquals(1, kr.getMatch(2).getStartPos());
417 assertEquals(4, kr.getMatch(2).getEndPos());
418 assertEquals(3, kr.getMatch(3).getStartPos());
419 assertEquals(5, kr.getMatch(3).getEndPos());
420 assertEquals(4, kr.getMatch(4).getStartPos());
421 assertEquals(7, kr.getMatch(4).getEndPos());
422
margaretha59b905f2017-09-05 11:15:48 +0200423 // System.out.print(kr.getTotalResults()+"\n");
424 // for (int i=0; i< kr.getTotalResults(); i++){
425 // System.out.println(
426 // kr.match(i).getLocalDocID()+" "+
427 // kr.match(i).startPos + " " +
428 // kr.match(i).endPos
429 // );
430 // }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000431
Eliza Margaretha01929182014-02-19 11:48:59 +0000432 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000433
434
435 /**
436 * Skip to
Eliza Margaretha6f989202016-10-14 21:48:29 +0200437 */
Eliza Margarethae335beb2014-02-27 12:56:14 +0000438 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000439 public void testCase4 () throws IOException {
440 ki = new KrillIndex();
441 ki.addDoc(createFieldDoc0());
442 ki.addDoc(createFieldDoc3());
443 ki.addDoc(createFieldDoc1());
444 ki.addDoc(createFieldDoc2());
445 ki.commit();
Eliza Margarethad4693462014-03-17 13:16:18 +0000446
margarethaf14f3802017-08-30 16:34:01 +0200447 List<DistanceConstraint> constraints =
448 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000449 constraints.add(createConstraint("w", 1, 2, false, false));
450 constraints.add(createConstraint("s", 1, 2, false, false));
Eliza Margarethae335beb2014-02-27 12:56:14 +0000451
Nils Diewaldbb33da22015-03-04 16:24:25 +0000452 SpanQuery mdq;
453 mdq = createQuery("s:b", "s:c", constraints, false);
454
Eliza Margaretha6f989202016-10-14 21:48:29 +0200455 SpanQuery sq = new SpanNextQuery(mdq,
456 new SpanTermQuery(new Term("base", "s:e")));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000457 kr = ki.search(sq, (short) 10);
458
459 assertEquals((long) 2, kr.getTotalResults());
460 assertEquals(3, kr.getMatch(0).getStartPos());
461 assertEquals(6, kr.getMatch(0).getEndPos());
462 assertEquals(3, kr.getMatch(1).getLocalDocID());
463 assertEquals(1, kr.getMatch(1).getStartPos());
464 assertEquals(5, kr.getMatch(1).getEndPos());
465
466 }
467
468
469 /**
margaretha35120872016-12-19 18:24:22 +0100470 * Same tokens: unordered yields twice the same results as ordered
Eliza Margaretha6f989202016-10-14 21:48:29 +0200471 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000472 @Test
473 public void testCase5 () throws IOException {
474 ki = new KrillIndex();
475 ki.addDoc(createFieldDoc0());
476 ki.addDoc(createFieldDoc1());
477 ki.commit();
478
margaretha35120872016-12-19 18:24:22 +0100479 // ordered
margarethaf14f3802017-08-30 16:34:01 +0200480 List<DistanceConstraint> constraints =
481 new ArrayList<DistanceConstraint>();
margaretha35120872016-12-19 18:24:22 +0100482 constraints.add(createConstraint("w", 1, 2, true, false));
483 constraints.add(createConstraint("s", 1, 2, true, false));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000484
485 SpanQuery mdq;
486 mdq = createQuery("s:c", "s:c", constraints, false);
487 kr = ki.search(mdq, (short) 10);
margarethaf14f3802017-08-30 16:34:01 +0200488
Nils Diewaldbb33da22015-03-04 16:24:25 +0000489 assertEquals((long) 4, kr.getTotalResults());
490 assertEquals(1, kr.getMatch(0).getStartPos());
491 assertEquals(3, kr.getMatch(0).getEndPos());
492 assertEquals(2, kr.getMatch(1).getStartPos());
493 assertEquals(5, kr.getMatch(1).getEndPos());
494 assertEquals(1, kr.getMatch(2).getLocalDocID());
495 assertEquals(1, kr.getMatch(2).getStartPos());
496 assertEquals(4, kr.getMatch(2).getEndPos());
497 assertEquals(3, kr.getMatch(3).getStartPos());
498 assertEquals(6, kr.getMatch(3).getEndPos());
margarethaf14f3802017-08-30 16:34:01 +0200499
margaretha35120872016-12-19 18:24:22 +0100500 //unordered
501 constraints = new ArrayList<DistanceConstraint>();
502 constraints.add(createConstraint("w", 1, 2, false, false));
503 constraints.add(createConstraint("s", 1, 2, false, false));
504
505 mdq = createQuery("s:c", "s:c", constraints, false);
506 kr = ki.search(mdq, (short) 10);
507 assertEquals((long) 8, kr.getTotalResults());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000508
509 }
510
511
512 /**
513 * Exclusion
514 * Gaps
Eliza Margaretha6f989202016-10-14 21:48:29 +0200515 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000516 @Test
517 public void testCase6 () throws IOException {
518 ki = new KrillIndex();
519 ki.addDoc(createFieldDoc3());
520 ki.commit();
521
522 // First constraint - token exclusion
523 SpanQuery sx = new SpanTermQuery(new Term("base", "s:b"));
524 SpanQuery sy = new SpanTermQuery(new Term("base", "s:c"));
525
526 DistanceConstraint dc1 = createConstraint("w", 0, 1, false, true);
527 SpanDistanceQuery sq = new SpanDistanceQuery(sx, sy, dc1, true);
528
529 kr = ki.search(sq, (short) 10);
530 assertEquals((long) 1, kr.getTotalResults());
531 // 4-5
532
533 // Second constraint - element distance
534 DistanceConstraint dc2 = createConstraint("s", 1, 1, false, false);
535 sq = new SpanDistanceQuery(sx, sy, dc2, true);
536 kr = ki.search(sq, (short) 10);
537 // 0-3, 1-3, 1-4, 1-5, 3-7, 4-7
538 assertEquals((long) 6, kr.getTotalResults());
539
540
margarethaf14f3802017-08-30 16:34:01 +0200541 List<DistanceConstraint> constraints =
542 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000543 constraints.add(dc1);
544 constraints.add(dc2);
545
546 SpanQuery mdq;
547 mdq = createQuery("s:b", "s:c", constraints, false);
548 kr = ki.search(mdq, (short) 10);
549
550 assertEquals((long) 2, kr.getTotalResults());
551 assertEquals(1, kr.getMatch(0).getStartPos());
552 assertEquals(5, kr.getMatch(0).getEndPos());
553 assertEquals(4, kr.getMatch(1).getStartPos());
554 assertEquals(7, kr.getMatch(1).getEndPos());
555 }
556
557
558 /**
559 * Exclusion, multiple documents
Eliza Margaretha6f989202016-10-14 21:48:29 +0200560 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000561 @Test
562 public void testCase7 () throws IOException {
563 ki = new KrillIndex();
564 ki.addDoc(createFieldDoc2());
565 ki.commit();
566
567 SpanQuery sx = new SpanTermQuery(new Term("base", "s:b"));
568 SpanQuery sy = new SpanTermQuery(new Term("base", "s:c"));
569 // Second constraint
Eliza Margaretha6f989202016-10-14 21:48:29 +0200570 SpanDistanceQuery sq = new SpanDistanceQuery(sx, sy,
571 createConstraint("s", 0, 0, false, true), true);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000572 kr = ki.search(sq, (short) 10);
573 assertEquals((long) 3, kr.getTotalResults());
574 // 0-1, 1-2, 6-7
575
576 // Exclusion within the same sentence
margarethaf14f3802017-08-30 16:34:01 +0200577 List<DistanceConstraint> constraints =
578 new ArrayList<DistanceConstraint>();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000579 constraints.add(createConstraint("w", 0, 2, false, true));
580 constraints.add(createConstraint("s", 0, 0, false, true));
581
582 SpanQuery mdq;
583 mdq = createQuery("s:b", "s:c", constraints, false);
584 kr = ki.search(mdq, (short) 10);
585 assertEquals((long) 2, kr.getTotalResults());
586 assertEquals(0, kr.getMatch(0).getStartPos());
587 assertEquals(1, kr.getMatch(0).getEndPos());
588 assertEquals(6, kr.getMatch(1).getStartPos());
589 assertEquals(7, kr.getMatch(1).getEndPos());
590
591
592 // Third constraint
Eliza Margaretha6f989202016-10-14 21:48:29 +0200593 sq = new SpanDistanceQuery(sx, sy,
594 createConstraint("p", 0, 0, false, true), true);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000595 kr = ki.search(sq, (short) 10);
596 assertEquals((long) 1, kr.getTotalResults());
597 // 6-7
598
599 constraints.add(createConstraint("p", 0, 0, false, true));
600 mdq = createQuery("s:b", "s:c", constraints, false);
601 kr = ki.search(mdq, (short) 10);
602
603 assertEquals((long) 1, kr.getTotalResults());
604 assertEquals(6, kr.getMatch(0).getStartPos());
605 assertEquals(7, kr.getMatch(0).getEndPos());
606
607 }
Eliza Margarethae18d62e2014-02-11 11:30:48 +0000608}