blob: 23b6ac8a5fbfa91395a1008d2e80fb3a7e2a6246 [file] [log] [blame]
Eliza Margarethae335beb2014-02-27 12:56:14 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11
Nils Diewalda14ecd62015-02-26 21:00:20 +000012import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethad4693462014-03-17 13:16:18 +000013import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margarethae335beb2014-02-27 12:56:14 +000014import de.ids_mannheim.korap.query.SpanDistanceQuery;
15import de.ids_mannheim.korap.query.SpanElementQuery;
16import de.ids_mannheim.korap.query.SpanNextQuery;
margaretha71c66ee2015-12-11 14:39:55 +010017import de.ids_mannheim.korap.response.Result;
Eliza Margarethae335beb2014-02-27 12:56:14 +000018
19public class TestElementDistanceExclusionIndex {
Nils Diewaldbb33da22015-03-04 16:24:25 +000020
Nils Diewald884dbcf2015-02-27 17:02:28 +000021 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 KrillIndex ki;
23
24
25 private SpanQuery createQuery (String e, String x, String y, int min,
26 int max, boolean isOrdered, boolean exclusion) {
27 SpanElementQuery eq = new SpanElementQuery("base", e);
28 SpanDistanceQuery sq = new SpanDistanceQuery(new SpanTermQuery(
29 new Term("base", x)), new SpanTermQuery(new Term("base", y)),
30 new DistanceConstraint(eq, min, max, isOrdered, exclusion),
31 true);
32 return sq;
Eliza Margarethae335beb2014-02-27 12:56:14 +000033 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000034
35
36 private FieldDocument createFieldDoc0 () {
37 FieldDocument fd = new FieldDocument();
Eliza Margarethae335beb2014-02-27 12:56:14 +000038 fd.addString("ID", "doc-0");
margaretha71c66ee2015-12-11 14:39:55 +010039 fd.addTV("base", "ceccdcdecd",
margaretha4f995582015-12-14 14:14:34 +010040 "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
41 + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
42 + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010043 + "[(3-4)s:c|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010044 + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010045 + "[(5-6)s:c|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010046 + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]"
47 + "[(7-8)s:e|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010048 + "[(8-9)s:c|_9$<i>8<i>9]"
49 + "[(9-10)s:d|_10$<i>9<i>10]");
Eliza Margarethae335beb2014-02-27 12:56:14 +000050 return fd;
51 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000052
53
54 private FieldDocument createFieldDoc1 () {
55 FieldDocument fd = new FieldDocument();
Eliza Margarethae335beb2014-02-27 12:56:14 +000056 fd.addString("ID", "doc-1");
margaretha71c66ee2015-12-11 14:39:55 +010057 fd.addTV("base", "eedadaeed",
margaretha4f995582015-12-14 14:14:34 +010058 "[(0-1)s:e|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
59 + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
60 + "[(2-3)s:d|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010061 + "[(3-4)s:a|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010062 + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>7<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010063 + "[(5-6)s:a|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010064 + "[(6-7)s:e|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010065 + "[(7-8)s:e|_8$<i>7<i>8]" + "[(8-9)s:d|_9$<i>8<i>9]");
Eliza Margarethae335beb2014-02-27 12:56:14 +000066 return fd;
67 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000068
69
70 private FieldDocument createFieldDoc2 () {
71 FieldDocument fd = new FieldDocument();
72 fd.addString("ID", "doc-");
margaretha71c66ee2015-12-11 14:39:55 +010073 fd.addTV("base", "dcacacdac",
margaretha4f995582015-12-14 14:14:34 +010074 "[(0-1)s:d|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
75 + "[(1-2)s:c|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
76 + "[(2-3)s:a|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010077 + "[(3-4)s:c|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010078 + "[(4-5)s:a|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010079 + "[(5-6)s:c|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010080 + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]"
81 + "[(7-8)s:a|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010082 + "[(8-9)s:c|_9$<i>8<i>9]");
Nils Diewaldbb33da22015-03-04 16:24:25 +000083 return fd;
84 }
85
86
87 /**
88 * Distance Zero, unordered
89 * There is a secondspan on the right side
Eliza Margarethae335beb2014-02-27 12:56:14 +000090 * */
91 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000092 public void testCase1 () throws IOException {
93 ki = new KrillIndex();
94 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +000095 ki.commit();
96 SpanQuery sq;
Nils Diewaldbb33da22015-03-04 16:24:25 +000097 sq = createQuery("s", "s:d", "s:c", 0, 0, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +000098 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +000099 assertEquals(kr.getTotalResults(), 1);
100 assertEquals(6, kr.getMatch(0).startPos);
101 assertEquals(7, kr.getMatch(0).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000102 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000103
104
105 /**
106 * There is another firstspan within max distance
107 * Unordered
Eliza Margarethae335beb2014-02-27 12:56:14 +0000108 * */
109 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000110 public void testCase2 () throws IOException {
111 ki = new KrillIndex();
112 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000113 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000114 SpanQuery sq;
115
116 sq = createQuery("s", "s:c", "s:d", 0, 0, false, true);
117 kr = ki.search(sq, (short) 10);
118
119 assertEquals(kr.getTotalResults(), 4);
120 assertEquals(0, kr.getMatch(0).startPos);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000121 assertEquals(1, kr.getMatch(0).endPos);
122 assertEquals(2, kr.getMatch(1).startPos);
123 assertEquals(3, kr.getMatch(1).endPos);
124 assertEquals(3, kr.getMatch(2).startPos);
125 assertEquals(4, kr.getMatch(2).endPos);
126 assertEquals(8, kr.getMatch(3).startPos);
127 assertEquals(9, kr.getMatch(3).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000128 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000129
130
131 /**
132 * Distance 0-1, ordered, unordered
Eliza Margarethae335beb2014-02-27 12:56:14 +0000133 * */
134 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 public void testCase3 () throws IOException {
136 ki = new KrillIndex();
137 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000138 ki.commit();
139 SpanQuery sq;
140 // unordered
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 sq = createQuery("s", "s:c", "s:e", 0, 1, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000142 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000143 assertEquals(kr.getTotalResults(), 1);
144 assertEquals(5, kr.getMatch(0).startPos);
145 assertEquals(6, kr.getMatch(0).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000146
Eliza Margarethae335beb2014-02-27 12:56:14 +0000147 //ordered
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148 sq = createQuery("s", "s:c", "s:e", 0, 1, true, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000149 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000150 assertEquals(kr.getTotalResults(), 3);
151 assertEquals(2, kr.getMatch(0).startPos);
152 assertEquals(3, kr.getMatch(0).endPos);
153 assertEquals(3, kr.getMatch(1).startPos);
154 assertEquals(4, kr.getMatch(1).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000155 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156
157
158 /**
159 * Multiple documents, ordered
160 * No more secondspans, but there is still a firstspan
161 * */
162 @Test
163 public void testCase4 () throws IOException {
164 ki = new KrillIndex();
165 ki.addDoc(createFieldDoc0());
166 ki.addDoc(createFieldDoc1());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000167 ki.commit();
168 SpanQuery sq;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000169
170 sq = createQuery("s", "s:d", "s:e", 1, 1, true, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000171 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000173 assertEquals(kr.getTotalResults(), 3);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000174 assertEquals(4, kr.getMatch(0).startPos);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000175 assertEquals(5, kr.getMatch(0).endPos);
176 assertEquals(1, kr.getMatch(1).getLocalDocID());
177 assertEquals(2, kr.getMatch(1).startPos);
178 assertEquals(3, kr.getMatch(1).endPos);
179 assertEquals(8, kr.getMatch(2).startPos);
180 assertEquals(9, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000181 }
182
183
184 /**
185 * Skip to
186 * */
187 @Test
188 public void testCase5 () throws IOException {
189 ki = new KrillIndex();
190 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000191 ki.addDoc(createFieldDoc1());
192 ki.addDoc(createFieldDoc0());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000193 ki.addDoc(createFieldDoc2());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000194 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000195
196 SpanQuery sq = createQuery("s", "s:c", "s:d", 1, 1, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000197 kr = ki.search(sq, (short) 10);
Nils Diewaldaa5c1d32014-03-20 23:46:55 +0000198
Nils Diewaldbb33da22015-03-04 16:24:25 +0000199 assertEquals(kr.getTotalResults(), 3);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000200 assertEquals(3, kr.getMatch(2).getLocalDocID());
201 assertEquals(3, kr.getMatch(2).startPos);
202 assertEquals(4, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000203
Eliza Margarethae335beb2014-02-27 12:56:14 +0000204 sq = new SpanNextQuery(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000205 createQuery("s", "s:c", "s:d", 1, 1, false, true),
206 new SpanTermQuery(new Term("base", "s:a")));
207
Eliza Margarethae335beb2014-02-27 12:56:14 +0000208 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000209 assertEquals(kr.getTotalResults(), 1);
210 assertEquals(3, kr.getMatch(0).getLocalDocID());
211 assertEquals(3, kr.getMatch(0).startPos);
212 assertEquals(5, kr.getMatch(0).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000213 }
Eliza Margarethae335beb2014-02-27 12:56:14 +0000214}