blob: e2aa2705d36c113780f437fe33beb280da59c42e [file] [log] [blame]
Eliza Margarethae335beb2014-02-27 12:56:14 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11
Nils Diewalda14ecd62015-02-26 21:00:20 +000012import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethad4693462014-03-17 13:16:18 +000013import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margarethae335beb2014-02-27 12:56:14 +000014import de.ids_mannheim.korap.query.SpanDistanceQuery;
15import de.ids_mannheim.korap.query.SpanElementQuery;
16import de.ids_mannheim.korap.query.SpanNextQuery;
margaretha71c66ee2015-12-11 14:39:55 +010017import de.ids_mannheim.korap.response.Result;
Eliza Margarethae335beb2014-02-27 12:56:14 +000018
19public class TestElementDistanceExclusionIndex {
Nils Diewaldbb33da22015-03-04 16:24:25 +000020
Nils Diewald884dbcf2015-02-27 17:02:28 +000021 Result kr;
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 KrillIndex ki;
23
24
25 private SpanQuery createQuery (String e, String x, String y, int min,
26 int max, boolean isOrdered, boolean exclusion) {
27 SpanElementQuery eq = new SpanElementQuery("base", e);
Eliza Margaretha6f989202016-10-14 21:48:29 +020028 SpanDistanceQuery sq = new SpanDistanceQuery(
29 new SpanTermQuery(new Term("base", x)),
30 new SpanTermQuery(new Term("base", y)),
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 new DistanceConstraint(eq, min, max, isOrdered, exclusion),
32 true);
33 return sq;
Eliza Margarethae335beb2014-02-27 12:56:14 +000034 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000035
36
37 private FieldDocument createFieldDoc0 () {
38 FieldDocument fd = new FieldDocument();
Eliza Margarethae335beb2014-02-27 12:56:14 +000039 fd.addString("ID", "doc-0");
margaretha71c66ee2015-12-11 14:39:55 +010040 fd.addTV("base", "ceccdcdecd",
margaretha4f995582015-12-14 14:14:34 +010041 "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
42 + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
43 + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010044 + "[(3-4)s:c|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010045 + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010046 + "[(5-6)s:c|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010047 + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]"
48 + "[(7-8)s:e|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010049 + "[(8-9)s:c|_9$<i>8<i>9]"
50 + "[(9-10)s:d|_10$<i>9<i>10]");
Eliza Margarethae335beb2014-02-27 12:56:14 +000051 return fd;
52 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000053
54
55 private FieldDocument createFieldDoc1 () {
56 FieldDocument fd = new FieldDocument();
Eliza Margarethae335beb2014-02-27 12:56:14 +000057 fd.addString("ID", "doc-1");
margaretha71c66ee2015-12-11 14:39:55 +010058 fd.addTV("base", "eedadaeed",
margaretha4f995582015-12-14 14:14:34 +010059 "[(0-1)s:e|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
60 + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
61 + "[(2-3)s:d|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010062 + "[(3-4)s:a|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010063 + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>7<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010064 + "[(5-6)s:a|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010065 + "[(6-7)s:e|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010066 + "[(7-8)s:e|_8$<i>7<i>8]" + "[(8-9)s:d|_9$<i>8<i>9]");
Eliza Margarethae335beb2014-02-27 12:56:14 +000067 return fd;
68 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000069
70
71 private FieldDocument createFieldDoc2 () {
72 FieldDocument fd = new FieldDocument();
73 fd.addString("ID", "doc-");
margaretha71c66ee2015-12-11 14:39:55 +010074 fd.addTV("base", "dcacacdac",
margaretha4f995582015-12-14 14:14:34 +010075 "[(0-1)s:d|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
76 + "[(1-2)s:c|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
77 + "[(2-3)s:a|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010078 + "[(3-4)s:c|_4$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +010079 + "[(4-5)s:a|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010080 + "[(5-6)s:c|_6$<i>5<i>6]"
margaretha4f995582015-12-14 14:14:34 +010081 + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]"
82 + "[(7-8)s:a|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +010083 + "[(8-9)s:c|_9$<i>8<i>9]");
Nils Diewaldbb33da22015-03-04 16:24:25 +000084 return fd;
85 }
86
87
88 /**
89 * Distance Zero, unordered
90 * There is a secondspan on the right side
Eliza Margaretha6f989202016-10-14 21:48:29 +020091 */
Eliza Margarethae335beb2014-02-27 12:56:14 +000092 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000093 public void testCase1 () throws IOException {
94 ki = new KrillIndex();
95 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +000096 ki.commit();
97 SpanQuery sq;
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 sq = createQuery("s", "s:d", "s:c", 0, 0, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +000099 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000100 assertEquals(kr.getTotalResults(), 1);
101 assertEquals(6, kr.getMatch(0).startPos);
102 assertEquals(7, kr.getMatch(0).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000103 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000104
105
106 /**
107 * There is another firstspan within max distance
108 * Unordered
Eliza Margaretha6f989202016-10-14 21:48:29 +0200109 */
Eliza Margarethae335beb2014-02-27 12:56:14 +0000110 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111 public void testCase2 () throws IOException {
112 ki = new KrillIndex();
113 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000114 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000115 SpanQuery sq;
116
117 sq = createQuery("s", "s:c", "s:d", 0, 0, false, true);
118 kr = ki.search(sq, (short) 10);
119
120 assertEquals(kr.getTotalResults(), 4);
121 assertEquals(0, kr.getMatch(0).startPos);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000122 assertEquals(1, kr.getMatch(0).endPos);
123 assertEquals(2, kr.getMatch(1).startPos);
124 assertEquals(3, kr.getMatch(1).endPos);
125 assertEquals(3, kr.getMatch(2).startPos);
126 assertEquals(4, kr.getMatch(2).endPos);
127 assertEquals(8, kr.getMatch(3).startPos);
128 assertEquals(9, kr.getMatch(3).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000129 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000130
131
132 /**
133 * Distance 0-1, ordered, unordered
Eliza Margaretha6f989202016-10-14 21:48:29 +0200134 */
Eliza Margarethae335beb2014-02-27 12:56:14 +0000135 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000136 public void testCase3 () throws IOException {
137 ki = new KrillIndex();
138 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000139 ki.commit();
140 SpanQuery sq;
141 // unordered
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142 sq = createQuery("s", "s:c", "s:e", 0, 1, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000143 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000144 assertEquals(kr.getTotalResults(), 1);
145 assertEquals(5, kr.getMatch(0).startPos);
146 assertEquals(6, kr.getMatch(0).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000147
Eliza Margarethae335beb2014-02-27 12:56:14 +0000148 //ordered
Nils Diewaldbb33da22015-03-04 16:24:25 +0000149 sq = createQuery("s", "s:c", "s:e", 0, 1, true, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000150 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000151 assertEquals(kr.getTotalResults(), 3);
152 assertEquals(2, kr.getMatch(0).startPos);
153 assertEquals(3, kr.getMatch(0).endPos);
154 assertEquals(3, kr.getMatch(1).startPos);
155 assertEquals(4, kr.getMatch(1).endPos);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000156 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000157
158
159 /**
160 * Multiple documents, ordered
161 * No more secondspans, but there is still a firstspan
Eliza Margaretha6f989202016-10-14 21:48:29 +0200162 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163 @Test
164 public void testCase4 () throws IOException {
165 ki = new KrillIndex();
166 ki.addDoc(createFieldDoc0());
167 ki.addDoc(createFieldDoc1());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000168 ki.commit();
169 SpanQuery sq;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000170
171 sq = createQuery("s", "s:d", "s:e", 1, 1, true, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000172 kr = ki.search(sq, (short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000174 assertEquals(kr.getTotalResults(), 3);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000175 assertEquals(4, kr.getMatch(0).startPos);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000176 assertEquals(5, kr.getMatch(0).endPos);
177 assertEquals(1, kr.getMatch(1).getLocalDocID());
178 assertEquals(2, kr.getMatch(1).startPos);
179 assertEquals(3, kr.getMatch(1).endPos);
180 assertEquals(8, kr.getMatch(2).startPos);
181 assertEquals(9, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000182 }
183
184
185 /**
186 * Skip to
Eliza Margaretha6f989202016-10-14 21:48:29 +0200187 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000188 @Test
189 public void testCase5 () throws IOException {
190 ki = new KrillIndex();
191 ki.addDoc(createFieldDoc0());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000192 ki.addDoc(createFieldDoc1());
193 ki.addDoc(createFieldDoc0());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000194 ki.addDoc(createFieldDoc2());
Eliza Margarethae335beb2014-02-27 12:56:14 +0000195 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000196
197 SpanQuery sq = createQuery("s", "s:c", "s:d", 1, 1, false, true);
Eliza Margarethae335beb2014-02-27 12:56:14 +0000198 kr = ki.search(sq, (short) 10);
Nils Diewaldaa5c1d32014-03-20 23:46:55 +0000199
Nils Diewaldbb33da22015-03-04 16:24:25 +0000200 assertEquals(kr.getTotalResults(), 3);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000201 assertEquals(3, kr.getMatch(2).getLocalDocID());
202 assertEquals(3, kr.getMatch(2).startPos);
203 assertEquals(4, kr.getMatch(2).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204
Eliza Margarethae335beb2014-02-27 12:56:14 +0000205 sq = new SpanNextQuery(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000206 createQuery("s", "s:c", "s:d", 1, 1, false, true),
207 new SpanTermQuery(new Term("base", "s:a")));
208
Eliza Margarethae335beb2014-02-27 12:56:14 +0000209 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000210 assertEquals(kr.getTotalResults(), 1);
211 assertEquals(3, kr.getMatch(0).getLocalDocID());
212 assertEquals(3, kr.getMatch(0).startPos);
213 assertEquals(5, kr.getMatch(0).endPos);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000214 }
Eliza Margarethae335beb2014-02-27 12:56:14 +0000215}