blob: 0ef4ae52d66cf328109e35b63fea445bd37775cc [file] [log] [blame]
Eliza Margaretha609fcc62014-02-13 14:10:20 +00001package de.ids_mannheim.korap.index;
2
margaretha69726b12015-12-10 12:03:19 +01003import static org.junit.Assert.assertEquals;
Eliza Margaretha63926cb2014-02-13 16:54:23 +00004
Eliza Margaretha609fcc62014-02-13 14:10:20 +00005import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11
Nils Diewalda14ecd62015-02-26 21:00:20 +000012import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethad4693462014-03-17 13:16:18 +000013import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margaretha609fcc62014-02-13 14:10:20 +000014import de.ids_mannheim.korap.query.SpanDistanceQuery;
Eliza Margaretha63926cb2014-02-13 16:54:23 +000015import de.ids_mannheim.korap.query.SpanElementQuery;
Eliza Margaretha38a94662014-11-20 13:48:00 +000016import de.ids_mannheim.korap.query.SpanNextQuery;
margaretha69726b12015-12-10 12:03:19 +010017import de.ids_mannheim.korap.response.Result;
Eliza Margaretha609fcc62014-02-13 14:10:20 +000018
19public class TestDistanceExclusionIndex {
20
Nils Diewalda14ecd62015-02-26 21:00:20 +000021 private KrillIndex ki;
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 private Result kr;
Eliza Margaretha609fcc62014-02-13 14:10:20 +000023
Nils Diewaldbb33da22015-03-04 16:24:25 +000024
25 /**
26 * Ordered, unordered
Eliza Margaretha6f989202016-10-14 21:48:29 +020027 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000028 @Test
29 public void testCase1 () throws IOException {
30 ki = new KrillIndex();
31 ki.addDoc(createFieldDoc0());
Eliza Margaretha63926cb2014-02-13 16:54:23 +000032 ki.commit();
33 SpanQuery sq;
Eliza Margaretha38a94662014-11-20 13:48:00 +000034 //ordered distance 0 to 1
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 sq = createQuery("s:c", "s:e", 0, 1, true);
36 kr = ki.search(sq, (short) 10);
37 assertEquals((long) 3, kr.getTotalResults());
38 assertEquals(2, kr.getMatch(0).getStartPos());
39 assertEquals(3, kr.getMatch(0).getEndPos());
40 assertEquals(3, kr.getMatch(1).getStartPos());
41 assertEquals(4, kr.getMatch(1).getEndPos());
42 assertEquals(5, kr.getMatch(2).getStartPos());
43 assertEquals(6, kr.getMatch(2).getEndPos());
44
Eliza Margaretha63926cb2014-02-13 16:54:23 +000045 // Unordered
Nils Diewaldbb33da22015-03-04 16:24:25 +000046 sq = createQuery("s:c", "s:e", 0, 1, false);
Eliza Margaretha63926cb2014-02-13 16:54:23 +000047 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +000048 assertEquals((long) 2, kr.getTotalResults());
Eliza Margaretha63926cb2014-02-13 16:54:23 +000049 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000050
51
52 /**
53 * Multiple docs, unordered
54 * No more secondSpans
Eliza Margaretha6f989202016-10-14 21:48:29 +020055 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000056 @Test
57 public void testCase2 () throws IOException {
58 ki = new KrillIndex();
59 ki.addDoc(createFieldDoc0());
Eliza Margaretha609fcc62014-02-13 14:10:20 +000060 ki.addDoc(createFieldDoc1());
61 ki.commit();
62 SpanQuery sq;
63 // ---- Distance 0 to 1
Nils Diewaldbb33da22015-03-04 16:24:25 +000064 sq = createQuery("s:c", "s:e", 0, 1, false);
Eliza Margaretha63926cb2014-02-13 16:54:23 +000065 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +000066 assertEquals((long) 5, kr.getTotalResults());
Nils Diewaldbb33da22015-03-04 16:24:25 +000067 assertEquals(1, kr.getMatch(3).getLocalDocID());
68 }
69
70
71 /**
72 * Secondspans' document number is bigger than firstspans'
73 * Actual distance is smaller than min distance.
Eliza Margaretha6f989202016-10-14 21:48:29 +020074 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000075 @Test
76 public void testCase3 () throws IOException {
77 ki = new KrillIndex();
78 ki.addDoc(createFieldDoc1());
79 ki.addDoc(createFieldDoc0());
Eliza Margaretha63926cb2014-02-13 16:54:23 +000080 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +000081
Eliza Margaretha63926cb2014-02-13 16:54:23 +000082 SpanQuery sq;
83 // Unordered
Nils Diewaldbb33da22015-03-04 16:24:25 +000084 sq = createQuery("s:c", "s:e", 2, 2, false);
85 kr = ki.search(sq, (short) 10);
86 assertEquals((long) 5, kr.getTotalResults());
87 }
88
89
90 /**
91 * Unordered: firstspan in on the right side of the secondspan,
92 * but within max distance.
Eliza Margaretha6f989202016-10-14 21:48:29 +020093 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000094 @Test
95 public void testCase4 () throws IOException {
96 ki = new KrillIndex();
97 ki.addDoc(createFieldDoc2());
98 ki.commit();
99
100 SpanQuery sq;
101 // Unordered
102 sq = createQuery("s:b", "s:c", 2, 2, false);
Eliza Margaretha63926cb2014-02-13 16:54:23 +0000103 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000104 assertEquals((long) 1, kr.getTotalResults());
105 assertEquals(1, kr.getMatch(0).getStartPos());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 assertEquals(2, kr.getMatch(0).getEndPos());
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000107 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000108
109
110 /**
111 * Element queries
Eliza Margaretha6f989202016-10-14 21:48:29 +0200112 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 @Test
114 public void testCase5 () throws IOException {
115 ki = new KrillIndex();
116 ki.addDoc(createFieldDoc0());
Eliza Margaretha63926cb2014-02-13 16:54:23 +0000117 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118
119 SpanDistanceQuery sq;
120 sq = new SpanDistanceQuery(new SpanElementQuery("base", "x"),
Eliza Margaretha6f989202016-10-14 21:48:29 +0200121 new SpanElementQuery("base", "y"),
122 new DistanceConstraint(0, 1, false, true), true);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000123
Eliza Margaretha63926cb2014-02-13 16:54:23 +0000124 kr = ki.search(sq, (short) 10);
margaretha69726b12015-12-10 12:03:19 +0100125
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000126 assertEquals((long) 1, kr.getTotalResults());
127 assertEquals(9, kr.getMatch(0).getStartPos());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 assertEquals(10, kr.getMatch(0).getEndPos());
Eliza Margaretha38a94662014-11-20 13:48:00 +0000129 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000130
131
132 // Add skipTo test
133 @Test
134 public void testCase6 () throws IOException {
135 ki = new KrillIndex();
136 ki.addDoc(createFieldDoc1());
137 ki.addDoc(createFieldDoc2());
Eliza Margaretha38a94662014-11-20 13:48:00 +0000138 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139
Eliza Margaretha38a94662014-11-20 13:48:00 +0000140 SpanQuery sq;
141 //ordered distance 0 to 1
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142 sq = createQuery("s:d", "s:b", 0, 1, true);
Eliza Margaretha38a94662014-11-20 13:48:00 +0000143 kr = ki.search(sq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000144 assertEquals((long) 4, kr.getTotalResults());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000145
146 SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
147 kr = ki.search(stq, (short) 10);
148 assertEquals((long) 6, kr.getTotalResults());
149
150 SpanNextQuery snq = new SpanNextQuery(stq, sq);
Eliza Margaretha38a94662014-11-20 13:48:00 +0000151 kr = ki.search(snq, (short) 10);
Nils Diewalde1ecd5e2014-11-27 02:17:24 +0000152 assertEquals((long) 2, kr.getTotalResults());
153 assertEquals(3, kr.getMatch(0).getStartPos());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000154 assertEquals(5, kr.getMatch(0).getEndPos());
155 assertEquals(8, kr.getMatch(1).getStartPos());
156 assertEquals(10, kr.getMatch(1).getEndPos());
157
Eliza Margaretha38a94662014-11-20 13:48:00 +0000158 /*System.out.print(kr.getTotalResults()+"\n");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 for (int i=0; i< kr.getTotalResults(); i++){
160 System.out.println(
161 kr.getMatch(i).getLocalDocID()+" "+
162 kr.getMatch(i).startPos + " " +
163 kr.getMatch(i).endPos
164 );
165 }*/
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000166 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000167
168
169 private SpanQuery createQuery (String x, String y, int min, int max,
170 boolean isOrdered) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200171 SpanDistanceQuery sq = new SpanDistanceQuery(
172 new SpanTermQuery(new Term("base", x)),
173 new SpanTermQuery(new Term("base", y)),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000174 new DistanceConstraint(min, max, isOrdered, true), true);
175 return sq;
176 }
177
178
179 private FieldDocument createFieldDoc0 () {
180 FieldDocument fd = new FieldDocument();
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000181 fd.addString("ID", "doc-0");
margaretha4f995582015-12-14 14:14:34 +0100182 fd.addTV("base", "text", "[(0-1)s:c|_1$<i>0<i>1]"
183 + "[(1-2)s:e|_2$<i>1<i>2]"
184 + "[(2-3)s:c|_3$<i>2<i>3|<>:y$<b>64<i>2<i>4<i>4<b>0]"
185 + "[(3-4)s:c|_4$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
186 + "[(4-5)s:d|_5$<i>4<i>5|<>:y$<b>64<i>4<i>6<i>6<b>0]"
187 + "[(5-6)s:c|_6$<i>5<i>6|<>:y$<b>64<i>5<i>8<i>8<b>0]"
188 + "[(6-7)s:d|_7$<i>6<i>7]"
189 + "[(7-8)s:e|_8$<i>7<i>8|<>:x$<b>64<i>7<i>9<i>9<b>0]"
190 + "[(8-9)s:e|_9$<i>8<i>9]"
191 + "[(9-10)s:d|_10$<i>9<i>10|<>:x$<b>64<i>9<i>10<i>10<b>0]");
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000192 return fd;
193 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000194
195
196 private FieldDocument createFieldDoc1 () {
197 FieldDocument fd = new FieldDocument();
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000198 fd.addString("ID", "doc-1");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200199 fd.addTV("base", "text",
200 "[(0-1)s:b|s:c|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]"
201 + "[(2-3)s:c|_3$<i>2<i>3]" + "[(3-4)s:c|_4$<i>3<i>4]"
202 + "[(4-5)s:d|_5$<i>4<i>5]" + "[(5-6)s:d|_6$<i>5<i>6]");
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000203 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204 }
205
206
207 private FieldDocument createFieldDoc2 () {
208 FieldDocument fd = new FieldDocument();
Eliza Margaretha63926cb2014-02-13 16:54:23 +0000209 fd.addString("ID", "doc-2");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200210 fd.addTV("base", "text",
211 "[(0-1)s:b|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]"
212 + "[(2-3)s:c|_3$<i>2<i>3]" + "[(3-4)s:c|_4$<i>3<i>4]"
213 + "[(4-5)s:b|_5$<i>4<i>5]" + "[(5-6)s:d|_6$<i>5<i>6]"
214 + "[(6-7)s:b|_7$<i>6<i>7]" + "[(7-8)s:d|_8$<i>7<i>8]"
215 + "[(8-9)s:c|_9$<i>8<i>9]"
216 + "[(9-10)s:d|_10$<i>9<i>10]");
Eliza Margaretha63926cb2014-02-13 16:54:23 +0000217 return fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000218 }
219
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000220}