blob: 8497dd59b6b2a193f18443e0319a5b939ec0dc50 [file] [log] [blame]
Eliza Margarethac1960f62014-01-14 12:35:53 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
Eliza Margaretha76592d72014-01-16 16:04:23 +000013import org.slf4j.Logger;
14import org.slf4j.LoggerFactory;
Eliza Margarethac1960f62014-01-14 12:35:53 +000015
Nils Diewalda14ecd62015-02-26 21:00:20 +000016import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethac1960f62014-01-14 12:35:53 +000017import de.ids_mannheim.korap.query.SpanElementQuery;
18import de.ids_mannheim.korap.query.SpanNextQuery;
19import de.ids_mannheim.korap.query.SpanSegmentQuery;
margaretha71c66ee2015-12-11 14:39:55 +010020import de.ids_mannheim.korap.response.Result;
Eliza Margarethac1960f62014-01-14 12:35:53 +000021
22
23@RunWith(JUnit4.class)
24public class TestSegmentIndex {
Nils Diewaldbb33da22015-03-04 16:24:25 +000025 private SpanQuery sq;
26 private KrillIndex ki;
27 private Result kr;
28 private FieldDocument fd;
29 private Logger log;
Eliza Margarethac1960f62014-01-14 12:35:53 +000030
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000031
Nils Diewaldbb33da22015-03-04 16:24:25 +000032 public TestSegmentIndex () throws IOException {
33 ki = new KrillIndex();
34 ki.addDoc(createFieldDoc0());
35 ki.addDoc(createFieldDoc1());
36 ki.addDoc(createFieldDoc2());
37 ki.commit();
38
39 log = LoggerFactory.getLogger(getClass());
40 }
41
42
43 /** Multiple matches in one document. */
44 @Test
45 public void testCase1 () throws IOException {
46 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")),
47 new SpanTermQuery(new Term("base", "s:c")));
48
49 kr = ki.search(sq, (short) 10);
50 ki.close();
51
52 assertEquals("totalResults", kr.getTotalResults(), 3);
53 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
54 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
55 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
56 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
57 }
58
59
60 /**
61 * Matches in multiple documents.
62 * Ensure the same document. The current secondspan is skipped to
63 * the doc number of the firstspan.
64 */
65 @Test
66 public void testCase2 () throws IOException {
67 // log.trace("Testcase2");
68 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:a")),
69 new SpanTermQuery(new Term("base", "s:b")));
70
71 kr = ki.search(sq, (short) 10);
72 ki.close();
73
74 assertEquals("totalResults", kr.getTotalResults(), 3);
75 // Match #0
76 assertEquals("doc-number", 1, kr.getMatch(0).getLocalDocID());
77 assertEquals("StartPos", 1, kr.getMatch(0).startPos);
78 assertEquals("EndPos", 2, kr.getMatch(0).endPos);
79 // Match #2
80 assertEquals("doc-number", 2, kr.getMatch(2).getLocalDocID());
81 assertEquals("StartPos", 2, kr.getMatch(2).startPos);
82 assertEquals("EndPos", 3, kr.getMatch(2).endPos);
83 }
84
85
86 /** Ensure the same document, skip to a greater doc number */
87 @Test
88 public void testCase3 () throws IOException {
89 // log.trace("Testcase3");
90 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:d")),
91 new SpanTermQuery(new Term("base", "s:b")));
92
93 kr = ki.search(sq, (short) 10);
94 ki.close();
95
96 assertEquals("totalResults", kr.getTotalResults(), 1);
97 assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID());
98 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
99 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
100 }
101
102
103 /**
104 * Matching a SpanElementQuery and a SpanNextQuery
105 * Multiple atomic indices
Eliza Margaretha6f989202016-10-14 21:48:29 +0200106 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000107 @Test
108 public void testCase4 () throws IOException {
109 // log.trace("Testcase4");
110
111 ki = new KrillIndex();
112 ki.addDoc(createFieldDoc0());
113 ki.commit();
114 ki.addDoc(createFieldDoc1());
115 ki.addDoc(createFieldDoc2());
116 ki.commit();
117
118 sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"),
119 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
120 new SpanTermQuery(new Term("base", "s:b"))));
121
122 kr = ki.search(sq, (short) 10);
123 ki.close();
124
125 assertEquals("totalResults", kr.getTotalResults(), 2);
126 // Match #0
127 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
128 assertEquals("StartPos", 3, kr.getMatch(0).startPos);
129 assertEquals("EndPos", 5, kr.getMatch(0).endPos);
130 // Match #1
131 assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
132 assertEquals("StartPos", 1, kr.getMatch(1).startPos);
133 assertEquals("EndPos", 3, kr.getMatch(1).endPos);
134 }
135
136
137 /** Matching SpanElementQueries */
138 @Test
139 public void testCase5 () throws IOException {
140 // log.trace("Testcase5");
141 sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"),
142 new SpanElementQuery("base", "e2"));
143
144 kr = ki.search(sq, (short) 10);
145 ki.close();
146
147 assertEquals("totalResults", kr.getTotalResults(), 1);
148 // Match #0
149 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
150 assertEquals("StartPos", 3, kr.getMatch(0).startPos);
151 assertEquals("EndPos", 5, kr.getMatch(0).endPos);
152 }
153
154
155 /** Skip to SegmentSpan */
156 @Test
157 public void testcase6 () throws IOException {
158 ki.addDoc(createFieldDoc4());
159 ki.commit();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200160 sq = new SpanNextQuery(
161 new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")),
162 new SpanTermQuery(new Term("base", "s:c"))),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163 new SpanTermQuery(new Term("base", "s:d")));
164
165 kr = ki.search(sq, (short) 10);
166 ki.close();
167
168 assertEquals("totalResults", kr.getTotalResults(), 2);
169 // Match #0
170 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
171 assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos);
172 assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos);
173 // Match #1 in the other atomic index
174 assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
175 assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos);
176 assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos);
177 }
178
Akron6759b042016-04-28 01:25:00 +0200179
Nils Diewaldbb33da22015-03-04 16:24:25 +0000180 private FieldDocument createFieldDoc0 () {
181 fd = new FieldDocument();
182 fd.addString("ID", "doc-0");
margaretha71c66ee2015-12-11 14:39:55 +0100183 fd.addTV("base", "bcbabd", "[(0-1)s:b|i:b|_1$<i>0<i>1]"
184 + "[(1-2)s:c|i:c|s:b|_2$<i>1<i>2]"
margaretha4f995582015-12-14 14:14:34 +0100185 + "[(2-3)s:b|i:b|_3$<i>2<i>3|<>:e$<b>64<i>2<i>4<i>4<b>0]"
186 + "[(3-4)s:a|i:a|_4$<i>3<i>4|<>:e$<b>64<i>3<i>5<i>5<b>0|"
187 + "<>:e2$<b>64<i>3<i>5<i>5<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +0100188 + "[(4-5)s:b|i:b|s:c|_5$<i>4<i>5]"
margaretha4f995582015-12-14 14:14:34 +0100189 + "[(5-6)s:d|i:d|_6$<i>5<i>6|<>:e2$<b>64<i>5<i>6<i>6<b>0]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 return fd;
191 }
192
193
194 private FieldDocument createFieldDoc1 () {
195 fd = new FieldDocument();
196 fd.addString("ID", "doc-1");
margaretha71c66ee2015-12-11 14:39:55 +0100197 fd.addTV("base", "babaa", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]"
margaretha4f995582015-12-14 14:14:34 +0100198 + "[(1-2)s:a|i:a|s:b|_2$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
margaretha71c66ee2015-12-11 14:39:55 +0100199 + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]"
200 + "[(3-4)s:a|i:a|_4$<i>3<i>4]" + "[(4-5)s:a|i:a|_5$<i>4<i>5]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000201 return fd;
202 }
203
204
205 private FieldDocument createFieldDoc2 () {
206 fd = new FieldDocument();
207 fd.addString("ID", "doc-2");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200208 fd.addTV("base", "bdb",
209 "[(0-1)s:b|i:b|_1$<i>0<i>1]" + "[(1-2)s:d|i:d|s:b|_2$<i>1<i>2]"
210 + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000211 return fd;
212 }
213
214
215 private FieldDocument createFieldDoc4 () {
216 fd = new FieldDocument();
217 fd.addString("ID", "doc-4");
margaretha71c66ee2015-12-11 14:39:55 +0100218 fd.addTV("base", "bdb", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]"
219 + "[(1-2)s:d|_2$<i>1<i>2]" + "[(2-3)s:d|i:d|_3$<i>2<i>3]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000220 return fd;
221 }
Eliza Margarethac1960f62014-01-14 12:35:53 +0000222}