blob: 2b52d9de4ef6b1cb72c1bd81933659348a169005 [file] [log] [blame]
Eliza Margarethac1960f62014-01-14 12:35:53 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
Eliza Margaretha76592d72014-01-16 16:04:23 +000013import org.slf4j.Logger;
14import org.slf4j.LoggerFactory;
Eliza Margarethac1960f62014-01-14 12:35:53 +000015
16import de.ids_mannheim.korap.KorapIndex;
17import de.ids_mannheim.korap.KorapResult;
18import de.ids_mannheim.korap.query.SpanElementQuery;
19import de.ids_mannheim.korap.query.SpanNextQuery;
20import de.ids_mannheim.korap.query.SpanSegmentQuery;
21
22
23@RunWith(JUnit4.class)
24public class TestSegmentIndex {
25 private SpanQuery sq;
26 private KorapIndex ki;
27 private KorapResult kr;
28 private FieldDocument fd;
Eliza Margaretha76592d72014-01-16 16:04:23 +000029 private Logger log;
Eliza Margarethac1960f62014-01-14 12:35:53 +000030
31 public TestSegmentIndex() throws IOException {
32 ki = new KorapIndex();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000033 ki.addDoc(createFieldDoc0());
Eliza Margarethac1960f62014-01-14 12:35:53 +000034 ki.addDoc(createFieldDoc1());
35 ki.addDoc(createFieldDoc2());
Eliza Margarethac1960f62014-01-14 12:35:53 +000036 ki.commit();
Eliza Margaretha76592d72014-01-16 16:04:23 +000037
38 log = LoggerFactory.getLogger(getClass());
Eliza Margarethac1960f62014-01-14 12:35:53 +000039 }
40
41 /** Multiple matches in one document. */
42 @Test
43 public void testCase1() throws IOException {
Eliza Margaretha76592d72014-01-16 16:04:23 +000044// log.trace("Testcase1");
Eliza Margarethac1960f62014-01-14 12:35:53 +000045 sq = new SpanSegmentQuery(
46 new SpanTermQuery(new Term("base","s:b")),
47 new SpanTermQuery(new Term("base","s:c"))
48 );
49
50 kr = ki.search(sq, (short) 10);
51 ki.close();
52
53 assertEquals("totalResults", 2, kr.totalResults());
54 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
55 assertEquals("EndPos (0)", 2, kr.match(0).endPos);
56 assertEquals("StartPos (1)", 4, kr.match(1).startPos);
57 assertEquals("EndPos (1)", 5, kr.match(1).endPos);
58 }
59
60 /** Matches in multiple documents.
61 * Ensure the same document. The current secondspan is skipped to
62 * the doc number of the firstspan. */
63 @Test
Eliza Margaretha76592d72014-01-16 16:04:23 +000064 public void testCase2() throws IOException {
65// log.trace("Testcase2");
Eliza Margarethac1960f62014-01-14 12:35:53 +000066 sq = new SpanSegmentQuery(
67 new SpanTermQuery(new Term("base","s:a")),
68 new SpanTermQuery(new Term("base","s:b"))
69 );
70
71 kr = ki.search(sq, (short) 10);
72 ki.close();
73
74 assertEquals("totalResults", 3, kr.totalResults());
75 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +000076 assertEquals("doc-number", 1, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +000077 assertEquals("StartPos", 1, kr.match(0).startPos);
78 assertEquals("EndPos", 2, kr.match(0).endPos);
79 // Match #2
Eliza Margarethaf7bbb262014-01-14 17:17:29 +000080 assertEquals("doc-number", 2, kr.match(2).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +000081 assertEquals("StartPos", 2, kr.match(2).startPos);
82 assertEquals("EndPos", 3, kr.match(2).endPos);
83 }
84
85
86 /** Ensure the same document, skip to a greater doc number */
87 @Test
88 public void testCase3() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +000089// log.trace("Testcase3");
Eliza Margarethac1960f62014-01-14 12:35:53 +000090 sq = new SpanSegmentQuery(
91 new SpanTermQuery(new Term("base","s:d")),
92 new SpanTermQuery(new Term("base","s:b"))
93 );
94
95 kr = ki.search(sq, (short) 10);
96 ki.close();
97
98 assertEquals("totalResults", 1, kr.totalResults());
Eliza Margarethaf7bbb262014-01-14 17:17:29 +000099 assertEquals("doc-number", 2, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000100 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
101 assertEquals("EndPos (0)", 2, kr.match(0).endPos);
102 }
103
Eliza Margaretha76592d72014-01-16 16:04:23 +0000104 /** Matching a SpanElementQuery and a SpanNextQuery
105 * Multiple atomic indices
106 * */
Eliza Margarethac1960f62014-01-14 12:35:53 +0000107 @Test
108 public void testCase4() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +0000109// log.trace("Testcase4");
110
111 ki = new KorapIndex();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000112 ki.addDoc(createFieldDoc0());
Eliza Margaretha76592d72014-01-16 16:04:23 +0000113 ki.commit();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000114 ki.addDoc(createFieldDoc1());
115 ki.addDoc(createFieldDoc2());
Eliza Margaretha76592d72014-01-16 16:04:23 +0000116 ki.commit();
117
Eliza Margarethac1960f62014-01-14 12:35:53 +0000118 sq = new SpanSegmentQuery(
119 new SpanElementQuery("base","e"),
120 new SpanNextQuery(
121 new SpanTermQuery(new Term("base","s:a")),
122 new SpanTermQuery(new Term("base","s:b"))
123 )
124 );
125
126 kr = ki.search(sq, (short) 10);
127 ki.close();
128
129 assertEquals("totalResults", 2, kr.totalResults());
130 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +0000131 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000132 assertEquals("StartPos", 3, kr.match(0).startPos);
133 assertEquals("EndPos", 5, kr.match(0).endPos);
134 // Match #1
Eliza Margaretha76592d72014-01-16 16:04:23 +0000135 assertEquals("doc-number", 0, kr.match(1).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000136 assertEquals("StartPos", 1, kr.match(1).startPos);
137 assertEquals("EndPos", 3, kr.match(1).endPos);
138 }
139
Eliza Margaretha76592d72014-01-16 16:04:23 +0000140 /** Matching SpanElementQueries */
Eliza Margarethac1960f62014-01-14 12:35:53 +0000141 @Test
142 public void testCase5() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +0000143// log.trace("Testcase5");
Eliza Margarethac1960f62014-01-14 12:35:53 +0000144 sq = new SpanSegmentQuery(
145 new SpanElementQuery("base","e"),
146 new SpanElementQuery("base","e2")
147 );
148
149 kr = ki.search(sq, (short) 10);
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000150 ki.close();
Eliza Margarethac1960f62014-01-14 12:35:53 +0000151
152 assertEquals("totalResults", 1, kr.totalResults());
153 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +0000154 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000155 assertEquals("StartPos", 3, kr.match(0).startPos);
156 assertEquals("EndPos", 5, kr.match(0).endPos);
157 }
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000158
159 /** Skip to SegmentSpan */
160 @Test
161 public void testcase6() throws IOException{
162 ki.addDoc(createFieldDoc4());
163 ki.commit();
164 sq = new SpanNextQuery(
165 new SpanSegmentQuery(
166 new SpanTermQuery(new Term("base","s:b")),
167 new SpanTermQuery(new Term("base","s:c"))
168 ),
169 new SpanTermQuery(new Term("base","s:d"))
170 );
171
172 kr = ki.search(sq, (short) 10);
173 ki.close();
174
175 assertEquals("totalResults", 2, kr.totalResults());
176 // Match #0
177 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
178 assertEquals("StartPos (0)", 4, kr.match(0).startPos);
179 assertEquals("EndPos (0)", 6, kr.match(0).endPos);
180 // Match #1 in the other atomic index
181 assertEquals("doc-number", 0, kr.match(1).getLocalDocID());
182 assertEquals("StartPos (0)", 0, kr.match(1).startPos);
183 assertEquals("EndPos (0)", 2, kr.match(1).endPos);
184 }
Eliza Margarethac1960f62014-01-14 12:35:53 +0000185
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000186 private FieldDocument createFieldDoc0(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000187 fd = new FieldDocument();
188 fd.addString("ID", "doc-0");
189 fd.addTV("base",
190 "bcbabd",
191 "[(0-1)s:b|i:b|_1#0-1]" +
192 "[(1-2)s:c|i:c|s:b|_2#1-2]" +
193 "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]" +
194 "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]" +
195 "[(4-5)s:b|i:b|s:c|_5#4-5]" +
196 "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]");
197 return fd;
198 }
199
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000200 private FieldDocument createFieldDoc1(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000201 fd = new FieldDocument();
202 fd.addString("ID", "doc-1");
203 fd.addTV("base",
204 "babaa",
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000205 "[(0-1)s:b|i:b|s:c_1#0-1]" +
Eliza Margarethac1960f62014-01-14 12:35:53 +0000206 "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]" +
207 "[(2-3)s:b|i:b|s:a|_3#2-3]" +
208 "[(3-4)s:a|i:a|_4#3-4]" +
209 "[(4-5)s:a|i:a|_5#4-5]");
210 return fd;
211 }
212
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000213 private FieldDocument createFieldDoc2(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000214 fd = new FieldDocument();
215 fd.addString("ID", "doc-2");
216 fd.addTV("base",
217 "bdb",
218 "[(0-1)s:b|i:b|_1#0-1]" +
219 "[(1-2)s:d|i:d|s:b|_2#1-2]"+
220 "[(2-3)s:b|i:b|s:a|_3#2-3]");
221 return fd;
222 }
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000223
224 private FieldDocument createFieldDoc4(){
225 fd = new FieldDocument();
226 fd.addString("ID", "doc-3");
227 fd.addTV("base",
228 "bdb",
229 "[(0-1)s:b|i:b|s:c|_1#0-1]" +
230 "[(1-2)s:d|_2#1-2]"+
231 "[(2-3)s:d|i:d|_3#2-3]");
232 return fd;
233 }
Eliza Margarethac1960f62014-01-14 12:35:53 +0000234}