blob: adc550b0f8d7cc014f5de67c126978886d77a6fe [file] [log] [blame]
Eliza Margarethac1960f62014-01-14 12:35:53 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanQuery;
9import org.apache.lucene.search.spans.SpanTermQuery;
10import org.junit.Test;
11import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
Eliza Margaretha76592d72014-01-16 16:04:23 +000013import org.slf4j.Logger;
14import org.slf4j.LoggerFactory;
Eliza Margarethac1960f62014-01-14 12:35:53 +000015
16import de.ids_mannheim.korap.KorapIndex;
17import de.ids_mannheim.korap.KorapResult;
18import de.ids_mannheim.korap.query.SpanElementQuery;
19import de.ids_mannheim.korap.query.SpanNextQuery;
20import de.ids_mannheim.korap.query.SpanSegmentQuery;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000021import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
22import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
Eliza Margarethac1960f62014-01-14 12:35:53 +000023
24
25@RunWith(JUnit4.class)
26public class TestSegmentIndex {
27 private SpanQuery sq;
28 private KorapIndex ki;
29 private KorapResult kr;
30 private FieldDocument fd;
Eliza Margaretha76592d72014-01-16 16:04:23 +000031 private Logger log;
Eliza Margarethac1960f62014-01-14 12:35:53 +000032
33 public TestSegmentIndex() throws IOException {
34 ki = new KorapIndex();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000035 ki.addDoc(createFieldDoc0());
Eliza Margarethac1960f62014-01-14 12:35:53 +000036 ki.addDoc(createFieldDoc1());
37 ki.addDoc(createFieldDoc2());
Eliza Margarethac1960f62014-01-14 12:35:53 +000038 ki.commit();
Eliza Margaretha76592d72014-01-16 16:04:23 +000039
40 log = LoggerFactory.getLogger(getClass());
Eliza Margarethac1960f62014-01-14 12:35:53 +000041 }
42
43 /** Multiple matches in one document. */
44 @Test
45 public void testCase1() throws IOException {
Eliza Margaretha76592d72014-01-16 16:04:23 +000046// log.trace("Testcase1");
Eliza Margarethac1960f62014-01-14 12:35:53 +000047 sq = new SpanSegmentQuery(
48 new SpanTermQuery(new Term("base","s:b")),
49 new SpanTermQuery(new Term("base","s:c"))
50 );
51
52 kr = ki.search(sq, (short) 10);
53 ki.close();
54
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000055 assertEquals("totalResults", 3, kr.totalResults());
Eliza Margarethac1960f62014-01-14 12:35:53 +000056 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
57 assertEquals("EndPos (0)", 2, kr.match(0).endPos);
58 assertEquals("StartPos (1)", 4, kr.match(1).startPos);
59 assertEquals("EndPos (1)", 5, kr.match(1).endPos);
60 }
61
62 /** Matches in multiple documents.
63 * Ensure the same document. The current secondspan is skipped to
64 * the doc number of the firstspan. */
65 @Test
Eliza Margaretha76592d72014-01-16 16:04:23 +000066 public void testCase2() throws IOException {
67// log.trace("Testcase2");
Eliza Margarethac1960f62014-01-14 12:35:53 +000068 sq = new SpanSegmentQuery(
69 new SpanTermQuery(new Term("base","s:a")),
70 new SpanTermQuery(new Term("base","s:b"))
71 );
72
73 kr = ki.search(sq, (short) 10);
74 ki.close();
75
76 assertEquals("totalResults", 3, kr.totalResults());
77 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +000078 assertEquals("doc-number", 1, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +000079 assertEquals("StartPos", 1, kr.match(0).startPos);
80 assertEquals("EndPos", 2, kr.match(0).endPos);
81 // Match #2
Eliza Margarethaf7bbb262014-01-14 17:17:29 +000082 assertEquals("doc-number", 2, kr.match(2).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +000083 assertEquals("StartPos", 2, kr.match(2).startPos);
84 assertEquals("EndPos", 3, kr.match(2).endPos);
85 }
86
87
88 /** Ensure the same document, skip to a greater doc number */
89 @Test
90 public void testCase3() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +000091// log.trace("Testcase3");
Eliza Margarethac1960f62014-01-14 12:35:53 +000092 sq = new SpanSegmentQuery(
93 new SpanTermQuery(new Term("base","s:d")),
94 new SpanTermQuery(new Term("base","s:b"))
95 );
96
97 kr = ki.search(sq, (short) 10);
98 ki.close();
99
100 assertEquals("totalResults", 1, kr.totalResults());
Eliza Margarethaf7bbb262014-01-14 17:17:29 +0000101 assertEquals("doc-number", 2, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000102 assertEquals("StartPos (0)", 1, kr.match(0).startPos);
103 assertEquals("EndPos (0)", 2, kr.match(0).endPos);
104 }
105
Eliza Margaretha76592d72014-01-16 16:04:23 +0000106 /** Matching a SpanElementQuery and a SpanNextQuery
107 * Multiple atomic indices
108 * */
Eliza Margarethac1960f62014-01-14 12:35:53 +0000109 @Test
110 public void testCase4() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +0000111// log.trace("Testcase4");
112
113 ki = new KorapIndex();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000114 ki.addDoc(createFieldDoc0());
Eliza Margaretha76592d72014-01-16 16:04:23 +0000115 ki.commit();
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000116 ki.addDoc(createFieldDoc1());
117 ki.addDoc(createFieldDoc2());
Eliza Margaretha76592d72014-01-16 16:04:23 +0000118 ki.commit();
119
Eliza Margarethac1960f62014-01-14 12:35:53 +0000120 sq = new SpanSegmentQuery(
121 new SpanElementQuery("base","e"),
122 new SpanNextQuery(
123 new SpanTermQuery(new Term("base","s:a")),
124 new SpanTermQuery(new Term("base","s:b"))
125 )
126 );
127
128 kr = ki.search(sq, (short) 10);
129 ki.close();
130
131 assertEquals("totalResults", 2, kr.totalResults());
132 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +0000133 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000134 assertEquals("StartPos", 3, kr.match(0).startPos);
135 assertEquals("EndPos", 5, kr.match(0).endPos);
136 // Match #1
Eliza Margaretha76592d72014-01-16 16:04:23 +0000137 assertEquals("doc-number", 0, kr.match(1).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000138 assertEquals("StartPos", 1, kr.match(1).startPos);
139 assertEquals("EndPos", 3, kr.match(1).endPos);
140 }
141
Eliza Margaretha76592d72014-01-16 16:04:23 +0000142 /** Matching SpanElementQueries */
Eliza Margarethac1960f62014-01-14 12:35:53 +0000143 @Test
144 public void testCase5() throws IOException{
Eliza Margaretha76592d72014-01-16 16:04:23 +0000145// log.trace("Testcase5");
Eliza Margarethac1960f62014-01-14 12:35:53 +0000146 sq = new SpanSegmentQuery(
147 new SpanElementQuery("base","e"),
148 new SpanElementQuery("base","e2")
149 );
150
151 kr = ki.search(sq, (short) 10);
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000152 ki.close();
Eliza Margarethac1960f62014-01-14 12:35:53 +0000153
154 assertEquals("totalResults", 1, kr.totalResults());
155 // Match #0
Eliza Margarethaf7bbb262014-01-14 17:17:29 +0000156 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
Eliza Margarethac1960f62014-01-14 12:35:53 +0000157 assertEquals("StartPos", 3, kr.match(0).startPos);
158 assertEquals("EndPos", 5, kr.match(0).endPos);
159 }
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000160
161 /** Skip to SegmentSpan */
162 @Test
163 public void testcase6() throws IOException{
164 ki.addDoc(createFieldDoc4());
165 ki.commit();
166 sq = new SpanNextQuery(
167 new SpanSegmentQuery(
168 new SpanTermQuery(new Term("base","s:b")),
169 new SpanTermQuery(new Term("base","s:c"))
170 ),
171 new SpanTermQuery(new Term("base","s:d"))
172 );
173
174 kr = ki.search(sq, (short) 10);
175 ki.close();
176
177 assertEquals("totalResults", 2, kr.totalResults());
178 // Match #0
179 assertEquals("doc-number", 0, kr.match(0).getLocalDocID());
180 assertEquals("StartPos (0)", 4, kr.match(0).startPos);
181 assertEquals("EndPos (0)", 6, kr.match(0).endPos);
182 // Match #1 in the other atomic index
183 assertEquals("doc-number", 0, kr.match(1).getLocalDocID());
184 assertEquals("StartPos (0)", 0, kr.match(1).startPos);
185 assertEquals("EndPos (0)", 2, kr.match(1).endPos);
186 }
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000187
188
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000189 private FieldDocument createFieldDoc0(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000190 fd = new FieldDocument();
191 fd.addString("ID", "doc-0");
192 fd.addTV("base",
193 "bcbabd",
194 "[(0-1)s:b|i:b|_1#0-1]" +
195 "[(1-2)s:c|i:c|s:b|_2#1-2]" +
196 "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]" +
197 "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]" +
198 "[(4-5)s:b|i:b|s:c|_5#4-5]" +
199 "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]");
200 return fd;
201 }
202
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000203 private FieldDocument createFieldDoc1(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000204 fd = new FieldDocument();
205 fd.addString("ID", "doc-1");
206 fd.addTV("base",
207 "babaa",
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000208 "[(0-1)s:b|i:b|s:c|_1#0-1]" +
Eliza Margarethac1960f62014-01-14 12:35:53 +0000209 "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]" +
210 "[(2-3)s:b|i:b|s:a|_3#2-3]" +
211 "[(3-4)s:a|i:a|_4#3-4]" +
212 "[(4-5)s:a|i:a|_5#4-5]");
213 return fd;
214 }
215
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000216 private FieldDocument createFieldDoc2(){
Eliza Margarethac1960f62014-01-14 12:35:53 +0000217 fd = new FieldDocument();
218 fd.addString("ID", "doc-2");
219 fd.addTV("base",
220 "bdb",
221 "[(0-1)s:b|i:b|_1#0-1]" +
222 "[(1-2)s:d|i:d|s:b|_2#1-2]"+
223 "[(2-3)s:b|i:b|s:a|_3#2-3]");
224 return fd;
225 }
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000226
227 private FieldDocument createFieldDoc4(){
228 fd = new FieldDocument();
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000229 fd.addString("ID", "doc-4");
Eliza Margarethabefc23f2014-01-20 14:34:15 +0000230 fd.addTV("base",
231 "bdb",
232 "[(0-1)s:b|i:b|s:c|_1#0-1]" +
233 "[(1-2)s:d|_2#1-2]"+
234 "[(2-3)s:d|i:d|_3#2-3]");
235 return fd;
236 }
Eliza Margarethac1960f62014-01-14 12:35:53 +0000237}