package de.ids_mannheim.korap.index;

import static org.junit.Assert.assertEquals;

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.After;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanSegmentQuery;
import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;


25@RunWith(JUnit4.class)
26public class TestSegmentIndex {
Nils Diewaldbb33da22015-03-04 16:24:25 +000027 private SpanQuery sq;
28 private KrillIndex ki;
29 private Result kr;
30 private FieldDocument fd;
31 private Logger log;
Eliza Margarethac1960f62014-01-14 12:35:53 +000032
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000033
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 public TestSegmentIndex () throws IOException {
35 ki = new KrillIndex();
36 ki.addDoc(createFieldDoc0());
37 ki.addDoc(createFieldDoc1());
38 ki.addDoc(createFieldDoc2());
39 ki.commit();
40
41 log = LoggerFactory.getLogger(getClass());
42 }
43
44
45 /** Multiple matches in one document. */
46 @Test
47 public void testCase1 () throws IOException {
48 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")),
49 new SpanTermQuery(new Term("base", "s:c")));
50
51 kr = ki.search(sq, (short) 10);
52 ki.close();
53
54 assertEquals("totalResults", kr.getTotalResults(), 3);
55 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
56 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
57 assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
58 assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
59 }
60
61
62 /**
63 * Matches in multiple documents.
64 * Ensure the same document. The current secondspan is skipped to
65 * the doc number of the firstspan.
66 */
67 @Test
68 public void testCase2 () throws IOException {
69 // log.trace("Testcase2");
70 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:a")),
71 new SpanTermQuery(new Term("base", "s:b")));
72
73 kr = ki.search(sq, (short) 10);
74 ki.close();
75
76 assertEquals("totalResults", kr.getTotalResults(), 3);
77 // Match #0
78 assertEquals("doc-number", 1, kr.getMatch(0).getLocalDocID());
79 assertEquals("StartPos", 1, kr.getMatch(0).startPos);
80 assertEquals("EndPos", 2, kr.getMatch(0).endPos);
81 // Match #2
82 assertEquals("doc-number", 2, kr.getMatch(2).getLocalDocID());
83 assertEquals("StartPos", 2, kr.getMatch(2).startPos);
84 assertEquals("EndPos", 3, kr.getMatch(2).endPos);
85 }
86
87
88 /** Ensure the same document, skip to a greater doc number */
89 @Test
90 public void testCase3 () throws IOException {
91 // log.trace("Testcase3");
92 sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:d")),
93 new SpanTermQuery(new Term("base", "s:b")));
94
95 kr = ki.search(sq, (short) 10);
96 ki.close();
97
98 assertEquals("totalResults", kr.getTotalResults(), 1);
99 assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID());
100 assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
101 assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
102 }
103
104
105 /**
106 * Matching a SpanElementQuery and a SpanNextQuery
107 * Multiple atomic indices
108 * */
109 @Test
110 public void testCase4 () throws IOException {
111 // log.trace("Testcase4");
112
113 ki = new KrillIndex();
114 ki.addDoc(createFieldDoc0());
115 ki.commit();
116 ki.addDoc(createFieldDoc1());
117 ki.addDoc(createFieldDoc2());
118 ki.commit();
119
120 sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"),
121 new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
122 new SpanTermQuery(new Term("base", "s:b"))));
123
124 kr = ki.search(sq, (short) 10);
125 ki.close();
126
127 assertEquals("totalResults", kr.getTotalResults(), 2);
128 // Match #0
129 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
130 assertEquals("StartPos", 3, kr.getMatch(0).startPos);
131 assertEquals("EndPos", 5, kr.getMatch(0).endPos);
132 // Match #1
133 assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
134 assertEquals("StartPos", 1, kr.getMatch(1).startPos);
135 assertEquals("EndPos", 3, kr.getMatch(1).endPos);
136 }
137
138
139 /** Matching SpanElementQueries */
140 @Test
141 public void testCase5 () throws IOException {
142 // log.trace("Testcase5");
143 sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"),
144 new SpanElementQuery("base", "e2"));
145
146 kr = ki.search(sq, (short) 10);
147 ki.close();
148
149 assertEquals("totalResults", kr.getTotalResults(), 1);
150 // Match #0
151 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
152 assertEquals("StartPos", 3, kr.getMatch(0).startPos);
153 assertEquals("EndPos", 5, kr.getMatch(0).endPos);
154 }
155
156
157 /** Skip to SegmentSpan */
158 @Test
159 public void testcase6 () throws IOException {
160 ki.addDoc(createFieldDoc4());
161 ki.commit();
162 sq = new SpanNextQuery(new SpanSegmentQuery(new SpanTermQuery(new Term(
163 "base", "s:b")), new SpanTermQuery(new Term("base", "s:c"))),
164 new SpanTermQuery(new Term("base", "s:d")));
165
166 kr = ki.search(sq, (short) 10);
167 ki.close();
168
169 assertEquals("totalResults", kr.getTotalResults(), 2);
170 // Match #0
171 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
172 assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos);
173 assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos);
174 // Match #1 in the other atomic index
175 assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
176 assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos);
177 assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos);
178 }
179
180
181 private FieldDocument createFieldDoc0 () {
182 fd = new FieldDocument();
183 fd.addString("ID", "doc-0");
184 fd.addTV("base", "bcbabd", "[(0-1)s:b|i:b|_1#0-1]"
185 + "[(1-2)s:c|i:c|s:b|_2#1-2]"
186 + "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]"
187 + "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]"
188 + "[(4-5)s:b|i:b|s:c|_5#4-5]"
189 + "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]");
190 return fd;
191 }
192
193
194 private FieldDocument createFieldDoc1 () {
195 fd = new FieldDocument();
196 fd.addString("ID", "doc-1");
197 fd.addTV("base", "babaa", "[(0-1)s:b|i:b|s:c|_1#0-1]"
198 + "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]"
199 + "[(2-3)s:b|i:b|s:a|_3#2-3]" + "[(3-4)s:a|i:a|_4#3-4]"
200 + "[(4-5)s:a|i:a|_5#4-5]");
201 return fd;
202 }
203
204
205 private FieldDocument createFieldDoc2 () {
206 fd = new FieldDocument();
207 fd.addString("ID", "doc-2");
208 fd.addTV("base", "bdb", "[(0-1)s:b|i:b|_1#0-1]"
209 + "[(1-2)s:d|i:d|s:b|_2#1-2]" + "[(2-3)s:b|i:b|s:a|_3#2-3]");
210 return fd;
211 }
212
213
214 private FieldDocument createFieldDoc4 () {
215 fd = new FieldDocument();
216 fd.addString("ID", "doc-4");
217 fd.addTV("base", "bdb", "[(0-1)s:b|i:b|s:c|_1#0-1]"
218 + "[(1-2)s:d|_2#1-2]" + "[(2-3)s:d|i:d|_3#2-3]");
219 return fd;
220 }
Eliza Margarethac1960f62014-01-14 12:35:53 +0000221}