blob: 340f460f5ca46b6ef52835435de91b015a2c764a [file] [log] [blame]
Eliza Margarethafb25cef2014-06-06 14:19:07 +00001package de.ids_mannheim.korap.index;
2
margaretha05a4bc12022-02-11 10:55:43 +01003import static de.ids_mannheim.korap.TestSimple.getJsonQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +00004import static org.junit.Assert.assertEquals;
5
6import java.io.IOException;
Eliza Margaretha997ccde2014-07-04 09:20:35 +00007import java.util.ArrayList;
8import java.util.List;
Eliza Margarethafb25cef2014-06-06 14:19:07 +00009
10import org.apache.lucene.index.Term;
11import org.apache.lucene.search.spans.SpanQuery;
12import org.apache.lucene.search.spans.SpanTermQuery;
13import org.junit.Test;
14
Akronfa8b2282020-06-18 07:41:46 +020015import de.ids_mannheim.korap.Krill;
Nils Diewalda14ecd62015-02-26 21:00:20 +000016import de.ids_mannheim.korap.KrillIndex;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000017import de.ids_mannheim.korap.query.SpanAttributeQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000018import de.ids_mannheim.korap.query.SpanElementQuery;
Eliza Margaretha997ccde2014-07-04 09:20:35 +000019import de.ids_mannheim.korap.query.SpanNextQuery;
Eliza Margaretha98c200e2014-10-15 13:59:58 +000020import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
Akronfa8b2282020-06-18 07:41:46 +020021import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
Eliza Margaretha2db5e232015-03-04 10:20:01 +000022import de.ids_mannheim.korap.response.Result;
Akronfa8b2282020-06-18 07:41:46 +020023import de.ids_mannheim.korap.util.QueryException;
24
Eliza Margarethafb25cef2014-06-06 14:19:07 +000025public class TestAttributeIndex {
Eliza Margarethafb25cef2014-06-06 14:19:07 +000026
Nils Diewalda14ecd62015-02-26 21:00:20 +000027 private KrillIndex ki = new KrillIndex();
Nils Diewald884dbcf2015-02-27 17:02:28 +000028 private Result kr;
Eliza Margarethaafe98122015-01-23 17:37:57 +000029 private FieldDocument fd;
30
Nils Diewaldbb33da22015-03-04 16:24:25 +000031
32 public TestAttributeIndex () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +000033 ki = new KrillIndex();
Eliza Margarethaafe98122015-01-23 17:37:57 +000034 }
35
Nils Diewaldbb33da22015-03-04 16:24:25 +000036
37 private FieldDocument createFieldDoc0 () {
Eliza Margarethaafe98122015-01-23 17:37:57 +000038 fd = new FieldDocument();
39 fd.addString("ID", "doc-0");
Eliza Margaretha6f989202016-10-14 21:48:29 +020040 fd.addTV("base", "bcbabd", "[(0-1)s:a|_1$<i>0<i>1|"
41 + "<>:s$<b>64<i>0<i>5<i>5<b>0<s>3|"
42 + "<>:div$<b>64<i>0<i>2<i>2<b>0<s>2|"
43 + "<>:div$<b>64<i>0<i>3<i>3<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020044 + "@:class=header$<b>17<s>1<i>3|@:class=header$<b>17<s>2<i>2]"
margaretha69726b12015-12-10 12:03:19 +010045
Eliza Margaretha6f989202016-10-14 21:48:29 +020046 + "[(1-2)s:e|_2$<i>1<i>2|"
Akronfa8b2282020-06-18 07:41:46 +020047 + "<>:a$<b>64<i>1<i>2<i>2<b>0<s>1|@:class=header$<b>17<s>1<i>2]"
Akron42993552016-02-04 13:24:24 +010048
Eliza Margaretha6f989202016-10-14 21:48:29 +020049 + "[(2-3)s:e|_3$<i>2<i>3|"
Akronfa8b2282020-06-18 07:41:46 +020050 + "<>:div$<b>64<i>2<i>5<i>5<b>0<s>1|@:class=time$<b>17<s>1<i>5]"
Akron42993552016-02-04 13:24:24 +010051
Eliza Margaretha6f989202016-10-14 21:48:29 +020052 + "[(3-4)s:a|_4$<i>3<i>4|"
Akronfa8b2282020-06-18 07:41:46 +020053 + "<>:div$<b>64<i>3<i>5<i>5<b>0<s>1|@:class=header$<b>17<s>1<i>5]"
Akron42993552016-02-04 13:24:24 +010054
Eliza Margaretha6f989202016-10-14 21:48:29 +020055 + "[(4-5)s:b|_5$<i>4<i>5|" + "<>:a$<b>64<i>4<i>5<i>5<b>0<s>2|"
56 + "<>:div$<b>64<i>4<i>5<i>5<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020057 + "@:class=header$<b>17<s>2<i>5]"
Akron42993552016-02-04 13:24:24 +010058
Eliza Margaretha6f989202016-10-14 21:48:29 +020059 + "[(5-6)s:d|_6$<i>5<i>6|" + "<>:div$<b>64<i>5<i>6<i>6<b>0<s>2|"
60 + "<>:s$<b>64<i>5<i>6<i>6<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020061 + "@:class=header$<b>17<s>1<i>6]"
Akron42993552016-02-04 13:24:24 +010062
Eliza Margaretha6f989202016-10-14 21:48:29 +020063 + "[(6-7)s:d|_7$<i>6<i>7|" + "<>:s$<b>64<i>6<i>7<i>7<b>0<s>2|"
64 + "<>:div$<b>64<i>6<i>7<i>7<b>0<s>1"
Akronfa8b2282020-06-18 07:41:46 +020065 + "|@:class=header$<b>17<s>1<i>7|@:class=header$<b>17<s>2<i>7]");
Eliza Margarethaafe98122015-01-23 17:37:57 +000066
67 return fd;
68 }
69
Nils Diewaldbb33da22015-03-04 16:24:25 +000070
71 private FieldDocument createFieldDoc1 () {
Eliza Margarethaafe98122015-01-23 17:37:57 +000072 fd = new FieldDocument();
73 fd.addString("ID", "doc-1");
Eliza Margaretha6f989202016-10-14 21:48:29 +020074 fd.addTV("base", "bcbabd", "[(0-1)s:b|_1$<i>0<i>1|"
75 + "<>:s<b>64<i>0<i>5<i>5<b>0<s>2|"
76 + "<>:div$<b>64<i>0<i>3<i>3<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020077 + "@:class=header$<b>17<s>1<i>3|@:class=title$<b>17<s>1<i>3|@:class=book$<b>17<s>1<i>3]"
margaretha10da63e2015-12-18 15:42:52 +010078
Eliza Margaretha6f989202016-10-14 21:48:29 +020079 + "[(1-2)s:c|_2$<i>1<i>2|" + "<>:div$<b>64<i>1<i>2<i>2<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020080 + "@:class=header$<b>17<s>1<i>2|@:class=title$<b>17<s>1<i>2]"
margaretha10da63e2015-12-18 15:42:52 +010081
Eliza Margaretha6f989202016-10-14 21:48:29 +020082 + "[(2-3)s:b|_3$<i>2<i>3|"
Akronfa8b2282020-06-18 07:41:46 +020083 + "<>:div$<b>64<i>2<i>5<i>5<b>0<s>1|@:class=book$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +010084
Eliza Margaretha6f989202016-10-14 21:48:29 +020085 + "[(3-4)s:a|_4$<i>3<i>4|"
Akronfa8b2282020-06-18 07:41:46 +020086 + "<>:div$<b>64<i>3<i>5<i>5<b>0<s>1|@:class=title$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +010087
Eliza Margaretha6f989202016-10-14 21:48:29 +020088 + "[(4-5)s:b|_5$<i>4<i>5|" + "<>:div$<b>64<i>4<i>5<i>5<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020089 + "@:class=header$<b>17<s>1<i>5|@:class=book$<b>17<s>1<i>5|@:class=title$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +010090
Eliza Margaretha6f989202016-10-14 21:48:29 +020091 + "[(5-6)s:d|_6$<i>5<i>6|" + "<>:s$<b>64<i>5<i>6<i>6<b>0<s>2|"
Akronfa8b2282020-06-18 07:41:46 +020092 + "<>:div$<b>64<i>5<i>6<i>6<b>0<s>1|@:class=header$<b>17<s>1<i>6]"
margaretha10da63e2015-12-18 15:42:52 +010093
Eliza Margaretha6f989202016-10-14 21:48:29 +020094 + "[(6-7)s:d|_7$<i>6<i>7|" + "<>:s$<b>64<i>6<i>7<i>7<b>0<s>2|"
95 + "<>:div$<b>64<i>6<i>7<i>7<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +020096 + "@:class=header$<b>17<s>1<i>7|@:class=title$<b>17<s>1<i>7]");
Eliza Margarethaafe98122015-01-23 17:37:57 +000097
98 return fd;
99 }
100
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101
102 private FieldDocument createFieldDoc2 () {
Eliza Margarethaafe98122015-01-23 17:37:57 +0000103 fd = new FieldDocument();
104 fd.addString("ID", "doc-1");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200105 fd.addTV("base", "bcbabd", "[(0-1)s:b|_1$<i>0<i>1|"
106 + "<>:div$<b>64<i>0<i>3<i>3<b>0<s>2|"
107 + "<>:s$<b>64<i>0<i>5<i>5<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +0200108 + "@:class=header$<b>17<s>2<i>3|@:class=book$<b>17<s>1<i>5|@:class=book$<b>17<s>2<i>3]"
margaretha10da63e2015-12-18 15:42:52 +0100109
Eliza Margaretha6f989202016-10-14 21:48:29 +0200110 + "[(1-2)s:e|_2$<i>1<i>2|" + "<>:a$<b>64<i>1<i>2<i>2<b>0<s>2|"
111 + "<>:div$<b>64<i>1<i>2<i>2<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +0200112 + "@:class=book$<b>17<s>2<i>2|@:class=header$<b>17<s>1<i>2]"
margaretha10da63e2015-12-18 15:42:52 +0100113
Eliza Margaretha6f989202016-10-14 21:48:29 +0200114 + "[(2-3)s:b|_3$<i>2<i>3|" + "<>:a$<b>64<i>1<i>2<i>2<b>0<s>2|"
115 + "<>:div$<b>64<i>2<i>3<i>5<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +0200116 + "@:class=header$<b>17<s>2<i>2|@:class=book$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +0100117
Eliza Margaretha6f989202016-10-14 21:48:29 +0200118 + "[(3-4)s:a|_4$<i>3<i>4|"
Akronfa8b2282020-06-18 07:41:46 +0200119 + "<>:div$<b>64<i>3<i>5<i>5<b>0<s>1|@:class=title$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +0100120
Eliza Margaretha6f989202016-10-14 21:48:29 +0200121 + "[(4-5)s:b|_5$<i>4<i>5|"
Akronfa8b2282020-06-18 07:41:46 +0200122 + "<>:div$<b>64<i>4<i>5<i>5<b>0<s>1|@:class=header$<b>17<s>1<i>5|@:class=book$<b>17<s>1<i>5]"
margaretha10da63e2015-12-18 15:42:52 +0100123
Eliza Margaretha6f989202016-10-14 21:48:29 +0200124 + "[(5-6)s:d|_6$<i>5<i>6|" + "<>:s$<b>64<i>5<i>6<i>6<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +0200125 + "<>:div$<b>64<i>5<i>6<i>6<b>0<s>1|@:class=header$<b>17<s>1<i>6]"
margaretha10da63e2015-12-18 15:42:52 +0100126
Eliza Margaretha6f989202016-10-14 21:48:29 +0200127 + "[(6-7)s:d|_7$<i>6<i>7|" + "<>:s$<b>64<i>6<i>7<i>7<b>0<s>2|"
128 + "<>:div$<b>64<i>6<i>7<i>7<b>0<s>1|"
Akronfa8b2282020-06-18 07:41:46 +0200129 + "@:class=header$<b>17<s>1<i>7|@:class=book$<b>17<s>2<i>7]");
Eliza Margarethaafe98122015-01-23 17:37:57 +0000130
131 return fd;
132 }
133
Nils Diewaldbb33da22015-03-04 16:24:25 +0000134
Eliza Margarethaafe98122015-01-23 17:37:57 +0000135 /**
136 * Test matching elementRef
137 *
138 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200139 */
Eliza Margarethaafe98122015-01-23 17:37:57 +0000140 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 public void testCase1 () throws IOException {
Eliza Margarethaafe98122015-01-23 17:37:57 +0000142 ki.addDoc(createFieldDoc0());
143 ki.commit();
144
Eliza Margaretha6f989202016-10-14 21:48:29 +0200145 SpanAttributeQuery saq = new SpanAttributeQuery(
146 new SpanTermQuery(new Term("base", "@:class=header")), true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000147
Akron42993552016-02-04 13:24:24 +0100148 SpanElementQuery seq = new SpanElementQuery("base", "div");
margaretha69726b12015-12-10 12:03:19 +0100149
Akron42993552016-02-04 13:24:24 +0100150 // div with @class=header
151 SpanQuery sq = new SpanWithAttributeQuery(seq, saq, true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000152
153 kr = ki.search(sq, (short) 10);
154
Akron42993552016-02-04 13:24:24 +0100155 // for (int i = 0; i < kr.getTotalResults(); i++) {
156 // System.out.println(kr.getMatch(i).getLocalDocID() + " "
157 // + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
158 // }
159 //
160 assertEquals((long) 4, kr.getTotalResults());
161 assertEquals(0, kr.getMatch(0).getStartPos());
162 assertEquals(2, kr.getMatch(0).getEndPos());
163 assertEquals(0, kr.getMatch(1).getStartPos());
164 assertEquals(3, kr.getMatch(1).getEndPos());
165 assertEquals(3, kr.getMatch(2).getStartPos());
166 assertEquals(5, kr.getMatch(2).getEndPos());
167 assertEquals(6, kr.getMatch(3).getStartPos());
168 assertEquals(7, kr.getMatch(3).getEndPos());
Eliza Margarethaafe98122015-01-23 17:37:57 +0000169 }
170
Nils Diewaldbb33da22015-03-04 16:24:25 +0000171
Eliza Margarethaafe98122015-01-23 17:37:57 +0000172 /**
173 * Test multiple attributes and negation
174 *
175 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200176 */
Eliza Margarethaafe98122015-01-23 17:37:57 +0000177 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000178 public void testCase2 () throws IOException {
Eliza Margarethaafe98122015-01-23 17:37:57 +0000179 ki.addDoc(createFieldDoc1());
180 ki.commit();
181 // header and title
182 List<SpanQuery> sql = new ArrayList<>();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200183 sql.add(new SpanAttributeQuery(
184 new SpanTermQuery(new Term("base", "@:class=header")), true));
185 sql.add(new SpanAttributeQuery(
186 new SpanTermQuery(new Term("base", "@:class=title")), true));
Eliza Margarethaafe98122015-01-23 17:37:57 +0000187
Eliza Margaretha6f989202016-10-14 21:48:29 +0200188 SpanQuery sq = new SpanWithAttributeQuery(
189 new SpanElementQuery("base", "div"), sql, true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000190
191 kr = ki.search(sq, (short) 10);
192
193 assertEquals((long) 4, kr.getTotalResults());
194 assertEquals(0, kr.getMatch(0).getStartPos());
195 assertEquals(3, kr.getMatch(0).getEndPos());
196 assertEquals(1, kr.getMatch(1).getStartPos());
197 assertEquals(2, kr.getMatch(1).getEndPos());
198 assertEquals(4, kr.getMatch(2).getStartPos());
199 assertEquals(5, kr.getMatch(2).getEndPos());
200 assertEquals(6, kr.getMatch(3).getStartPos());
201 assertEquals(7, kr.getMatch(3).getEndPos());
202
203 // Add not Attribute
204 // header and title, not book
Eliza Margaretha6f989202016-10-14 21:48:29 +0200205 sql.add(new SpanAttributeQuery(
206 new SpanTermQuery(new Term("base", "@:class=book")), true,
207 true));
Eliza Margarethaafe98122015-01-23 17:37:57 +0000208
209 sq = new SpanWithAttributeQuery(new SpanElementQuery("base", "div"),
210 sql, true);
211
212 kr = ki.search(sq, (short) 10);
213
214 assertEquals((long) 2, kr.getTotalResults());
215 assertEquals(1, kr.getMatch(0).getStartPos());
216 assertEquals(2, kr.getMatch(0).getEndPos());
217 assertEquals(6, kr.getMatch(1).getStartPos());
218 assertEquals(7, kr.getMatch(1).getEndPos());
219
220 // Test multiple negations
221 // header, not title, not book
222 sql.remove(1);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200223 sql.add(new SpanAttributeQuery(
224 new SpanTermQuery(new Term("base", "@:class=title")), true,
225 true));
Eliza Margarethaafe98122015-01-23 17:37:57 +0000226
227 sq = new SpanWithAttributeQuery(new SpanElementQuery("base", "div"),
228 sql, true);
229
Akronfa8b2282020-06-18 07:41:46 +0200230 assertEquals("spanElementWithAttribute(<base:div />, "+
231 "[spanAttribute(base:@:class=header), "+
232 "spanAttribute(!base:@:class=book), "+
233 "spanAttribute(!base:@:class=title)])", sq.toString());
234
Eliza Margarethaafe98122015-01-23 17:37:57 +0000235 kr = ki.search(sq, (short) 10);
236 assertEquals((long) 1, kr.getTotalResults());
237 assertEquals(5, kr.getMatch(0).getStartPos());
238 assertEquals(6, kr.getMatch(0).getEndPos());
239 }
240
Eliza Margaretha05bff462015-02-18 18:18:26 +0000241
Nils Diewaldbb33da22015-03-04 16:24:25 +0000242 /**
243 * Element with only not attributes
244 *
245 * @throws IOException
Eliza Margaretha6f989202016-10-14 21:48:29 +0200246 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000247 @Test
248 public void testcase9 () throws IOException {
Eliza Margaretha05bff462015-02-18 18:18:26 +0000249
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250 ki.addDoc(createFieldDoc2());
251 ki.commit();
Eliza Margaretha05bff462015-02-18 18:18:26 +0000252
Eliza Margaretha6f989202016-10-14 21:48:29 +0200253 SpanAttributeQuery saq = new SpanAttributeQuery(
254 new SpanTermQuery(new Term("base", "@:class=book")), true,
255 true);
256 SpanQuery sq = new SpanWithAttributeQuery(
257 new SpanElementQuery("base", "div"), saq, true);
Eliza Margaretha05bff462015-02-18 18:18:26 +0000258
Nils Diewaldbb33da22015-03-04 16:24:25 +0000259 kr = ki.search(sq, (short) 10);
260 assertEquals(4, kr.getTotalResults());
261 assertEquals(1, kr.getMatch(0).getStartPos());
262 assertEquals(2, kr.getMatch(0).getEndPos());
263 assertEquals(3, kr.getMatch(1).getStartPos());
264 assertEquals(5, kr.getMatch(1).getEndPos());
265 assertEquals(5, kr.getMatch(2).getStartPos());
266 assertEquals(6, kr.getMatch(2).getEndPos());
267 assertEquals(6, kr.getMatch(3).getStartPos());
268 assertEquals(7, kr.getMatch(3).getEndPos());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000269
Nils Diewaldbb33da22015-03-04 16:24:25 +0000270 List<SpanQuery> sql = new ArrayList<>();
271 sql.add(saq);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200272 sql.add(new SpanAttributeQuery(
273 new SpanTermQuery(new Term("base", "@:class=header")), true,
274 true));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000275 sq = new SpanWithAttributeQuery(new SpanElementQuery("base", "div"),
276 sql, true);
Eliza Margaretha05bff462015-02-18 18:18:26 +0000277
Nils Diewaldbb33da22015-03-04 16:24:25 +0000278 kr = ki.search(sq, (short) 10);
279 assertEquals(1, kr.getTotalResults());
280 assertEquals(3, kr.getMatch(0).getStartPos());
281 assertEquals(5, kr.getMatch(0).getEndPos());
282
margaretha69726b12015-12-10 12:03:19 +0100283
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 }
285
Eliza Margaretha05bff462015-02-18 18:18:26 +0000286
Eliza Margarethaafe98122015-01-23 17:37:57 +0000287 /**
288 * same attribute types referring to different element types
Eliza Margaretha6f989202016-10-14 21:48:29 +0200289 */
Eliza Margarethaafe98122015-01-23 17:37:57 +0000290 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000291 public void testCase3 () throws IOException {
Eliza Margarethaafe98122015-01-23 17:37:57 +0000292 ki.addDoc(createFieldDoc2());
293 ki.commit();
294
295 List<SpanQuery> sql = new ArrayList<>();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200296 sql.add(new SpanAttributeQuery(
297 new SpanTermQuery(new Term("base", "@:class=header")), true));
298 sql.add(new SpanAttributeQuery(
299 new SpanTermQuery(new Term("base", "@:class=book")), true,
300 true));
301 SpanQuery sq = new SpanWithAttributeQuery(
302 new SpanElementQuery("base", "div"), sql, true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000303
304 kr = ki.search(sq, (short) 10);
305
306 assertEquals((long) 3, kr.getTotalResults());
307 assertEquals(1, kr.getMatch(0).getStartPos());
308 assertEquals(2, kr.getMatch(0).getEndPos());
309 assertEquals(5, kr.getMatch(1).getStartPos());
310 assertEquals(6, kr.getMatch(1).getEndPos());
311 assertEquals(6, kr.getMatch(2).getStartPos());
312 assertEquals(7, kr.getMatch(2).getEndPos());
313 }
314
Nils Diewaldbb33da22015-03-04 16:24:25 +0000315
Eliza Margarethaafe98122015-01-23 17:37:57 +0000316 /** Test skipto doc for spanWithAttribute */
317 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000318 public void testCase4 () throws IOException {
Eliza Margarethaafe98122015-01-23 17:37:57 +0000319 ki.addDoc(createFieldDoc1());
320 ki.addDoc(createFieldDoc0());
321 ki.addDoc(createFieldDoc2());
322 ki.commit();
323
Eliza Margaretha6f989202016-10-14 21:48:29 +0200324 SpanAttributeQuery saq = new SpanAttributeQuery(
325 new SpanTermQuery(new Term("base", "@:class=book")), true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000326
327 List<SpanQuery> sql = new ArrayList<>();
328 sql.add(saq);
329
330 SpanWithAttributeQuery sq = new SpanWithAttributeQuery(
331 new SpanElementQuery("base", "div"), sql, true);
332
333 kr = ki.search(sq, (short) 10);
margaretha14f918d2015-12-11 11:48:07 +0100334
335 // for (int i = 0; i < kr.getTotalResults(); i++) {
336 // System.out.println(kr.getMatch(i).getLocalDocID() + " "
337 // + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
338 // }
339
Akron42993552016-02-04 13:24:24 +0100340 assertEquals((long) 6, kr.getTotalResults());
Eliza Margarethaafe98122015-01-23 17:37:57 +0000341
Eliza Margaretha6f989202016-10-14 21:48:29 +0200342 SpanNextQuery snq = new SpanNextQuery(
343 new SpanTermQuery(new Term("base", "s:e")), sq);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000344
345 kr = ki.search(snq, (short) 10);
346
347 assertEquals((long) 1, kr.getTotalResults());
348 assertEquals(2, kr.getMatch(0).getLocalDocID());
349 assertEquals(1, kr.getMatch(0).getStartPos());
350 assertEquals(5, kr.getMatch(0).getEndPos());
351 }
352
Nils Diewaldbb33da22015-03-04 16:24:25 +0000353
Eliza Margarethaafe98122015-01-23 17:37:57 +0000354 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000355 * Arbitrary elements with a specific attribute.
Eliza Margaretha6f989202016-10-14 21:48:29 +0200356 */
Eliza Margarethaafe98122015-01-23 17:37:57 +0000357 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000358 public void testCase5 () throws IOException {
359 ki.addDoc(createFieldDoc2());
Eliza Margarethaafe98122015-01-23 17:37:57 +0000360 ki.commit();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200361 SpanAttributeQuery saq = new SpanAttributeQuery(
362 new SpanTermQuery(new Term("base", "@:class=book")), true);
Eliza Margarethaafe98122015-01-23 17:37:57 +0000363
Nils Diewaldbb33da22015-03-04 16:24:25 +0000364 SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(saq, true);
365 kr = ki.search(swaq, (short) 10);
366 assertEquals(6, kr.getTotalResults());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000367
Nils Diewaldbb33da22015-03-04 16:24:25 +0000368 assertEquals(0, kr.getMatch(0).getStartPos());
369 assertEquals(3, kr.getMatch(0).getEndPos());
370 assertEquals(0, kr.getMatch(1).getStartPos());
371 assertEquals(5, kr.getMatch(1).getEndPos());
372 assertEquals(1, kr.getMatch(2).getStartPos());
373 assertEquals(2, kr.getMatch(2).getEndPos());
374 assertEquals(2, kr.getMatch(3).getStartPos());
375 assertEquals(5, kr.getMatch(3).getEndPos());
376 assertEquals(4, kr.getMatch(4).getStartPos());
377 assertEquals(5, kr.getMatch(4).getEndPos());
378 assertEquals(6, kr.getMatch(5).getStartPos());
379 assertEquals(7, kr.getMatch(5).getEndPos());
Eliza Margarethaafe98122015-01-23 17:37:57 +0000380 }
Eliza Margaretha05bff462015-02-18 18:18:26 +0000381
Eliza Margaretha05bff462015-02-18 18:18:26 +0000382
Nils Diewaldbb33da22015-03-04 16:24:25 +0000383 /**
384 * Arbitrary elements with multiple attributes.
Eliza Margaretha6f989202016-10-14 21:48:29 +0200385 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000386 @Test
387 public void testCase6 () throws IOException {
388 ki.addDoc(createFieldDoc2());
389 ki.commit();
Eliza Margaretha05bff462015-02-18 18:18:26 +0000390
Nils Diewaldbb33da22015-03-04 16:24:25 +0000391 List<SpanQuery> sql = new ArrayList<>();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200392 sql.add(new SpanAttributeQuery(
393 new SpanTermQuery(new Term("base", "@:class=header")), true));
394 sql.add(new SpanAttributeQuery(
395 new SpanTermQuery(new Term("base", "@:class=book")), true));
Eliza Margaretha05bff462015-02-18 18:18:26 +0000396
Nils Diewaldbb33da22015-03-04 16:24:25 +0000397 SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(sql, true);
398 kr = ki.search(swaq, (short) 10);
399 assertEquals(2, kr.getTotalResults());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000400
Nils Diewaldbb33da22015-03-04 16:24:25 +0000401 assertEquals(0, kr.getMatch(0).getStartPos());
402 assertEquals(3, kr.getMatch(0).getEndPos());
403 assertEquals(4, kr.getMatch(1).getStartPos());
404 assertEquals(5, kr.getMatch(1).getEndPos());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000405
Nils Diewaldbb33da22015-03-04 16:24:25 +0000406 }
Eliza Margaretha05bff462015-02-18 18:18:26 +0000407
Eliza Margaretha05bff462015-02-18 18:18:26 +0000408
Nils Diewaldbb33da22015-03-04 16:24:25 +0000409 /**
410 * Arbitrary elements with an attribute and a not attribute.
Eliza Margaretha6f989202016-10-14 21:48:29 +0200411 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000412 @Test
413 public void testCase7 () throws IOException {
414 ki.addDoc(createFieldDoc2());
415 ki.commit();
Eliza Margaretha05bff462015-02-18 18:18:26 +0000416
Nils Diewaldbb33da22015-03-04 16:24:25 +0000417 List<SpanQuery> sql = new ArrayList<>();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200418 sql.add(new SpanAttributeQuery(
419 new SpanTermQuery(new Term("base", "@:class=header")), true,
420 true));
421 sql.add(new SpanAttributeQuery(
422 new SpanTermQuery(new Term("base", "@:class=book")), true));
Eliza Margaretha05bff462015-02-18 18:18:26 +0000423
Nils Diewaldbb33da22015-03-04 16:24:25 +0000424 SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(sql, true);
425 kr = ki.search(swaq, (short) 10);
426 assertEquals(4, kr.getTotalResults());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000427
Nils Diewaldbb33da22015-03-04 16:24:25 +0000428 assertEquals(0, kr.getMatch(0).getStartPos());
429 assertEquals(5, kr.getMatch(0).getEndPos());
430 assertEquals(1, kr.getMatch(1).getStartPos());
431 assertEquals(2, kr.getMatch(1).getEndPos());
432 assertEquals(2, kr.getMatch(2).getStartPos());
433 assertEquals(5, kr.getMatch(2).getEndPos());
434 assertEquals(6, kr.getMatch(3).getStartPos());
435 assertEquals(7, kr.getMatch(3).getEndPos());
Eliza Margaretha05bff462015-02-18 18:18:26 +0000436
Nils Diewaldbb33da22015-03-04 16:24:25 +0000437 // for (int i = 0; i < kr.getTotalResults(); i++) {
438 // System.out.println(kr.getMatch(i).getLocalDocID() + " "
439 // + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
440 // }
441 }
Eliza Margaretha05bff462015-02-18 18:18:26 +0000442
Nils Diewaldbb33da22015-03-04 16:24:25 +0000443
444 /**
445 * Arbitrary elements with only not attributes.
Eliza Margaretha6f989202016-10-14 21:48:29 +0200446 */
margaretha05a4bc12022-02-11 10:55:43 +0100447 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000448 public void testCase8 () throws IOException {
449 ki.addDoc(createFieldDoc2());
450 ki.commit();
451
452 List<SpanQuery> sql = new ArrayList<>();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200453 sql.add(new SpanAttributeQuery(
454 new SpanTermQuery(new Term("base", "@:class=header")), true,
455 true));
456 sql.add(new SpanAttributeQuery(
457 new SpanTermQuery(new Term("base", "@:class=book")), true,
458 true));
Nils Diewaldbb33da22015-03-04 16:24:25 +0000459
460 SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(sql, true);
461 kr = ki.search(swaq, (short) 10);
margaretha05a4bc12022-02-11 10:55:43 +0100462
463 assertEquals(1, kr.getErrors().size());
464 assertEquals(104, kr.getError(0).getCode());
465 assertEquals("No (positive) attribute is defined.",kr.getError(0).getMessage());
466
Nils Diewaldbb33da22015-03-04 16:24:25 +0000467 }
Eliza Margaretha05bff462015-02-18 18:18:26 +0000468
Akron89266462021-11-08 14:19:35 +0100469 @Test
470 public void testIndexOutOfBoundsBug () throws IOException {
471 FieldDocument fd = new FieldDocument();
472 fd.addString("ID", "doc-1");
473 fd.addTV("base", "abc",
474 "[(0-1)s:a|_1$<i>0<i>1|<>:a$<b>64<i>0<i>3<i>3<b>0<s>1|@:x=y$<b>17<s>1<i>3]"
475 + "[(1-2)s:b|_2$<i>1<i>2|<>:d$<b>65<i>1<b>0<s>7|@:x=y$<b>17<s>7]"
476 + "[(2-3)s:c|_3$<i>2<i>3|<>:a$<b>64<i>2<i>3<i>3<b>0<s>2]");
477
478 ki.addDoc(fd);
479 ki.commit();
480
481 fd = new FieldDocument();
482 fd.addString("ID", "doc-2");
483 fd.addTV("base", "abc",
484 "[(0-1)s:a|_1$<i>0<i>1|<>:a$<b>64<i>0<i>3<i>3<b>0<s>1|@:x=y$<b>17<s>1<i>3]"
485 + "[(1-2)s:b|_2$<i>1<i>2]"
486 + "[(2-3)s:c|_3$<i>2<i>3]");
487
488 ki.addDoc(fd);
489 ki.commit();
490
491 // Check <a x=y>
492 SpanQuery sq = new SpanWithAttributeQuery(
493 new SpanElementQuery("base", "a"),
494 new SpanAttributeQuery(new SpanTermQuery(new Term("base", "@:x=y")), true),
495 true
496 );
497
498 assertEquals("spanElementWithAttribute(<base:a />, spanAttribute(base:@:x=y))", sq.toString());
499 kr = ki.search(sq, (short) 10);
500
501 assertEquals(2, kr.getTotalResults());
502
503 assertEquals(0, kr.getMatch(0).getStartPos());
504 assertEquals(3, kr.getMatch(0).getEndPos());
505 assertEquals(0, kr.getMatch(1).getStartPos());
506 assertEquals(3, kr.getMatch(1).getEndPos());
507 }
508
Akronfa8b2282020-06-18 07:41:46 +0200509
510 @Test
511 public void testAttributeRealIndex () throws QueryException, IOException {
512 // Construct index
513 KrillIndex ki = new KrillIndex();
514 // Indexing test files
515 ki.addDoc(getClass().getResourceAsStream("/others/REDEW-DOC1-00001.json.gz"),
516 true);
517 ki.commit();
518
519 SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "@:dereko/s:mode:direct"));
520 kr = ki.search(stq, (short) 10);
521 assertEquals((long) 4, kr.getTotalResults());
522
523
524 SpanAttributeQuery saq = new SpanAttributeQuery(
525 new SpanTermQuery(new Term("tokens", "@:dereko/s:mode:direct")), true);
526
527 SpanElementQuery seq = new SpanElementQuery("tokens", "dereko/s:said");
528
529 // div with @class=header
530 SpanQuery sq = new SpanWithAttributeQuery(seq, saq, true);
531 assertEquals("spanElementWithAttribute(<tokens:dereko/s:said />, " +
532 "spanAttribute(tokens:@:dereko/s:mode:direct))", sq.toString());
533
534 kr = ki.search(sq, (short) 10);
535 assertEquals((long) 4, kr.getTotalResults());
536
537
538 String filepath = getClass()
539 .getResource(
540 "/queries/attribute/element-single-attribute-2.jsonld")
541 .getFile();
542
543 SpanQueryWrapper sqw = getJsonQuery(filepath);
544 Krill krill = new Krill(sqw);
545 assertEquals("spanElementWithAttribute(<tokens:dereko/s:said />, " +
546 "spanAttribute(tokens:@:dereko/s:mode:direct))", krill.getSpanQuery().toString());
547 Result kr = krill.apply(ki);
548 assertEquals(kr.getTotalResults(), 4);
549 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000550}