blob: da7e15889f2e05c414c15cc0679a1420096204ca [file] [log] [blame]
Akron3ba74f22015-07-24 18:46:17 +02001package de.ids_mannheim.korap.collection;
Akron40550172015-08-04 03:06:12 +02002
margaretha2a30bd42021-02-01 16:56:36 +01003import static de.ids_mannheim.korap.TestSimple.getJsonString;
4import static org.junit.Assert.assertEquals;
margaretha2a30bd42021-02-01 16:56:36 +01005import static org.junit.Assert.assertTrue;
Akron3ba74f22015-07-24 18:46:17 +02006
margaretha2a30bd42021-02-01 16:56:36 +01007import java.io.IOException;
Akronb59f40e2018-08-23 17:15:43 +02008
Akronfd05f502015-07-30 18:34:26 +02009import org.apache.lucene.index.Term;
Akronfd05f502015-07-30 18:34:26 +020010import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
margaretha2a30bd42021-02-01 16:56:36 +010012import org.junit.Test;
Akron3ba74f22015-07-24 18:46:17 +020013import org.junit.runner.RunWith;
14import org.junit.runners.JUnit4;
15
margaretha2a30bd42021-02-01 16:56:36 +010016import de.ids_mannheim.korap.Krill;
17import de.ids_mannheim.korap.KrillCollection;
18import de.ids_mannheim.korap.KrillIndex;
19import de.ids_mannheim.korap.index.FieldDocument;
20import de.ids_mannheim.korap.query.QueryBuilder;
21import de.ids_mannheim.korap.response.Result;
22import de.ids_mannheim.korap.response.SearchContext;
margaretha2a30bd42021-02-01 16:56:36 +010023import de.ids_mannheim.korap.util.StatusCodes;
Akronb59f40e2018-08-23 17:15:43 +020024
25
Akron3ba74f22015-07-24 18:46:17 +020026@RunWith(JUnit4.class)
27public class TestKrillCollectionIndex {
28 private KrillIndex ki;
29
Akronb59f40e2018-08-23 17:15:43 +020030 final String path = "/queries/collections/";
Akron40550172015-08-04 03:06:12 +020031
Akron3ba74f22015-07-24 18:46:17 +020032 @Test
margarethaee683ff2017-07-03 12:27:28 +020033 public void testKrillCollectionWithWrongJson () throws IOException {
34 ki = new KrillIndex();
35 ki.addDoc(createDoc1());
36 ki.addDoc(createDoc2());
37 ki.addDoc(createDoc3());
38 ki.commit();
39
40 KrillCollection kc = new KrillCollection("{lalala}");
Akron5e3436f2017-07-04 15:28:03 +020041 assertEquals("Unable to parse JSON", kc.getError(0).getMessage());
margarethaee683ff2017-07-03 12:27:28 +020042 kc.setIndex(ki);
Akron5e3436f2017-07-04 15:28:03 +020043
margarethaee683ff2017-07-03 12:27:28 +020044 long docs = 0, tokens = 0, sentences = 0, paragraphs = 0;
45 try {
46 docs = kc.numberOf("documents");
47 tokens = kc.numberOf("tokens");
48 sentences = kc.numberOf("sentences");
49 paragraphs = kc.numberOf("paragraphs");
50 }
51 catch (IOException e) {
52 e.printStackTrace();
53 }
54 assertEquals(0, docs);
55 assertEquals(0, tokens);
56 assertEquals(0, sentences);
57 assertEquals(0, paragraphs);
margaretha30a03ae2017-07-11 19:04:09 +020058
59 assertEquals(1, kc.getErrors().size());
60 assertEquals(StatusCodes.UNABLE_TO_PARSE_JSON, kc.getErrors().get(0).getCode());
margarethaee683ff2017-07-03 12:27:28 +020061 }
62
63
64 @Test
Akron3ba74f22015-07-24 18:46:17 +020065 public void testIndexWithCollectionBuilder () throws IOException {
66 ki = new KrillIndex();
67 ki.addDoc(createDoc1());
68 ki.addDoc(createDoc2());
69 ki.addDoc(createDoc3());
70 ki.commit();
Akron176c9b12015-07-29 19:53:40 +020071 CollectionBuilder cb = new CollectionBuilder();
72 KrillCollection kcn = new KrillCollection(ki);
Akron3ba74f22015-07-24 18:46:17 +020073
74 // Simple string tests
75 kcn.fromBuilder(cb.term("author", "Frank"));
76 assertEquals(1, kcn.docCount());
77
78 kcn.fromBuilder(cb.term("author", "Peter"));
79 assertEquals(1, kcn.docCount());
80
81 kcn.fromBuilder(cb.term("author", "Sebastian"));
82 assertEquals(1, kcn.docCount());
83
84 kcn.fromBuilder(cb.term("author", "Michael"));
85 assertEquals(0, kcn.docCount());
86
margarethaee683ff2017-07-03 12:27:28 +020087 kcn.fromBuilder(cb.term("nothing", "nothing"));
Akronc346ce42017-07-02 19:14:07 +020088 assertEquals(0, kcn.docCount());
89
Akron3ba74f22015-07-24 18:46:17 +020090 kcn.fromBuilder(cb.term("textClass", "reisen"));
91 assertEquals(3, kcn.docCount());
92
93 kcn.fromBuilder(cb.term("textClass", "kultur"));
94 assertEquals(2, kcn.docCount());
95
96 kcn.fromBuilder(cb.term("textClass", "finanzen"));
97 assertEquals(1, kcn.docCount());
98
99 // Simple orGroup tests
Akron40550172015-08-04 03:06:12 +0200100 kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank"))
101 .with(cb.term("author", "Michael")));
Akron3ba74f22015-07-24 18:46:17 +0200102 assertEquals(1, kcn.docCount());
103
Akron40550172015-08-04 03:06:12 +0200104 kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank"))
105 .with(cb.term("author", "Sebastian")));
Akron3ba74f22015-07-24 18:46:17 +0200106 assertEquals(2, kcn.docCount());
107
Akron176c9b12015-07-29 19:53:40 +0200108 kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank"))
Akron40550172015-08-04 03:06:12 +0200109 .with(cb.term("author", "Sebastian"))
110 .with(cb.term("author", "Peter")));
Akron3ba74f22015-07-24 18:46:17 +0200111 assertEquals(3, kcn.docCount());
112
Akron176c9b12015-07-29 19:53:40 +0200113 kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Huhu"))
Akron40550172015-08-04 03:06:12 +0200114 .with(cb.term("author", "Haha"))
115 .with(cb.term("author", "Hehe")));
Akron3ba74f22015-07-24 18:46:17 +0200116 assertEquals(0, kcn.docCount());
117
118 // Multi field orGroup tests
Akron40550172015-08-04 03:06:12 +0200119 kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1"))
120 .with(cb.term("author", "Peter")));
Akron3ba74f22015-07-24 18:46:17 +0200121 assertEquals(2, kcn.docCount());
122
Akron40550172015-08-04 03:06:12 +0200123 kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1"))
124 .with(cb.term("author", "Frank")));
Akron3ba74f22015-07-24 18:46:17 +0200125 assertEquals(1, kcn.docCount());
126
Akron40550172015-08-04 03:06:12 +0200127 kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1"))
128 .with(cb.term("author", "Michael")));
Akron3ba74f22015-07-24 18:46:17 +0200129 assertEquals(1, kcn.docCount());
130
131 // Simple andGroup tests
Akron40550172015-08-04 03:06:12 +0200132 kcn.fromBuilder(cb.andGroup().with(cb.term("author", "Frank"))
133 .with(cb.term("author", "Michael")));
Akron3ba74f22015-07-24 18:46:17 +0200134 assertEquals(0, kcn.docCount());
135
Akron40550172015-08-04 03:06:12 +0200136 kcn.fromBuilder(cb.andGroup().with(cb.term("ID", "doc-1"))
137 .with(cb.term("author", "Frank")));
Akron3ba74f22015-07-24 18:46:17 +0200138 assertEquals(1, kcn.docCount());
139
140 // andGroup in keyword field test
Akron40550172015-08-04 03:06:12 +0200141 kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "reisen"))
142 .with(cb.term("textClass", "finanzen")));
Akron3ba74f22015-07-24 18:46:17 +0200143 assertEquals(1, kcn.docCount());
144
Akron40550172015-08-04 03:06:12 +0200145 kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "reisen"))
146 .with(cb.term("textClass", "kultur")));
Akron3ba74f22015-07-24 18:46:17 +0200147 assertEquals(2, kcn.docCount());
148
Akron40550172015-08-04 03:06:12 +0200149 kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "finanzen"))
150 .with(cb.term("textClass", "kultur")));
Akron3ba74f22015-07-24 18:46:17 +0200151 assertEquals(0, kcn.docCount());
Akron80cba8d2015-07-27 17:27:46 +0200152
Akron26207572018-04-04 20:21:42 +0200153 kcn.fromBuilder(cb.term("text", "mann"));
Akron80cba8d2015-07-27 17:27:46 +0200154 assertEquals(3, kcn.docCount());
155
Akron26207572018-04-04 20:21:42 +0200156 kcn.fromBuilder(cb.term("text", "frau"));
Akron80cba8d2015-07-27 17:27:46 +0200157 assertEquals(1, kcn.docCount());
Akron3ba74f22015-07-24 18:46:17 +0200158 };
159
Akron40550172015-08-04 03:06:12 +0200160
Akron5a44f2b2018-01-09 23:26:41 +0100161 @Test
162 public void testIndexWithRegex () throws IOException {
163 ki = new KrillIndex();
164 ki.addDoc(createDoc1());
165 ki.addDoc(createDoc2());
166 ki.addDoc(createDoc3());
167 ki.commit();
168 CollectionBuilder cb = new CollectionBuilder();
169 KrillCollection kcn = new KrillCollection(ki);
170
171 // Frank, Sebastian
172 kcn.fromBuilder(cb.re("author", ".*an.*"));
173 assertEquals(2, kcn.docCount());
174
175 // Kultur & Reisen,
176 // Reisen & Finanzen,
177 // Nachricht & Kultur & Reisen
178 kcn.fromBuilder(cb.re("textClass", ".*(ult|eis).*"));
179 assertEquals(3, kcn.docCount());
180
181 // Test in group
182 kcn.fromBuilder(
183 cb.andGroup().with(cb.term("textClass", "reisen")).with(cb.term("textClass", "kultur"))
184 );
185 assertEquals(2, kcn.docCount());
186
187 kcn.fromBuilder(
188 cb.andGroup().with(
189 cb.re("textClass", ".*eis.*")
190 ).with(
191 cb.re("textClass", ".*ult.*")
192 )
193 );
194 assertEquals(2, kcn.docCount());
195
196 kcn.fromBuilder(
197 cb.andGroup().with(
198 cb.re("textClass", ".*eis.*")
199 ).with(
200 cb.orGroup().with(
201 cb.re("textClass", ".*ult.*")
202 ).with(
203 cb.re("textClass", ".*nan.*")
204 )
205 )
206 );
207 assertEquals(3, kcn.docCount());
208 };
209
210
Akron3ba74f22015-07-24 18:46:17 +0200211 @Test
Akron6b0be132019-09-16 19:01:59 +0200212 public void testIndexWithNegation1 () throws IOException {
Akron3ba74f22015-07-24 18:46:17 +0200213 ki = new KrillIndex();
214 ki.addDoc(createDoc1());
215 ki.addDoc(createDoc2());
216 ki.addDoc(createDoc3());
217 ki.commit();
Akron176c9b12015-07-29 19:53:40 +0200218 CollectionBuilder cb = new CollectionBuilder();
219 KrillCollection kcn = new KrillCollection(ki);
Akron3ba74f22015-07-24 18:46:17 +0200220
221 // Simple negation tests
222 kcn.fromBuilder(cb.term("author", "Frank").not());
223 assertEquals(2, kcn.docCount());
224
225 kcn.fromBuilder(cb.term("textClass", "reisen").not());
226 assertEquals(0, kcn.docCount());
227
228 kcn.fromBuilder(cb.term("textClass", "kultur").not());
229 assertEquals(1, kcn.docCount());
230
231 // orGroup with simple Negation
Akron40550172015-08-04 03:06:12 +0200232 kcn.fromBuilder(cb.orGroup().with(cb.term("textClass", "kultur").not())
233 .with(cb.term("author", "Peter")));
Akron3ba74f22015-07-24 18:46:17 +0200234 assertEquals(2, kcn.docCount());
235
Akron40550172015-08-04 03:06:12 +0200236 kcn.fromBuilder(cb.orGroup().with(cb.term("textClass", "kultur").not())
237 .with(cb.term("author", "Sebastian")));
Akron3ba74f22015-07-24 18:46:17 +0200238 assertEquals(1, kcn.docCount());
Akron6b0be132019-09-16 19:01:59 +0200239
240 kcn.fromBuilder(
241 cb.andGroup().with(
242 cb.term("author", "Frank").not()
243 )
244 .with(
245 cb.term("author", "Sebastian").not()
246 )
247 );
248 assertEquals("AndGroup(-author:Frank -author:Sebastian)", kcn.toString());
249 assertEquals(1, kcn.docCount());
250
251
252 kcn.fromBuilder(
253 cb.andGroup().with(
254 cb.term("author", "Peter")
255 )
256 .with(
257 cb.andGroup().with(
258 cb.term("author", "Frank").not()
259 )
260 .with(
261 cb.term("author", "Sebastian").not()
262 )
263 )
264 );
265 assertEquals("AndGroup(author:Peter AndGroup(-author:Frank -author:Sebastian))", kcn.toString());
266 assertEquals(1, kcn.docCount());
267
268 kcn.fromBuilder(
269 cb.andGroup().with(
270 cb.re("textClass", "reis.*")
271 )
272 .with(
273 cb.andGroup().with(
274 cb.term("author", "Frank").not()
275 )
276 .with(
277 cb.term("author", "Sebastian").not()
278 )
279 )
280 );
281 assertEquals("AndGroup(QueryWrapperFilter(textClass:/reis.*/) AndGroup(-author:Frank -author:Sebastian))", kcn.toString());
282 assertEquals(1, kcn.docCount());
Akron3ba74f22015-07-24 18:46:17 +0200283 };
284
Akron40550172015-08-04 03:06:12 +0200285
Akron3ba74f22015-07-24 18:46:17 +0200286 @Test
Akron6b0be132019-09-16 19:01:59 +0200287 public void testIndexWithNegation2 () throws IOException {
288 ki = new KrillIndex();
289 ki.addDoc(createDoc1());
290 ki.commit();
291 ki.addDoc(createDoc2());
292 ki.commit();
293 ki.addDoc(createDoc3());
294 ki.commit();
295 CollectionBuilder cb = new CollectionBuilder();
296 KrillCollection kcn = new KrillCollection(ki);
297
298 // Simple negation tests
299 kcn.fromBuilder(cb.term("author", "Frank").not());
300 assertEquals(2, kcn.docCount());
301
302 kcn.fromBuilder(cb.term("textClass", "reisen").not());
303 assertEquals(0, kcn.docCount());
304
305 kcn.fromBuilder(cb.term("textClass", "kultur").not());
306 assertEquals(1, kcn.docCount());
307
308 // orGroup with simple Negation
309 kcn.fromBuilder(cb.orGroup().with(cb.term("textClass", "kultur").not())
310 .with(cb.term("author", "Peter")));
311 assertEquals(2, kcn.docCount());
312
313 kcn.fromBuilder(cb.orGroup().with(cb.term("textClass", "kultur").not())
314 .with(cb.term("author", "Sebastian")));
315 assertEquals(1, kcn.docCount());
316
317 kcn.fromBuilder(
318 cb.andGroup().with(
319 cb.term("author", "Frank").not()
320 )
321 .with(
322 cb.term("author", "Sebastian").not()
323 )
324 );
325 assertEquals("AndGroup(-author:Frank -author:Sebastian)", kcn.toString());
326 assertEquals(1, kcn.docCount());
327
328 kcn.fromBuilder(
329 cb.andGroup().with(
330 cb.term("author", "Peter")
331 )
332 .with(
333 cb.andGroup().with(
334 cb.term("author", "Frank").not()
335 )
336 .with(
337 cb.term("author", "Sebastian").not()
338 )
339 )
340 );
341 assertEquals("AndGroup(author:Peter AndGroup(-author:Frank -author:Sebastian))", kcn.toString());
342 assertEquals(1, kcn.docCount());
343
344 kcn.fromBuilder(
345 cb.andGroup().with(
346 cb.re("textClass", "reis..")
347 )
348 .with(
349 cb.andGroup().with(
350 cb.term("author", "Frank").not()
351 )
352 .with(
353 cb.term("author", "Sebastian").not()
354 )
355 )
356 );
357 assertEquals("AndGroup(QueryWrapperFilter(textClass:/reis../) AndGroup(-author:Frank -author:Sebastian))", kcn.toString());
358 assertEquals(1, kcn.docCount());
359 };
360
361 @Test
362 public void testIndexWithNegation3 () throws IOException {
363
364 // This is identical to above but the operands are switched
365 ki = new KrillIndex();
366 ki.addDoc(createDoc1());
367 ki.commit();
368 ki.addDoc(createDoc2());
369 ki.commit();
370 ki.addDoc(createDoc3());
371 ki.commit();
372 CollectionBuilder cb = new CollectionBuilder();
373 KrillCollection kcn = new KrillCollection(ki);
374
375 // orGroup with simple Negation
376 kcn.fromBuilder(
377 cb.orGroup().with(cb.term("author", "Peter"))
378 .with(cb.term("textClass", "kultur").not()));
379 assertEquals(2, kcn.docCount());
380
381 kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Sebastian"))
382 .with(cb.term("textClass", "kultur").not()));
383 assertEquals(1, kcn.docCount());
384
385 kcn.fromBuilder(
386 cb.andGroup().with(
387 cb.term("author", "Sebastian").not()
388 )
389 .with(
390 cb.term("author", "Frank").not()
391 )
392 );
393 assertEquals("AndGroup(-author:Sebastian -author:Frank)", kcn.toString());
394 assertEquals(1, kcn.docCount());
395
396 kcn.fromBuilder(
397 cb.andGroup().with(
398 cb.andGroup().with(
399 cb.term("author", "Sebastian").not()
400 )
401 .with(
402 cb.term("author", "Frank").not()
403 )
404 )
405 .with(
406 cb.term("author", "Peter")
407 )
408 );
409 assertEquals("AndGroup(AndGroup(-author:Sebastian -author:Frank) author:Peter)", kcn.toString());
410 assertEquals(1, kcn.docCount());
411
412 kcn.fromBuilder(
413 cb.andGroup().with(
414 cb.andGroup().with(
415 cb.term("author", "Sebastian").not()
416 )
417 .with(
418 cb.term("author", "Frank").not()
419 )
420 )
421 .with(
422 cb.re("textClass", "reis..")
423 )
424 );
425 assertEquals("AndGroup(AndGroup(-author:Sebastian -author:Frank) QueryWrapperFilter(textClass:/reis../))", kcn.toString());
426 assertEquals(1, kcn.docCount());
427 };
428
429
430 @Test
Akron80cba8d2015-07-27 17:27:46 +0200431 public void testIndexWithMultipleCommitsAndDeletes () throws IOException {
Akron3ba74f22015-07-24 18:46:17 +0200432 ki = new KrillIndex();
433 ki.addDoc(createDoc1());
434 ki.addDoc(createDoc2());
435 ki.commit();
Akron176c9b12015-07-29 19:53:40 +0200436 CollectionBuilder cb = new CollectionBuilder();
437 KrillCollection kcn = new KrillCollection(ki);
Akron3ba74f22015-07-24 18:46:17 +0200438
439 kcn.fromBuilder(cb.term("author", "Frank"));
440 assertEquals(1, kcn.docCount());
441 kcn.fromBuilder(cb.term("author", "Peter"));
442 assertEquals(1, kcn.docCount());
443 kcn.fromBuilder(cb.term("author", "Sebastian"));
444 assertEquals(0, kcn.docCount());
445 kcn.fromBuilder(cb.term("author", "Michael").not());
446 assertEquals(2, kcn.docCount());
447
448 // Add Sebastians doc
449 ki.addDoc(createDoc3());
450 ki.commit();
451
452 kcn.fromBuilder(cb.term("author", "Frank"));
453 assertEquals(1, kcn.docCount());
454 kcn.fromBuilder(cb.term("author", "Peter"));
455 assertEquals(1, kcn.docCount());
456 kcn.fromBuilder(cb.term("author", "Sebastian"));
457 assertEquals(1, kcn.docCount());
458 kcn.fromBuilder(cb.term("author", "Michael").not());
459 assertEquals(3, kcn.docCount());
460
461 // Remove one document
462 ki.delDocs("author", "Peter");
463 ki.commit();
464
465 kcn.fromBuilder(cb.term("author", "Frank"));
466 assertEquals(1, kcn.docCount());
467 kcn.fromBuilder(cb.term("author", "Peter"));
468 assertEquals(0, kcn.docCount());
469 kcn.fromBuilder(cb.term("author", "Sebastian"));
470 assertEquals(1, kcn.docCount());
471 kcn.fromBuilder(cb.term("author", "Michael").not());
472 assertEquals(2, kcn.docCount());
Akron80cba8d2015-07-27 17:27:46 +0200473
474 // Readd Peter's doc
475 ki.addDoc(createDoc2());
476 ki.commit();
477
478 kcn.fromBuilder(cb.term("author", "Frank"));
479 assertEquals(1, kcn.docCount());
480 kcn.fromBuilder(cb.term("author", "Peter"));
481 assertEquals(1, kcn.docCount());
482 kcn.fromBuilder(cb.term("author", "Sebastian"));
483 assertEquals(1, kcn.docCount());
484 kcn.fromBuilder(cb.term("author", "Michael").not());
485 assertEquals(3, kcn.docCount());
Akron3ba74f22015-07-24 18:46:17 +0200486 };
487
Akron40550172015-08-04 03:06:12 +0200488
Akron80cba8d2015-07-27 17:27:46 +0200489 @Test
490 public void testIndexWithDateRanges () throws IOException {
491 ki = new KrillIndex();
492 ki.addDoc(createDoc1());
493 ki.addDoc(createDoc2());
494 ki.addDoc(createDoc3());
495 ki.commit();
Akron176c9b12015-07-29 19:53:40 +0200496 CollectionBuilder cb = new CollectionBuilder();
497 KrillCollection kcn = new KrillCollection(ki);
Akron80cba8d2015-07-27 17:27:46 +0200498
499 kcn.fromBuilder(cb.date("pubDate", "2005"));
500 assertEquals(3, kcn.docCount());
501 kcn.fromBuilder(cb.date("pubDate", "2005-12"));
502 assertEquals(3, kcn.docCount());
503
504 kcn.fromBuilder(cb.date("pubDate", "2005-12-10"));
505 assertEquals(1, kcn.docCount());
506 kcn.fromBuilder(cb.date("pubDate", "2005-12-16"));
507 assertEquals(1, kcn.docCount());
508 kcn.fromBuilder(cb.date("pubDate", "2005-12-07"));
509 assertEquals(1, kcn.docCount());
510
511 kcn.fromBuilder(cb.since("pubDate", "2005-12-07"));
512 assertEquals(3, kcn.docCount());
513 kcn.fromBuilder(cb.since("pubDate", "2005-12-10"));
514 assertEquals(2, kcn.docCount());
515 kcn.fromBuilder(cb.since("pubDate", "2005-12-16"));
516 assertEquals(1, kcn.docCount());
517
518 kcn.fromBuilder(cb.till("pubDate", "2005-12-16"));
519 assertEquals(3, kcn.docCount());
520 kcn.fromBuilder(cb.till("pubDate", "2005-12-10"));
521 assertEquals(2, kcn.docCount());
522 kcn.fromBuilder(cb.till("pubDate", "2005-12-07"));
523 assertEquals(1, kcn.docCount());
524
525 kcn.fromBuilder(cb.date("pubDate", "2005-12-10").not());
526 assertEquals(2, kcn.docCount());
527 kcn.fromBuilder(cb.date("pubDate", "2005-12-16").not());
528 assertEquals(2, kcn.docCount());
529 kcn.fromBuilder(cb.date("pubDate", "2005-12-07").not());
530 assertEquals(2, kcn.docCount());
531 kcn.fromBuilder(cb.date("pubDate", "2005-12-09").not());
532 assertEquals(3, kcn.docCount());
533
534
535 kcn.fromBuilder(cb.till("pubDate", "2005-12-16").not());
536 assertEquals(0, kcn.docCount());
537 kcn.fromBuilder(cb.till("pubDate", "2005-12-15").not());
538 assertEquals(1, kcn.docCount());
539 kcn.fromBuilder(cb.till("pubDate", "2005-12-10").not());
540 assertEquals(1, kcn.docCount());
541 kcn.fromBuilder(cb.till("pubDate", "2005-12-09").not());
542 assertEquals(2, kcn.docCount());
543 kcn.fromBuilder(cb.till("pubDate", "2005-12-07").not());
544 assertEquals(2, kcn.docCount());
545 kcn.fromBuilder(cb.till("pubDate", "2005-12-06").not());
546 assertEquals(3, kcn.docCount());
547 };
548
549
550 @Test
551 public void testIndexWithRegexes () throws IOException {
552 ki = new KrillIndex();
553
Akron26207572018-04-04 20:21:42 +0200554 FieldDocument fd = ki.addDoc(createDoc1());
Akron80cba8d2015-07-27 17:27:46 +0200555 ki.addDoc(createDoc2());
556 ki.addDoc(createDoc3());
557 ki.commit();
558
Akron176c9b12015-07-29 19:53:40 +0200559 CollectionBuilder cb = new CollectionBuilder();
560 KrillCollection kcn = new KrillCollection(ki);
Akron80cba8d2015-07-27 17:27:46 +0200561
562 kcn.fromBuilder(cb.re("author", "Fran.*"));
563 assertEquals(1, kcn.docCount());
564 kcn.fromBuilder(cb.re("author", "Blin.*"));
565 assertEquals(0, kcn.docCount());
566 kcn.fromBuilder(cb.re("author", "Frank|Peter"));
567 assertEquals(2, kcn.docCount());
568
Akron2e5897b2018-03-29 12:07:11 +0200569 // "Frau" requires text request!
570 kcn.fromBuilder(cb.text("text", "Frau"));
571 assertEquals(1, kcn.docCount());
572
Akron26207572018-04-04 20:21:42 +0200573 kcn.fromBuilder(cb.term("text", "frau"));
Akron80cba8d2015-07-27 17:27:46 +0200574 assertEquals(1, kcn.docCount());
575
Akron26207572018-04-04 20:21:42 +0200576 kcn.fromBuilder(cb.re("text", "fra."));
Akron80cba8d2015-07-27 17:27:46 +0200577 assertEquals(1, kcn.docCount());
578
Akron26207572018-04-04 20:21:42 +0200579 kcn.fromBuilder(cb.re("text", "fra.|ma.n"));
Akron80cba8d2015-07-27 17:27:46 +0200580 assertEquals(3, kcn.docCount());
Akron26207572018-04-04 20:21:42 +0200581
582 String sv = fd.doc.getField("text").stringValue();
583 assertEquals("Der alte Mann ging über die Straße", sv);
584
585 kcn.fromBuilder(cb.term("text", sv));
586 assertEquals(1, kcn.docCount());
Akron26207572018-04-04 20:21:42 +0200587 };
Akron80cba8d2015-07-27 17:27:46 +0200588
Akron408ae352018-03-28 16:47:41 +0200589 @Test
590 public void testIndexWithTextStringQueries () throws IOException {
591 ki = new KrillIndex();
592 ki.addDoc(createDoc1());
593 ki.commit();
594
595 CollectionBuilder cb = new CollectionBuilder();
596 KrillCollection kcn = new KrillCollection(ki);
597
Akron26207572018-04-04 20:21:42 +0200598 kcn.fromBuilder(cb.term("text", "mann"));
Akron2e5897b2018-03-29 12:07:11 +0200599 assertEquals(1, kcn.docCount());
600
Akron26207572018-04-04 20:21:42 +0200601 kcn.fromBuilder(cb.term("text", "Der alte Mann ging über die Straße"));
Akron2e5897b2018-03-29 12:07:11 +0200602 assertEquals(1, kcn.docCount());
603
Akron26207572018-04-04 20:21:42 +0200604 kcn.fromBuilder(cb.text("text", "Der alte Mann"));
605 assertEquals(kcn.toString(), "QueryWrapperFilter(text:\"der alte mann\")");
606 assertEquals(1, kcn.docCount());
Akron408ae352018-03-28 16:47:41 +0200607 };
608
Akronb59f40e2018-08-23 17:15:43 +0200609 @Test
610 public void testUnknownVC () throws IOException {
611 ki = new KrillIndex();
612 ki.addDoc(createDoc1());
613 ki.commit();
614
615 // This test was adopted from TestVCCaching,
616 // But does not fail anymore for deserialization
margaretha5a8abea2021-11-08 16:57:51 +0100617 String json = _getJSONString("vc-ref/unknown-vc-ref.jsonld");
Akronb59f40e2018-08-23 17:15:43 +0200618
619 KrillCollection kc = new KrillCollection(json);
620 assertEquals("referTo(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)", kc.getBuilder().toString());
621
margarethad9a46af2022-01-04 09:10:04 +0100622 assertEquals("vcFilter(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)",kc.toString());
Akronb59f40e2018-08-23 17:15:43 +0200623
624 QueryBuilder kq = new QueryBuilder("field");
625
626 Krill krill = new Krill(kq.seg("a").with("b"));
627 krill.setCollection(kc);
628
629 Result result = krill.apply(ki);
630
631 assertEquals(StatusCodes.MISSING_COLLECTION, result.getError(0).getCode());
margaretha5a8abea2021-11-08 16:57:51 +0100632 assertTrue(result.getError(0).getMessage().startsWith("Collection is not found"));
Akronb59f40e2018-08-23 17:15:43 +0200633 };
margarethad9a46af2022-01-04 09:10:04 +0100634
Akronb59f40e2018-08-23 17:15:43 +0200635 @Test
Akronfd05f502015-07-30 18:34:26 +0200636 public void filterExampleFromLegacy () throws Exception {
637
638 // Construct index
639 KrillIndex ki = new KrillIndex();
640 // Indexing test files
641 for (String i : new String[] { "00001", "00002", "00003", "00004",
642 "00005", "00006", "02439" }) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200643 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
Akronfd05f502015-07-30 18:34:26 +0200644 true);
645 };
646 ki.commit();
647
648 // Create Virtual collections:
649 KrillCollection kc = new KrillCollection(ki);
650
651 assertEquals("Documents", 7, kc.numberOf("documents"));
652
653 // The virtual collection consists of all documents that have
654 // the textClass "reisen" and "freizeit"
655
656 /* kc.filter(kf.and("textClass", "reisen").and("textClass",
657 "freizeit-unterhaltung"));
658 */
659
Akron40550172015-08-04 03:06:12 +0200660 kc.fromBuilder(kc.build().andGroup()
661 .with(kc.build().term("textClass", "reisen"))
662 .with(kc.build().term("textClass", "freizeit-unterhaltung")));
Akronfd05f502015-07-30 18:34:26 +0200663
664 assertEquals("Documents", 5, kc.numberOf("documents"));
665 assertEquals("Tokens", 1678, kc.numberOf("tokens"));
666 assertEquals("Sentences", 194, kc.numberOf("sentences"));
667 assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
668
669
670 // Subset this to all documents that have also the text
671 // kc.filter(kf.and("textClass", "kultur"));
672 /*
673 kc.fromBuilder(
674 kc.build().andGroup().with(
675 kc.getBuilder()
676 ).with(
677 kc.build().term("textClass", "kultur")
678 )
679 );
680 */
681
682 kc.filter(kc.build().term("textClass", "kultur"));
683
684 assertEquals("Documents", 1, kc.numberOf("documents"));
685 assertEquals("Tokens", 405, kc.numberOf("tokens"));
686 assertEquals("Sentences", 75, kc.numberOf("sentences"));
687 assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
688
689
690 // kc.filter(kf.and("corpusID", "WPD"));
691 kc.filter(kc.build().term("corpusID", "WPD"));
692
693 assertEquals("Documents", 1, kc.numberOf("documents"));
694 assertEquals("Tokens", 405, kc.numberOf("tokens"));
695 assertEquals("Sentences", 75, kc.numberOf("sentences"));
696 assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
697
698 // Create a query
Eliza Margaretha6f989202016-10-14 21:48:29 +0200699 Krill ks = new Krill(
700 new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
701 ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
702 .setContext(
703 new SearchContext(true, (short) 5, true, (short) 5));
Akronfd05f502015-07-30 18:34:26 +0200704
Akron60971692016-06-08 12:56:21 +0200705 Result kr = ks.apply(ki);
706
707 /*
Akron40550172015-08-04 03:06:12 +0200708 Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
709 (short) 5);
Akron60971692016-06-08 12:56:21 +0200710 */
Akronfd05f502015-07-30 18:34:26 +0200711 assertEquals(kr.getTotalResults(), 70);
712
713
714 kc.extend(kc.build().term("textClass", "uninteresting"));
715 assertEquals("Documents", 1, kc.numberOf("documents"));
716
717 kc.extend(kc.build().term("textClass", "wissenschaft"));
718
719 assertEquals("Documents", 3, kc.numberOf("documents"));
720 assertEquals("Tokens", 1669, kc.numberOf("tokens"));
721 assertEquals("Sentences", 188, kc.numberOf("sentences"));
722 assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
Akronfd05f502015-07-30 18:34:26 +0200723 };
724
margarethaee683ff2017-07-03 12:27:28 +0200725
726 @Test
Akronc346ce42017-07-02 19:14:07 +0200727 public void filterExampleWithNullresult () throws Exception {
728
729 // Construct index
730 KrillIndex ki = new KrillIndex();
731 // Indexing test files
732 for (String i : new String[] { "00001", "00002" }) {
733 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
margarethaee683ff2017-07-03 12:27:28 +0200734 true);
Akronc346ce42017-07-02 19:14:07 +0200735 };
736 ki.commit();
737
738 // Create Virtual collections:
739 KrillCollection kc = new KrillCollection(ki);
740
741 assertEquals("Documents", 2, kc.numberOf("documents"));
742
743 kc.fromBuilder(kc.build().term("textClass", "nichts"));
744
745 assertEquals("Documents", 0, kc.numberOf("documents"));
746 assertEquals("Tokens", 0, kc.numberOf("tokens"));
747 assertEquals("Sentences", 0, kc.numberOf("sentences"));
748 assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));
margarethaee683ff2017-07-03 12:27:28 +0200749 };
750
Akronfd05f502015-07-30 18:34:26 +0200751
752 @Test
753 public void filterExampleAtomicLegacy () throws Exception {
754
755 // That's exactly the same test class, but with multiple atomic indices
756
757 // Construct index
758 KrillIndex ki = new KrillIndex();
759 // Indexing test files
760 for (String i : new String[] { "00001", "00002", "00003", "00004",
761 "00005", "00006", "02439" }) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200762 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
Akronfd05f502015-07-30 18:34:26 +0200763 true);
764 ki.commit();
765 };
766
767 CollectionBuilder kf = new CollectionBuilder();
768
769 // Create Virtual collections:
770 KrillCollection kc = new KrillCollection(ki);
771
772 assertEquals("Documents", 7, kc.numberOf("documents"));
773
774 // If this is set - everything is fine automatically ...
775 kc.filter(kc.build().term("corpusID", "WPD"));
776
777 assertEquals("Documents", 7, kc.numberOf("documents"));
778
779 // The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
780
781 /*
782 kc.filter(kf.and("textClass", "reisen").and("textClass",
783 "freizeit-unterhaltung"));
784 */
Akron40550172015-08-04 03:06:12 +0200785 kc.filter(kc.build().andGroup()
786 .with(kc.build().term("textClass", "reisen"))
787 .with(kc.build().term("textClass", "freizeit-unterhaltung")));
Akronfd05f502015-07-30 18:34:26 +0200788
789 assertEquals("Documents", 5, kc.numberOf("documents"));
790 assertEquals("Tokens", 1678, kc.numberOf("tokens"));
791 assertEquals("Sentences", 194, kc.numberOf("sentences"));
792 assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
793
794 // Subset this to all documents that have also the text
795 // kc.filter(kf.and("textClass", "kultur"));
796
797 kc.filter(kc.build().term("textClass", "kultur"));
798
799 assertEquals("Documents", 1, kc.numberOf("documents"));
800 assertEquals("Tokens", 405, kc.numberOf("tokens"));
801 assertEquals("Sentences", 75, kc.numberOf("sentences"));
802 assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
803
804 // This is already filtered though ...
805 // kc.filter(kf.and("corpusID", "WPD"));
806 kc.filter(kc.build().term("corpusID", "WPD"));
807
808 assertEquals("Documents", 1, kc.numberOf("documents"));
809 assertEquals("Tokens", 405, kc.numberOf("tokens"));
810 assertEquals("Sentences", 75, kc.numberOf("sentences"));
811 assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
812
813 // Create a query
Eliza Margaretha6f989202016-10-14 21:48:29 +0200814 Krill ks = new Krill(
815 new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
816 ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
817 .setContext(
818 new SearchContext(true, (short) 5, true, (short) 5));
Akronfd05f502015-07-30 18:34:26 +0200819
Akron60971692016-06-08 12:56:21 +0200820 Result kr = ks.apply(ki);
821 /*
Akron40550172015-08-04 03:06:12 +0200822 Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
823 (short) 5);
Akron60971692016-06-08 12:56:21 +0200824 */
Akronfd05f502015-07-30 18:34:26 +0200825 assertEquals(kr.getTotalResults(), 70);
826
827 // kc.extend(kf.and("textClass", "uninteresting"));
828 kc.extend(kc.build().term("textClass", "uninteresting"));
829
Akronfd05f502015-07-30 18:34:26 +0200830 assertEquals("Documents", 1, kc.numberOf("documents"));
831
Akronaa74ec62015-07-31 17:22:55 +0200832 kc.extend(kc.build().term("textClass", "wissenschaft"));
Akronfd05f502015-07-30 18:34:26 +0200833
834 assertEquals("Documents", 3, kc.numberOf("documents"));
835 assertEquals("Tokens", 1669, kc.numberOf("tokens"));
836 assertEquals("Sentences", 188, kc.numberOf("sentences"));
837 assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
Akronaa74ec62015-07-31 17:22:55 +0200838
839 // System.err.println(kc.toString());
840 // Test collectionbuilder simplifier!
841 /*
842 OrGroup(
843 AndGroup(
844 corpusID:WPD
845 textClass:reisen
846 textClass:freizeit-unterhaltung
847 textClass:kultur
848 corpusID:WPD
849 )
850 textClass:uninteresting
851 textClass:wissenschaft
852 )
Akronfd05f502015-07-30 18:34:26 +0200853 */
Akronaa74ec62015-07-31 17:22:55 +0200854
855 assertTrue(ki.delDocs("textClass", "wissenschaft"));
856 ki.commit();
857
858 assertEquals("Documents", 1, kc.numberOf("documents"));
859 assertEquals("Tokens", 405, kc.numberOf("tokens"));
860 assertEquals("Sentences", 75, kc.numberOf("sentences"));
861 assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
862 };
863
Akron40550172015-08-04 03:06:12 +0200864
Akronaa74ec62015-07-31 17:22:55 +0200865 @Test
866 public void filterExample2Legacy () throws Exception {
867
868 // Construct index
869 KrillIndex ki = new KrillIndex();
870 // Indexing test files
871 for (String i : new String[] { "00001", "00002", "00003", "00004",
872 "00005", "00006", "02439" }) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200873 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
Akronaa74ec62015-07-31 17:22:55 +0200874 true);
875 };
876 ki.commit();
877
Eliza Margaretha6f989202016-10-14 21:48:29 +0200878 ki.addDoc(
879 getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"),
880 true);
Akronaa74ec62015-07-31 17:22:55 +0200881
882 ki.commit();
883
884 /*
885 CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200886
Akronaa74ec62015-07-31 17:22:55 +0200887 // Create Virtual collections:
888 KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
889 kc.filter(kf.and("textClass", "reisen").and("textClass",
890 "freizeit-unterhaltung"));
891 */
892
893 KrillCollection kc = new KrillCollection(ki);
894 CollectionBuilder cb = kc.build();
Akron40550172015-08-04 03:06:12 +0200895 kc.filter(cb.andGroup().with(cb.term("textClass", "reisen"))
896 .with(cb.term("textClass", "freizeit-unterhaltung")));
Akronaa74ec62015-07-31 17:22:55 +0200897
898 assertEquals("Documents", 5, kc.numberOf("documents"));
899 assertEquals("Tokens", 1678, kc.numberOf("tokens"));
900 assertEquals("Sentences", 194, kc.numberOf("sentences"));
901 assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
902
903
904 // Create a query
Eliza Margaretha6f989202016-10-14 21:48:29 +0200905 Krill ks = new Krill(
906 new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
907 ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
908 .setContext(
909 new SearchContext(true, (short) 5, true, (short) 5));
Akronaa74ec62015-07-31 17:22:55 +0200910
Akron60971692016-06-08 12:56:21 +0200911 Result kr = ks.apply(ki);
Akronaa74ec62015-07-31 17:22:55 +0200912
Akronaa74ec62015-07-31 17:22:55 +0200913 assertEquals(kr.getTotalResults(), 369);
914
915 // kc.filter(kf.and("corpusID", "QQQ"));
916 kc.filter(cb.term("corpusID", "QQQ"));
917
918 assertEquals("Documents", 0, kc.numberOf("documents"));
919 assertEquals("Tokens", 0, kc.numberOf("tokens"));
920 assertEquals("Sentences", 0, kc.numberOf("sentences"));
921 assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));
922
Akron60971692016-06-08 12:56:21 +0200923 ks.setCollection(kc);
924
925 // Create a query
926 kr = ks.apply(ki);
927 /*
Akron40550172015-08-04 03:06:12 +0200928 kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
929 (short) 5);
Akron60971692016-06-08 12:56:21 +0200930 */
Akronaa74ec62015-07-31 17:22:55 +0200931 assertEquals(kr.getTotalResults(), 0);
932 };
933
934
935 @Test
936 public void uidCollectionLegacy () throws IOException {
937
938 // Construct index
939 KrillIndex ki = new KrillIndex();
940 // Indexing test files
941 int uid = 1;
942 for (String i : new String[] { "00001", "00002", "00003", "00004",
943 "00005", "00006", "02439" }) {
944 FieldDocument fd = ki.addDoc(uid++,
945 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
946 true);
947 };
948 ki.commit();
949
950 assertEquals("Documents", 7, ki.numberOf("documents"));
951 assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
952 assertEquals("Sentences", 281, ki.numberOf("sentences"));
953 assertEquals("Tokens", 2661, ki.numberOf("tokens"));
954
955 SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
956 Result kr = ki.search(sq, (short) 10);
957 assertEquals(86, kr.getTotalResults());
958
959 // Create Virtual collections:
960 KrillCollection kc = new KrillCollection();
961 kc.filterUIDs(new String[] { "2", "3", "4" });
962 kc.setIndex(ki);
963 assertEquals("Documents", 3, kc.numberOf("documents"));
964
965 assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
966 assertEquals("Sentences", 103, kc.numberOf("sentences"));
967 assertEquals("Tokens", 1229, kc.numberOf("tokens"));
968
Akron08f4ceb2016-08-03 23:53:32 +0200969
Akron60971692016-06-08 12:56:21 +0200970 Krill ks = new Krill(sq);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200971 ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
972 .setContext(
973 new SearchContext(true, (short) 5, true, (short) 5));
Akron60971692016-06-08 12:56:21 +0200974 kr = ks.apply(ki);
975
976 // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
Akronaa74ec62015-07-31 17:22:55 +0200977
978 assertEquals((long) 39, kr.getTotalResults());
979 };
980
Akron40550172015-08-04 03:06:12 +0200981
Akronaa74ec62015-07-31 17:22:55 +0200982 @Test
983 public void uidCollectionWithDeletions () throws IOException {
984
985 // Construct index
986 KrillIndex ki = new KrillIndex();
987 // Indexing test files
988 int uid = 1;
989 for (String i : new String[] { "00001", "00002", "00003", "00004",
990 "00005", "00006", "02439" }) {
991 FieldDocument fd = ki.addDoc(uid++,
992 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
993 true);
994 };
995 ki.commit();
996
997
998 assertEquals("Documents", 7, ki.numberOf("documents"));
999 assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
1000 assertEquals("Sentences", 281, ki.numberOf("sentences"));
1001 assertEquals("Tokens", 2661, ki.numberOf("tokens"));
1002
1003 assertTrue(ki.delDoc(3));
1004 ki.commit();
1005
1006 assertEquals("Documents", 6, ki.numberOf("documents"));
1007
1008 assertEquals("Paragraphs", 146, ki.numberOf("paragraphs"));
1009 assertEquals("Sentences", 212, ki.numberOf("sentences"));
1010 assertEquals("Tokens", 2019, ki.numberOf("tokens"));
1011
1012 assertTrue(ki.delDoc(2));
1013 assertTrue(ki.delDoc(3));
1014 assertTrue(ki.delDoc(4));
1015 assertTrue(ki.delDoc(5));
1016 assertTrue(ki.delDoc(6));
1017 assertTrue(ki.delDoc(7));
1018 ki.commit();
1019
1020 assertEquals("Documents", 1, ki.numberOf("documents"));
1021 assertEquals("Paragraphs", 75, ki.numberOf("paragraphs"));
Akronfd05f502015-07-30 18:34:26 +02001022 };
1023
Akron451b7ae2018-08-15 13:21:27 +02001024 @Test
1025 public void testKrillCollectionWithNonexistingNegation () throws IOException {
1026 ki = new KrillIndex();
1027 ki.addDoc(createDoc1()); // nachricht kultur reisen
1028 ki.addDoc(createDoc3()); // reisen finanzen
1029 ki.commit();
1030
1031 KrillCollection kc = new KrillCollection(ki);
1032 CollectionBuilder cb = kc.build();
1033
1034 kc.fromBuilder(cb.term("textClass","reisen"));
1035 assertEquals(kc.toString(), "textClass:reisen");
1036 assertEquals("Documents", 2, kc.numberOf("documents"));
1037
1038 kc.fromBuilder(cb.andGroup().with(
1039 cb.term("textClass","reisen")
1040 ).with(
1041 cb.term("textClass","nachricht").not()
1042 ));
1043 assertEquals(kc.toString(), "AndGroup(textClass:reisen -textClass:nachricht)");
1044 assertEquals("Documents", 1, kc.numberOf("documents"));
1045
1046
1047 kc.fromBuilder(cb.andGroup().with(
1048 cb.term("textClass","reisen")
1049 ).with(
1050 cb.term("textClass","reisen").not()
1051 ));
1052 assertEquals(kc.toString(), "AndGroup(textClass:reisen -textClass:reisen)");
1053 assertEquals("Documents", 0, kc.numberOf("documents"));
1054
1055 kc.fromBuilder(cb.andGroup().with(
1056 cb.term("textClass","kultur")
1057 ).with(
1058 cb.term("textClass","finanzen").not()
1059 ));
1060 assertEquals(kc.toString(), "AndGroup(textClass:kultur -textClass:finanzen)");
1061 assertEquals("Documents", 1, kc.numberOf("documents"));
1062
1063 kc.fromBuilder(cb.andGroup().with(
1064 cb.term("textClass","reisen")
1065 ).with(
1066 cb.term("textClass","Blabla").not()
1067 ));
1068 assertEquals(kc.toString(), "AndGroup(textClass:reisen -textClass:Blabla)");
1069 assertEquals("Documents", 2, kc.numberOf("documents"));
1070 }
1071
Akronfd05f502015-07-30 18:34:26 +02001072
Akron1f531262018-08-24 14:27:00 +02001073 @Test
1074 public void testKrillCollectionWithValueVectorNe () throws IOException {
1075 ki = new KrillIndex();
1076 ki.addDoc(createDoc1()); // nachricht kultur reisen
1077 ki.addDoc(createDoc2()); // kultur reisen
1078 ki.addDoc(createDoc3()); // reisen finanzen
1079 ki.commit();
1080
1081 KrillCollection kc = new KrillCollection();
1082 kc.setIndex(ki);
1083
1084 CollectionBuilder cb = kc.build();
1085 kc.fromBuilder(cb.orGroup().with(cb.term("textClass", "nachricht")).with(cb.term("textClass","finanzen")));
1086 assertEquals("OrGroup(textClass:nachricht textClass:finanzen)", kc.toString());
1087 assertEquals("Documents", 2, kc.numberOf("documents"));
1088
1089 kc.fromBuilder(cb.term("textClass", "nachricht").not());
1090 assertEquals("-textClass:nachricht", kc.toString());
1091 assertEquals("Documents", 2, kc.numberOf("documents"));
1092
1093 kc.fromBuilder(cb.orGroup().with(cb.term("textClass", "nachricht").not()).with(cb.term("textClass","finanzen").not()));
1094 assertEquals("OrGroup(-textClass:nachricht -textClass:finanzen)", kc.toString());
1095 assertEquals("Documents", 3, kc.numberOf("documents"));
1096
1097 kc.fromBuilder(cb.orGroup().with(cb.term("textClass", "nachricht")).with(cb.term("textClass","finanzen")).not());
1098 assertEquals("-OrGroup(textClass:nachricht textClass:finanzen)", kc.toString());
1099 assertEquals("Documents", 1, kc.numberOf("documents"));
1100
1101 Krill ks = new Krill(new QueryBuilder("tokens").seg("i:a"));
1102 ks.setCollection(kc);
1103
1104 // Create a query
1105 Result kr = ks.apply(ki);
1106 assertEquals(1, kr.getTotalResults());
1107 assertEquals("[[a]] c d", kr.getMatch(0).getSnippetBrackets());
1108
1109 String json = _getJSONString("collection_with_vector_ne.jsonld");
1110 ks = new Krill(json);
1111
1112 kc = ks.getCollection();
1113 kc.setIndex(ki);
1114
1115 assertEquals("-OrGroup(textClass:nachricht textClass:finanzen)", kc.toString());
1116 assertEquals("Documents", 1, kc.numberOf("documents"));
1117
1118 kr = ks.apply(ki);
1119 assertEquals("[[a]] c d", kr.getMatch(0).getSnippetBrackets());
1120 assertEquals(1, kr.getTotalResults());
1121 };
Akron2423bba2018-09-03 15:11:10 +02001122
1123 @Test
1124 public void testKrillCollectionWithLargeVector () throws IOException {
1125 ki = new KrillIndex();
1126 ki.addDoc(createDoc1());
1127 ki.addDoc(createDoc2());
1128 ki.addDoc(createDoc3());
1129 ki.commit();
1130 ki.addDoc(createDoc5000());
1131 ki.commit();
1132
1133 String json = _getJSONString("collection_large_vector.jsonld");
1134 KrillCollection kc = new KrillCollection(json);
1135
1136 Krill ks = new Krill(new QueryBuilder("tokens").seg("i:a"));
1137 ks.setCollection(kc);
1138 kc.setIndex(ki);
1139
1140 assertEquals("Documents", 4, kc.numberOf("documents"));
1141
1142 Result kr = ks.apply(ki);
1143 assertEquals("[[a]] b c", kr.getMatch(0).getSnippetBrackets());
1144 assertEquals("[[a]] c d", kr.getMatch(1).getSnippetBrackets());
1145 assertEquals("[[a]] d e", kr.getMatch(2).getSnippetBrackets());
1146 assertEquals("[[a]] d e", kr.getMatch(3).getSnippetBrackets());
1147 };
1148
Akronfd966c52018-09-03 15:29:37 +02001149 @Test
1150 public void testKrillCollectionWithLargeVectorAndLargeIndex () throws IOException {
1151 ki = new KrillIndex();
1152 for (int i = 0; i < 6000; i++) {
1153 FieldDocument fd = new FieldDocument();
1154 fd.addString("UID", Integer.toString(i));
1155 ki.addDoc(fd);
1156 if (i == 4500)
1157 ki.commit();
1158 };
1159
1160 ki.commit();
1161
1162 String json = _getJSONString("collection_large_vector.jsonld");
1163 KrillCollection kc = new KrillCollection(json);
1164 kc.setIndex(ki);
1165
1166 assertEquals("Documents", 5000, kc.numberOf("documents"));
1167 };
1168
1169
Akron2423bba2018-09-03 15:11:10 +02001170
Akron1f531262018-08-24 14:27:00 +02001171
margarethaa0d88f62018-09-03 18:03:52 +02001172 public static FieldDocument createDoc1 () {
Akron3ba74f22015-07-24 18:46:17 +02001173 FieldDocument fd = new FieldDocument();
Akronb59f40e2018-08-23 17:15:43 +02001174 fd.addString("UID", "1");
Akron3ba74f22015-07-24 18:46:17 +02001175 fd.addString("ID", "doc-1");
1176 fd.addString("author", "Frank");
Akrona6dabb72019-01-09 13:09:41 +01001177 fd.addKeywords("textClass", "Nachricht Kultur Reisen");
Akronc7a2abc2019-01-17 14:21:34 +01001178 fd.addDate("pubDate", 20051210);
Akron22d319e2018-04-01 17:13:49 +02001179 fd.addText("text", "Der alte Mann ging über die Straße");
Akronb59f40e2018-08-23 17:15:43 +02001180 fd.addTV("tokens", "a b c", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
1181 + "[(2-3)s:b|i:b|_1$<i>2<i>3]" + "[(4-5)s:c|i:c|_2$<i>4<i>5]");
Akron3ba74f22015-07-24 18:46:17 +02001182 return fd;
1183 };
1184
Akron40550172015-08-04 03:06:12 +02001185
margarethaa0d88f62018-09-03 18:03:52 +02001186 public static FieldDocument createDoc2 () {
Akron3ba74f22015-07-24 18:46:17 +02001187 FieldDocument fd = new FieldDocument();
Akronb59f40e2018-08-23 17:15:43 +02001188 fd.addString("UID", "2");
1189 fd.addString("ID", "doc-2");
Akron3ba74f22015-07-24 18:46:17 +02001190 fd.addString("author", "Peter");
Akrona6dabb72019-01-09 13:09:41 +01001191 fd.addKeywords("textClass", "Kultur Reisen");
Akronc7a2abc2019-01-17 14:21:34 +01001192 fd.addDate("pubDate", 20051207);
Akron3ba74f22015-07-24 18:46:17 +02001193 fd.addText("text", "Der junge Mann hatte keine andere Wahl");
Akronb59f40e2018-08-23 17:15:43 +02001194 fd.addTV("tokens", "a c d", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
1195 + "[(2-3)s:c|i:c|_1$<i>2<i>3]" + "[(4-5)s:d|i:d|_2$<i>4<i>5]");
Akron3ba74f22015-07-24 18:46:17 +02001196 return fd;
1197 };
1198
Akron40550172015-08-04 03:06:12 +02001199
margarethaa0d88f62018-09-03 18:03:52 +02001200 public static FieldDocument createDoc3 () {
Akron3ba74f22015-07-24 18:46:17 +02001201 FieldDocument fd = new FieldDocument();
Akronb59f40e2018-08-23 17:15:43 +02001202 fd.addString("UID", "3");
1203 fd.addString("ID", "doc-3");
Akron3ba74f22015-07-24 18:46:17 +02001204 fd.addString("author", "Sebastian");
Akrona6dabb72019-01-09 13:09:41 +01001205 fd.addKeywords("textClass", "Reisen Finanzen");
Akronc7a2abc2019-01-17 14:21:34 +01001206 fd.addDate("pubDate", 20051216);
Akron3ba74f22015-07-24 18:46:17 +02001207 fd.addText("text", "Die Frau und der Mann küssten sich");
Akronb59f40e2018-08-23 17:15:43 +02001208 fd.addTV("tokens", "a d e", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
1209 + "[(2-3)s:d|i:d|_1$<i>2<i>3]" + "[(4-5)s:e|i:e|_2$<i>4<i>5]");
Akron3ba74f22015-07-24 18:46:17 +02001210 return fd;
1211 };
Akronb59f40e2018-08-23 17:15:43 +02001212
margarethad9a46af2022-01-04 09:10:04 +01001213 public static FieldDocument createDoc5000 () {
Akron2423bba2018-09-03 15:11:10 +02001214 FieldDocument fd = new FieldDocument();
1215 fd.addString("UID", "5000");
1216 fd.addString("ID", "doc-5000");
1217 fd.addString("author", "Sebastian");
Akrona6dabb72019-01-09 13:09:41 +01001218 fd.addKeywords("textClass", "Kultur Finanzen");
Akronc7a2abc2019-01-17 14:21:34 +01001219 fd.addDate("pubDate", 20180202);
Akron2423bba2018-09-03 15:11:10 +02001220 fd.addText("text", "Die Frau und der Mann küssten sich");
1221 fd.addTV("tokens", "a d e", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
1222 + "[(2-3)s:d|i:d|_1$<i>2<i>3]" + "[(4-5)s:e|i:e|_2$<i>4<i>5]");
1223 return fd;
1224 };
1225
Akronb59f40e2018-08-23 17:15:43 +02001226 private String _getJSONString (String file) {
1227 return getJsonString(getClass().getResource(path + file).getFile());
1228 };
Akron3ba74f22015-07-24 18:46:17 +02001229};