blob: bdbb580d5aa75134324a0bc24f3c0738cd8c2291 [file] [log] [blame]
Eliza Margaretha6a780692014-01-15 09:45:42 +00001package de.ids_mannheim.korap.search;
2
margarethaf70addb2015-04-27 13:17:18 +02003import static de.ids_mannheim.korap.TestSimple.getString;
4import static org.junit.Assert.assertEquals;
Akron001dab32015-07-02 12:30:15 +02005import static org.junit.Assert.assertTrue;
margarethaf70addb2015-04-27 13:17:18 +02006import static org.junit.Assert.assertFalse;
7import static org.junit.Assert.assertNull;
Nils Diewaldc925b492013-12-03 23:56:10 +00008
margarethaf70addb2015-04-27 13:17:18 +02009import java.io.IOException;
10import java.util.HashMap;
Nils Diewald56dc2582014-11-04 21:33:46 +000011
Nils Diewaldc925b492013-12-03 23:56:10 +000012import org.junit.Test;
Akron176c9b12015-07-29 19:53:40 +020013import org.junit.Ignore;
Nils Diewaldc925b492013-12-03 23:56:10 +000014import org.junit.runner.RunWith;
15import org.junit.runners.JUnit4;
16
margarethaf70addb2015-04-27 13:17:18 +020017import com.fasterxml.jackson.databind.JsonNode;
18import com.fasterxml.jackson.databind.ObjectMapper;
19
20import de.ids_mannheim.korap.Krill;
21import de.ids_mannheim.korap.KrillCollection;
22import de.ids_mannheim.korap.KrillIndex;
23import de.ids_mannheim.korap.KrillMeta;
24import de.ids_mannheim.korap.collection.CollectionBuilder;
25import de.ids_mannheim.korap.index.FieldDocument;
26import de.ids_mannheim.korap.query.QueryBuilder;
margarethaf70addb2015-04-27 13:17:18 +020027import de.ids_mannheim.korap.response.Result;
28import de.ids_mannheim.korap.response.SearchContext;
29
Nils Diewaldc925b492013-12-03 23:56:10 +000030@RunWith(JUnit4.class)
Nils Diewaldbbd39a52015-02-23 19:56:57 +000031public class TestKrill {
Nils Diewaldc925b492013-12-03 23:56:10 +000032 @Test
33 public void searchCount () {
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 Krill k = new Krill(new QueryBuilder("field1").seg("a").with("b"));
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000035
36 KrillMeta meta = k.getMeta();
37
Nils Diewaldafab8f32015-01-26 19:11:32 +000038 // Count:
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000039 meta.setCount(30);
40 assertEquals(meta.getCount(), 30);
41 meta.setCount(20);
42 assertEquals(meta.getCount(), 20);
43 meta.setCount(-50);
44 assertEquals(meta.getCount(), 20);
45 meta.setCount(500);
46 assertEquals(meta.getCount(), meta.getCountMax());
Nils Diewaldc925b492013-12-03 23:56:10 +000047 };
48
Nils Diewaldbb33da22015-03-04 16:24:25 +000049
Nils Diewaldc925b492013-12-03 23:56:10 +000050 @Test
51 public void searchStartIndex () {
Nils Diewaldbb33da22015-03-04 16:24:25 +000052 Krill k = new Krill(new QueryBuilder("field1").seg("a").with("b"));
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000053
54 KrillMeta meta = k.getMeta();
55
Nils Diewaldafab8f32015-01-26 19:11:32 +000056 // startIndex
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000057 meta.setStartIndex(5);
58 assertEquals(meta.getStartIndex(), 5);
59 meta.setStartIndex(1);
60 assertEquals(meta.getStartIndex(), 1);
61 meta.setStartIndex(0);
62 assertEquals(meta.getStartIndex(), 0);
63 meta.setStartIndex(70);
64 assertEquals(meta.getStartIndex(), 70);
65 meta.setStartIndex(-5);
66 assertEquals(meta.getStartIndex(), 0);
Nils Diewaldc925b492013-12-03 23:56:10 +000067 };
68
Nils Diewaldbb33da22015-03-04 16:24:25 +000069
Nils Diewaldc925b492013-12-03 23:56:10 +000070 @Test
71 public void searchQuery () {
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 Krill ks = new Krill(new QueryBuilder("field1").seg("a").with("b"));
Nils Diewaldafab8f32015-01-26 19:11:32 +000073 // query
Nils Diewaldbb33da22015-03-04 16:24:25 +000074 assertEquals(ks.getSpanQuery().toString(),
75 "spanSegment(field1:a, field1:b)");
Nils Diewaldc925b492013-12-03 23:56:10 +000076 };
77
Nils Diewaldafab8f32015-01-26 19:11:32 +000078
Nils Diewaldc925b492013-12-03 23:56:10 +000079 @Test
80 public void searchIndex () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +000081 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +000082 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +000083 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +000084 for (String i : new String[] { "00001", "00002", "00003", "00004",
85 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +000086 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +000087 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
88 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +000089 };
90 ki.commit();
Nils Diewaldc925b492013-12-03 23:56:10 +000091
Nils Diewaldbb33da22015-03-04 16:24:25 +000092 Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000093
Akron176c9b12015-07-29 19:53:40 +020094 CollectionBuilder cb = new CollectionBuilder();
95
96 ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000097
98 KrillMeta meta = ks.getMeta();
99 meta.setCount(3);
100 meta.setStartIndex(5);
101 meta.getContext().left.setLength(1);
102 meta.getContext().right.setLength(1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000103
Nils Diewald884dbcf2015-02-27 17:02:28 +0000104 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000105 assertEquals(kr.getTotalResults(), 6);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 assertEquals(kr.getMatch(0).getSnippetBrackets(),
107 "... dem [Buchstaben] A ...");
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000108
109 JsonNode res = ks.toJsonNode();
110 assertEquals(3, res.at("/meta/count").asInt());
111 assertEquals(5, res.at("/meta/startIndex").asInt());
112 assertEquals("token", res.at("/meta/context/left/0").asText());
113 assertEquals(1, res.at("/meta/context/left/1").asInt());
114 assertEquals("token", res.at("/meta/context/right/0").asText());
115 assertEquals(1, res.at("/meta/context/right/1").asInt());
Nils Diewaldc925b492013-12-03 23:56:10 +0000116 };
Nils Diewaldc6b78752013-12-05 19:05:12 +0000117
Nils Diewaldafab8f32015-01-26 19:11:32 +0000118
Nils Diewaldc6b78752013-12-05 19:05:12 +0000119 @Test
120 public void searchJSON () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000121 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000122 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000123 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124 for (String i : new String[] { "00001", "00002", "00003", "00004",
125 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000126 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000127 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
128 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000129 };
130 ki.commit();
Nils Diewaldc6b78752013-12-05 19:05:12 +0000131
Nils Diewaldbb33da22015-03-04 16:24:25 +0000132 String json = getString(getClass().getResource(
133 "/queries/metaquery3.jsonld").getFile());
Nils Diewaldc6b78752013-12-05 19:05:12 +0000134
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000135 Krill ks = new Krill(json);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000136 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000137 assertEquals(kr.getTotalResults(), 66);
138 assertEquals(5, kr.getItemsPerPage());
139 assertEquals(5, kr.getStartIndex());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000140 assertEquals("... a: A ist [der klangreichste] der V ...",
141 kr.getMatch(0).getSnippetBrackets());
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000142 };
Nils Diewald01b4ce32013-12-05 22:39:25 +0000143
Nils Diewaldbb33da22015-03-04 16:24:25 +0000144
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000145 @Test
146 public void searchJSON2 () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000147 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000148 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000149 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 for (String i : new String[] { "00001", "00002", "00003", "00004",
151 "00005", "00006", "02439", "00012-fakemeta", "00030-fakemeta",
152 /*
153 "02035-substring",
154 "05663-unbalanced",
155 "07452-deep"
156 */
157 }) {
Nils Diewald50333552015-03-02 15:54:46 +0000158 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
160 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000161 };
162 ki.commit();
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000163
Nils Diewaldbb33da22015-03-04 16:24:25 +0000164 String json = getString(getClass().getResource(
165 "/queries/metaquery4.jsonld").getFile());
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000166
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000167 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000168 Result kr = ks.apply(ki);
Nils Diewaldc86aa482014-02-12 16:58:05 +0000169
Nils Diewaldafab8f32015-01-26 19:11:32 +0000170 assertEquals(kr.getTotalResults(), 1);
Nils Diewald979b2fe2014-09-29 16:21:41 +0000171
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000172 ks = new Krill(json);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000173 // Ignore the collection part of the query!
Nils Diewald2d5f8102015-02-26 21:07:54 +0000174 ks.setCollection(new KrillCollection());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000175 kr = ks.apply(ki);
Nils Diewald979b2fe2014-09-29 16:21:41 +0000176
Nils Diewaldafab8f32015-01-26 19:11:32 +0000177 assertEquals(kr.getTotalResults(), 5);
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000178
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 json = getString(getClass().getResource("/queries/metaquery5.jsonld")
180 .getFile());
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000181
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000182 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000183 kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000184 assertEquals(kr.getTotalResults(), 1);
185
Nils Diewaldbb33da22015-03-04 16:24:25 +0000186 json = getString(getClass().getResource("/queries/metaquery6.jsonld")
187 .getFile());
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000188 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000189 kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000190 assertEquals(kr.getTotalResults(), 1);
Nils Diewaldc6b78752013-12-05 19:05:12 +0000191 };
192
Akronbb5d1732015-06-22 01:22:40 +0200193
Akronc63697c2015-06-17 22:32:02 +0200194 // Todo: There SHOULD be a failure here, but Koral currently creates empty collections
195 @Test
196 public void queryJSONapiTest1 () {
Akronbb5d1732015-06-22 01:22:40 +0200197 Krill test = new Krill(
198 "{\"@context\":\"http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld\",\"errors\":[],\"warnings\":[],\"messages\":[],\"collection\":{},\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"layer\":\"orth\",\"key\":\"Baum\",\"match\":\"match:eq\"}},\"meta\":{}}");
Akronc63697c2015-06-17 22:32:02 +0200199 assertFalse(test.hasErrors());
200 };
201
Nils Diewaldc6b78752013-12-05 19:05:12 +0000202
203 @Test
204 public void searchJSONFailure () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000205 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000206 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000207 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000208 for (String i : new String[] { "00001", "00002", "00003", "00004",
209 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000210 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000211 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
212 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000213 };
214 ki.commit();
Nils Diewald884dbcf2015-02-27 17:02:28 +0000215 Result kr = new Krill("{ query").apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000216 assertEquals(kr.getTotalResults(), 0);
217 assertEquals(kr.getError(0).getMessage(), "Unable to parse JSON");
Nils Diewaldc6b78752013-12-05 19:05:12 +0000218 };
219
220
Nils Diewald9f310832013-12-06 22:38:55 +0000221 @Test
222 public void searchJSONindexboundary () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000223 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000224 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000225 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000226 for (String i : new String[] { "00001", "00002", "00003", "00004",
227 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000228 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000229 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
230 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000231 };
232 ki.commit();
Nils Diewald9f310832013-12-06 22:38:55 +0000233
Nils Diewaldbb33da22015-03-04 16:24:25 +0000234 String json = getString(getClass().getResource(
235 "/queries/bsp-fail1.jsonld").getFile());
Nils Diewald9f310832013-12-06 22:38:55 +0000236
Nils Diewald884dbcf2015-02-27 17:02:28 +0000237 Result kr = new Krill(json).apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000238 assertEquals(0, kr.getStartIndex());
239 assertEquals(kr.getTotalResults(), 0);
240 assertEquals(25, kr.getItemsPerPage());
Nils Diewald9f310832013-12-06 22:38:55 +0000241 };
242
Nils Diewaldafab8f32015-01-26 19:11:32 +0000243
Nils Diewald9f310832013-12-06 22:38:55 +0000244 @Test
245 public void searchJSONindexboundary2 () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000246 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000247 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000248 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000249 for (String i : new String[] { "00001", "00002", "00003", "00004",
250 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000251 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000252 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
253 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000254 };
255 ki.commit();
Nils Diewald9f310832013-12-06 22:38:55 +0000256
Nils Diewaldbb33da22015-03-04 16:24:25 +0000257 String json = getString(getClass().getResource(
258 "/queries/bsp-fail2.jsonld").getFile());
Nils Diewald9f310832013-12-06 22:38:55 +0000259
Nils Diewald884dbcf2015-02-27 17:02:28 +0000260 Result kr = new Krill(json).apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000261 assertEquals(50, kr.getItemsPerPage());
262 assertEquals(49950, kr.getStartIndex());
263 assertEquals(kr.getTotalResults(), 0);
Nils Diewald9f310832013-12-06 22:38:55 +0000264 };
265
Akron40550172015-08-04 03:06:12 +0200266
Akron001dab32015-07-02 12:30:15 +0200267 /*
268 * Queries should be mirrored correctly for debugging reasons.
269 */
270 @Test
271 public void queryJSONmirrorTestBug () throws IOException {
272 // Construct index
273 KrillIndex ki = new KrillIndex();
274 String json = getString(getClass().getResource(
Akron40550172015-08-04 03:06:12 +0200275 "/queries/bugs/failing_mirror.jsonld").getFile());
Akron001dab32015-07-02 12:30:15 +0200276 Krill ks = new Krill(json);
277 Result kr = ks.apply(ki);
278
279 ObjectMapper mapper = new ObjectMapper();
280 JsonNode res = mapper.readTree(kr.toJsonString());
281
282 assertEquals("Unable to parse JSON", res.at("/errors/0/1").asText());
283
284 json = getString(getClass().getResource(
Akron40550172015-08-04 03:06:12 +0200285 "/queries/bugs/failing_mirror_2.jsonld").getFile());
Akron001dab32015-07-02 12:30:15 +0200286 ks = new Krill(json);
287 kr = ks.apply(ki);
288
289 res = mapper.readTree(kr.toJsonString());
290
Akron40550172015-08-04 03:06:12 +0200291 assertEquals(23, res.at("/meta/count").asInt());
292 assertEquals(25, res.at("/meta/itemsPerPage").asInt());
Akron001dab32015-07-02 12:30:15 +0200293 assertEquals("base/s:p", res.at("/meta/context").asText());
294 assertFalse(res.at("/query").isMissingNode());
295 assertTrue(res.at("/query/@type").isMissingNode());
296 assertTrue(res.at("/collection/@type").isMissingNode());
297 };
298
299
Nils Diewaldc6b78752013-12-05 19:05:12 +0000300
Nils Diewaldeabed8b2013-12-17 16:46:43 +0000301 @Test
302 public void searchJSONcontext () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000303 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000304 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000305 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000306 for (String i : new String[] { "00001", "00002", "00003", "00004",
307 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000308 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000309 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
310 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000311 };
312 ki.commit();
Nils Diewaldeabed8b2013-12-17 16:46:43 +0000313
Nils Diewaldbb33da22015-03-04 16:24:25 +0000314 String json = getString(getClass().getResource(
315 "/queries/bsp-context.jsonld").getFile());
Nils Diewaldeabed8b2013-12-17 16:46:43 +0000316
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000317 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000318 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000319 assertEquals(kr.getTotalResults(), 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000320 assertEquals("A bzw. a ist der erste Buchstabe des"
321 + " lateinischen [Alphabets] und ein Vokal."
322 + " Der Buchstabe A hat in deutschen Texten"
323 + " eine durchschnittliche Häufigkeit ...", kr.getMatch(0)
324 .getSnippetBrackets());
Nils Diewaldb3a09db2013-12-21 00:22:02 +0000325
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000326 ks.getMeta().setCount(5);
327 ks.getMeta().setStartPage(2);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000328 kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000329 assertEquals(kr.getTotalResults(), 10);
330 assertEquals(5, kr.getStartIndex());
331 assertEquals(5, kr.getItemsPerPage());
Nils Diewald891c53c2013-12-23 16:37:46 +0000332
Nils Diewaldbb33da22015-03-04 16:24:25 +0000333 json = getString(getClass()
334 .getResource("/queries/bsp-context-2.jsonld").getFile());
Nils Diewald891c53c2013-12-23 16:37:46 +0000335
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000336 kr = new Krill(json).apply(ki);
337
Nils Diewaldafab8f32015-01-26 19:11:32 +0000338 assertEquals(kr.getTotalResults(), -1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000339 assertEquals("... lls seit den Griechen beibehalten worden."
340 + " 3. Bedeutungen in der Biologie steht A für"
341 + " das Nukleosid Adenosin steht A die Base"
342 + " Adenin steht A für die Aminosäure Alanin"
343 + " in der Informatik steht a für den dezimalen"
344 + " [Wert] 97 sowohl im ASCII- als auch im"
345 + " Unicode-Zeichensatz steht A für den dezimalen"
346 + " Wert 65 sowohl im ASCII- als auch im"
347 + " Unicode-Zeichensatz als Kfz-Kennzeichen"
348 + " steht A in Deutschland für Augsburg."
349 + " in Österreich auf ...", kr.getMatch(0).getSnippetBrackets());
Nils Diewaldeabed8b2013-12-17 16:46:43 +0000350 };
351
Nils Diewaldbb33da22015-03-04 16:24:25 +0000352
Nils Diewald364eb642013-12-22 15:03:01 +0000353 @Test
354 public void searchJSONstartPage () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000355 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000356 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000357 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000358 for (String i : new String[] { "00001", "00002", "00003", "00004",
359 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000360 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
362 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000363 };
364 ki.commit();
Nils Diewald364eb642013-12-22 15:03:01 +0000365
Nils Diewaldbb33da22015-03-04 16:24:25 +0000366 String json = getString(getClass().getResource(
367 "/queries/bsp-paging.jsonld").getFile());
Nils Diewald364eb642013-12-22 15:03:01 +0000368
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000369 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000370 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000371 assertEquals(kr.getTotalResults(), 10);
372 assertEquals(5, kr.getStartIndex());
373 assertEquals(5, kr.getItemsPerPage());
Nils Diewald364eb642013-12-22 15:03:01 +0000374
Nils Diewaldbb33da22015-03-04 16:24:25 +0000375 json = getString(getClass().getResource("/queries/bsp-cutoff.jsonld")
376 .getFile());
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000377 ks = ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000378 kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000379 assertEquals(kr.getTotalResults(), -1);
380 assertEquals(2, kr.getStartIndex());
381 assertEquals(2, kr.getItemsPerPage());
Nils Diewald364eb642013-12-22 15:03:01 +0000382
Nils Diewaldbb33da22015-03-04 16:24:25 +0000383 json = getString(getClass().getResource("/queries/metaquery9.jsonld")
384 .getFile());
Nils Diewald2d5f8102015-02-26 21:07:54 +0000385 KrillCollection kc = new KrillCollection(json);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000386 kc.setIndex(ki);
387 assertEquals(7, kc.numberOf("documents"));
Nils Diewald364eb642013-12-22 15:03:01 +0000388 };
Nils Diewaldeabed8b2013-12-17 16:46:43 +0000389
Nils Diewaldafab8f32015-01-26 19:11:32 +0000390
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000391 @Test
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000392 public void searchJSONitemsPerResource () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000393 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000394 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000395 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000396 for (String i : new String[] { "00001", "00002", "00003", "00004",
397 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000398 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000399 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
400 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000401 };
402 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000403 String json = getString(getClass().getResource(
404 "/queries/bsp-itemsPerResource.jsonld").getFile());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000405
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000406 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000407 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000408 assertEquals(kr.getTotalResults(), 10);
409 assertEquals(0, kr.getStartIndex());
410 assertEquals(20, kr.getItemsPerPage());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000411
Nils Diewaldafab8f32015-01-26 19:11:32 +0000412 assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
413 assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
414 assertEquals("WPD_AAA.00001", kr.getMatch(6).getDocID());
415 assertEquals("WPD_AAA.00002", kr.getMatch(7).getDocID());
416 assertEquals("WPD_AAA.00002", kr.getMatch(8).getDocID());
417 assertEquals("WPD_AAA.00004", kr.getMatch(9).getDocID());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000418
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000419 ks = new Krill(json);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000420 ks.getMeta().setItemsPerResource(1);
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000421
Nils Diewald3aa9e692015-02-20 22:20:11 +0000422 kr = ks.apply(ki);
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000423
Nils Diewaldafab8f32015-01-26 19:11:32 +0000424 assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
425 assertEquals("WPD_AAA.00002", kr.getMatch(1).getDocID());
426 assertEquals("WPD_AAA.00004", kr.getMatch(2).getDocID());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000427
Nils Diewaldafab8f32015-01-26 19:11:32 +0000428 assertEquals(kr.getTotalResults(), 3);
429 assertEquals(0, kr.getStartIndex());
430 assertEquals(20, kr.getItemsPerPage());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000431
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000432 ks = new Krill(json);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000433 ks.getMeta().setItemsPerResource(2);
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000434
Nils Diewald3aa9e692015-02-20 22:20:11 +0000435 kr = ks.apply(ki);
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000436
Nils Diewaldafab8f32015-01-26 19:11:32 +0000437 assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
438 assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
439 assertEquals("WPD_AAA.00002", kr.getMatch(2).getDocID());
440 assertEquals("WPD_AAA.00002", kr.getMatch(3).getDocID());
441 assertEquals("WPD_AAA.00004", kr.getMatch(4).getDocID());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000442
Nils Diewaldafab8f32015-01-26 19:11:32 +0000443 assertEquals(kr.getTotalResults(), 5);
444 assertEquals(0, kr.getStartIndex());
445 assertEquals(20, kr.getItemsPerPage());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000446
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000447 ks = new Krill(json);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000448 KrillMeta meta = ks.getMeta();
449 meta.setItemsPerResource(1);
450 meta.setStartIndex(1);
451 meta.setCount(1);
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000452
Nils Diewald3aa9e692015-02-20 22:20:11 +0000453 kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000454
Nils Diewaldafab8f32015-01-26 19:11:32 +0000455 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000456
Nils Diewaldafab8f32015-01-26 19:11:32 +0000457 assertEquals(kr.getTotalResults(), 3);
458 assertEquals(1, kr.getStartIndex());
459 assertEquals(1, kr.getItemsPerPage());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000460
Nils Diewaldafab8f32015-01-26 19:11:32 +0000461 assertEquals((short) 1, kr.getItemsPerResource());
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000462 };
463
Nils Diewaldafab8f32015-01-26 19:11:32 +0000464
Nils Diewaldd723d812014-09-23 18:50:52 +0000465 @Test
466 public void searchJSONitemsPerResourceServer () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000467 /*
468 * This test is a server-only implementation of
469 * TestResource#testCollection
470 */
471 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000472 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000473 // Indexing test files
474 int uid = 1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000475 for (String i : new String[] { "00001", "00002", "00003", "00004",
476 "00005", "00006", "02439" }) {
477 ki.addDoc(uid++,
478 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
479 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000480 };
481 ki.commit();
Nils Diewaldd723d812014-09-23 18:50:52 +0000482
Nils Diewaldbb33da22015-03-04 16:24:25 +0000483 String json = getString(getClass().getResource(
484 "/queries/bsp-uid-example.jsonld").getFile());
Nils Diewaldd723d812014-09-23 18:50:52 +0000485
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000486 Krill ks = new Krill(json);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000487 ks.getMeta().setItemsPerResource(1);
488
Nils Diewald2d5f8102015-02-26 21:07:54 +0000489 KrillCollection kc = new KrillCollection();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000490 kc.filterUIDs(new String[] { "1", "4" });
Nils Diewaldafab8f32015-01-26 19:11:32 +0000491 kc.setIndex(ki);
492 ks.setCollection(kc);
Nils Diewaldd723d812014-09-23 18:50:52 +0000493
Nils Diewald884dbcf2015-02-27 17:02:28 +0000494 Result kr = ks.apply(ki);
Nils Diewaldd723d812014-09-23 18:50:52 +0000495
Nils Diewaldafab8f32015-01-26 19:11:32 +0000496 assertEquals(kr.getTotalResults(), 2);
497 assertEquals(0, kr.getStartIndex());
498 assertEquals(25, kr.getItemsPerPage());
Nils Diewaldd723d812014-09-23 18:50:52 +0000499 };
Nils Diewaldba197f22014-11-01 17:21:46 +0000500
Nils Diewaldafab8f32015-01-26 19:11:32 +0000501
Nils Diewaldba197f22014-11-01 17:21:46 +0000502 @Test
503 public void searchJSONnewJSON () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000504 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000505 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000506 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000507 FieldDocument fd = ki.addDoc(1,
508 getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000509 ki.commit();
Nils Diewaldba197f22014-11-01 17:21:46 +0000510
Nils Diewaldafab8f32015-01-26 19:11:32 +0000511 assertEquals(fd.getUID(), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000512 assertEquals(fd.getTextSigle(), "GOE_AGA.03828");
513 assertEquals(fd.getDocSigle(), "GOE_AGA");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000514 assertEquals(fd.getCorpusSigle(), "GOE");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000515 assertEquals(fd.getTitle(), "Autobiographische Einzelheiten");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000516 assertNull(fd.getSubTitle());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000517 assertEquals(fd.getTextType(), "Autobiographie");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000518 assertNull(fd.getTextTypeArt());
519 assertNull(fd.getTextTypeRef());
520 assertNull(fd.getTextColumn());
521 assertNull(fd.getTextDomain());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000522 assertEquals(fd.getPages(), "529-547");
523 assertEquals(fd.getLicense(), "QAO-NC");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000524 assertEquals(fd.getCreationDate().toString(), "18200000");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000525 assertEquals(fd.getPubDate().toString(), "19820000");
526 assertEquals(fd.getAuthor(), "Goethe, Johann Wolfgang von");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000527 assertNull(fd.getTextClass());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000528 assertEquals(fd.getLanguage(), "de");
529 assertEquals(fd.getPubPlace(), "München");
530 assertEquals(fd.getReference(), "Goethe, Johann Wolfgang von:"
531 + " Autobiographische Einzelheiten,"
532 + " (Geschrieben bis 1832), In: Goethe,"
533 + " Johann Wolfgang von: Goethes Werke,"
534 + " Bd. 10, Autobiographische Schriften"
535 + " II, Hrsg.: Trunz, Erich. München: "
536 + "Verlag C. H. Beck, 1982, S. 529-547");
537 assertEquals(fd.getPublisher(), "Verlag C. H. Beck");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000538 assertNull(fd.getEditor());
539 assertNull(fd.getFileEditionStatement());
540 assertNull(fd.getBiblEditionStatement());
541 assertNull(fd.getKeywords());
542
543 assertEquals(fd.getTokenSource(), "opennlp#tokens");
544 assertEquals(fd.getFoundries(),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000545 "base base/paragraphs base/sentences corenlp "
546 + "corenlp/constituency corenlp/morpho "
547 + "corenlp/namedentities corenlp/sentences "
548 + "glemm glemm/morpho mate mate/morpho"
549 + " opennlp opennlp/morpho opennlp/sentences"
550 + " treetagger treetagger/morpho "
551 + "treetagger/sentences");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000552 assertEquals(fd.getLayerInfos(),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000553 "base/s=spans corenlp/c=spans corenlp/ne=tokens"
554 + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
555 + " mate/l=tokens mate/m=tokens mate/p=tokens"
556 + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
557 + " tt/p=tokens tt/s=spans");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000558
559 assertEquals(fd.getCorpusTitle(), "Goethes Werke");
560 assertNull(fd.getCorpusSubTitle());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000561 assertEquals(fd.getCorpusAuthor(), "Goethe, Johann Wolfgang von");
562 assertEquals(fd.getCorpusEditor(), "Trunz, Erich");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000563 assertEquals(fd.getDocTitle(),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000564 "Goethe: Autobiographische Schriften II, (1817-1825, 1832)");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000565 assertNull(fd.getDocSubTitle());
566 assertNull(fd.getDocEditor());
567 assertNull(fd.getDocAuthor());
568
Nils Diewaldbb33da22015-03-04 16:24:25 +0000569 Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom")
570 .with("mate/m:number:pl"));
Nils Diewald884dbcf2015-02-27 17:02:28 +0000571 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000572
573 assertEquals(kr.getTotalResults(), 148);
574 assertEquals(0, kr.getStartIndex());
575 assertEquals(25, kr.getItemsPerPage());
Nils Diewaldba197f22014-11-01 17:21:46 +0000576 };
Nils Diewald06368ba2014-11-03 20:53:27 +0000577
Nils Diewaldafab8f32015-01-26 19:11:32 +0000578
Nils Diewald06368ba2014-11-03 20:53:27 +0000579 @Test
580 public void searchJSONnewJSON2 () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000581 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000582 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000583 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000584 FieldDocument fd = ki.addDoc(1,
585 getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000586 ki.commit();
Nils Diewald06368ba2014-11-03 20:53:27 +0000587
Nils Diewaldafab8f32015-01-26 19:11:32 +0000588 assertEquals(fd.getUID(), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000589 assertEquals(fd.getTextSigle(), "BZK_D59.00089");
590 assertEquals(fd.getDocSigle(), "BZK_D59");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000591 assertEquals(fd.getCorpusSigle(), "BZK");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000592 assertEquals(fd.getTitle(), "Saragat-Partei zerfällt");
593 assertEquals(fd.getPubDate().toString(), "19590219");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000594
595 assertNull(fd.getSubTitle());
596 assertNull(fd.getAuthor());
597 assertNull(fd.getEditor());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000598 assertEquals(fd.getPubPlace(), "Berlin");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000599 assertNull(fd.getPublisher());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000600 assertEquals(fd.getTextType(), "Zeitung: Tageszeitung");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000601 assertNull(fd.getTextTypeArt());
602 assertEquals(fd.getTextTypeRef(), "Tageszeitung");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000603 assertEquals(fd.getTextDomain(), "Politik");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000604 assertEquals(fd.getCreationDate().toString(), "19590219");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000605 assertEquals(fd.getLicense(), "ACA-NC-LC");
606 assertEquals(fd.getTextColumn(), "POLITIK");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000607 assertNull(fd.getPages());
608 assertEquals(fd.getTextClass(), "politik ausland");
609 assertNull(fd.getFileEditionStatement());
610 assertNull(fd.getBiblEditionStatement());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000611
612 assertEquals(fd.getLanguage(), "de");
613 assertEquals(fd.getReference(),
614 "Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14,"
615 + " Berliner Ausgabe, S. 7. - Sachgebiet: Politik, "
616 + "Originalressort: POLITIK; Saragat-Partei zerfällt");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000617 assertNull(fd.getPublisher());
618 assertNull(fd.getKeywords());
619
620 assertEquals(fd.getTokenSource(), "opennlp#tokens");
621
622 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000623 fd.getFoundries(),
624 "base base/paragraphs base/sentences corenlp "
625 + "corenlp/constituency corenlp/morpho corenlp/namedentities"
626 + " corenlp/sentences glemm glemm/morpho mate mate/morpho"
627 + " opennlp opennlp/morpho opennlp/sentences treetagger"
628 + " treetagger/morpho treetagger/sentences");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000629
Nils Diewaldbb33da22015-03-04 16:24:25 +0000630 assertEquals(fd.getLayerInfos(),
631 "base/s=spans corenlp/c=spans corenlp/ne=tokens"
632 + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
633 + " mate/l=tokens mate/m=tokens mate/p=tokens"
634 + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
635 + " tt/p=tokens tt/s=spans");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000636
637 assertEquals(fd.getCorpusTitle(), "Bonner Zeitungskorpus");
638 assertNull(fd.getCorpusSubTitle());
639 assertNull(fd.getCorpusAuthor());
640 assertNull(fd.getCorpusEditor());
641
642 assertEquals(fd.getDocTitle(), "Neues Deutschland");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000643 assertEquals(fd.getDocSubTitle(),
644 "Organ des Zentralkomitees der Sozialistischen "
645 + "Einheitspartei Deutschlands");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000646 assertNull(fd.getDocEditor());
647 assertNull(fd.getDocAuthor());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000648
649 Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom")
650 .with("mate/m:number:sg"));
Nils Diewald884dbcf2015-02-27 17:02:28 +0000651 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000652
653 assertEquals(kr.getTotalResults(), 6);
654 assertEquals(0, kr.getStartIndex());
655 assertEquals(25, kr.getItemsPerPage());
Nils Diewald06368ba2014-11-03 20:53:27 +0000656 };
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000657
Nils Diewaldafab8f32015-01-26 19:11:32 +0000658
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000659 @Test
Nils Diewald56dc2582014-11-04 21:33:46 +0000660 public void searchJSONcosmasBoundaryBug () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000661 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000662 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000663 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000664 FieldDocument fd = ki.addDoc(1,
665 getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000666 ki.commit();
Nils Diewald56dc2582014-11-04 21:33:46 +0000667
Nils Diewaldbb33da22015-03-04 16:24:25 +0000668 String json = getString(getClass().getResource(
669 "/queries/bugs/cosmas_boundary.jsonld").getFile());
Nils Diewald56dc2582014-11-04 21:33:46 +0000670
Nils Diewald8904c1d2015-02-26 16:13:18 +0000671 QueryBuilder kq = new QueryBuilder("tokens");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000672 Krill ks = new Krill(kq.focus(1,
673 kq.contains(kq.tag("base/s:s"), kq._(1, kq.seg("s:Leben")))));
Nils Diewald56dc2582014-11-04 21:33:46 +0000674
Nils Diewald884dbcf2015-02-27 17:02:28 +0000675 Result kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000676 assertEquals(kr.getSerialQuery(),
677 "focus(1: spanContain(<tokens:base/s:s />, {1: tokens:s:Leben}))");
margarethaf70addb2015-04-27 13:17:18 +0200678 assertEquals(40, kr.getMatch(0).getStartPos());
679 assertEquals(41, kr.getMatch(0).getEndPos());
680
Nils Diewaldafab8f32015-01-26 19:11:32 +0000681 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000682 kr.getMatch(0).getSnippetBrackets(),
683 "... Initiative\" eine neue politische Gruppierung ins "
684 + "[{1:Leben}] gerufen hatten. Pressemeldungen zufolge haben sich ...");
Nils Diewald56dc2582014-11-04 21:33:46 +0000685
Nils Diewaldafab8f32015-01-26 19:11:32 +0000686 // Try with high class - don't highlight
Nils Diewaldbb33da22015-03-04 16:24:25 +0000687 ks = new Krill(kq.focus(129,
688 kq.contains(kq.tag("base/s:s"), kq._(129, kq.seg("s:Leben")))));
Nils Diewald56dc2582014-11-04 21:33:46 +0000689
Nils Diewald3aa9e692015-02-20 22:20:11 +0000690 kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000691 assertEquals(kr.getSerialQuery(),
692 "focus(129: spanContain(<tokens:base/s:s />, {129: tokens:s:Leben}))");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000693 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000694 kr.getMatch(0).getSnippetBrackets(),
695 "... Initiative\" eine neue politische Gruppierung ins "
696 + "[Leben] gerufen hatten. Pressemeldungen zufolge haben sich ...");
Nils Diewald0fa2da22014-11-05 03:31:32 +0000697
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000698 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000699 kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000700 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000701 kr.getSerialQuery(),
702 "focus(129: spanElementDistance({129: tokens:s:Namen}, "
703 + "{129: tokens:s:Leben}, [(base/s:s[0:1], notOrdered, notExcluded)]))");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000704 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000705 kr.getMatch(0).getSnippetBrackets(),
706 "... ihren Austritt erklärt und unter dem [Namen \"Einheitsbewegung "
707 + "der sozialistischen Initiative\" eine neue politische Gruppierung "
708 + "ins Leben] gerufen hatten. Pressemeldungen zufolge haben sich ...");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000709 assertEquals(kr.getTotalResults(), 1);
710 assertEquals(0, kr.getStartIndex());
Nils Diewald56dc2582014-11-04 21:33:46 +0000711 };
712
Nils Diewaldbb33da22015-03-04 16:24:25 +0000713
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000714 @Test
715 public void searchJSONmultipleClassesBug () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000716 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000717 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000718 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000719 ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"),
720 true);
721 ki.addDoc(2, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"),
722 true);
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000723
Nils Diewaldafab8f32015-01-26 19:11:32 +0000724 ki.commit();
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000725
Nils Diewaldbb33da22015-03-04 16:24:25 +0000726 String json = getString(getClass().getResource(
727 "/queries/bugs/multiple_classes.jsonld").getFile());
728
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000729 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000730 Result kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000731 assertEquals(kr.getSerialQuery(),
732 "{4: spanNext({1: spanNext({2: tokens:s:ins}, "
733 + "{3: tokens:s:Leben})}, tokens:s:gerufen)}");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000734 assertEquals(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000735 kr.getMatch(0).getSnippetBrackets(),
736 "... sozialistischen Initiative\" eine neue politische"
737 + " Gruppierung [{4:{1:{2:ins} {3:Leben}} gerufen}] hatten. "
738 + "Pressemeldungen zufolge haben sich in ...");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000739 assertEquals(kr.getTotalResults(), 2);
740 assertEquals(0, kr.getStartIndex());
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000741 };
742
Nils Diewaldbb33da22015-03-04 16:24:25 +0000743
Nils Diewald277e9ce2014-11-06 03:42:11 +0000744 @Test
745 public void searchJSONmultipleClassesBugTokenList () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000746 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000747 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000748 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000749 ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"),
750 true);
751 ki.addDoc(2, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"),
752 true);
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000753
Nils Diewaldafab8f32015-01-26 19:11:32 +0000754 ki.commit();
Nils Diewald277e9ce2014-11-06 03:42:11 +0000755
Nils Diewaldbb33da22015-03-04 16:24:25 +0000756 String json = getString(getClass().getResource(
757 "/queries/bugs/multiple_classes.jsonld").getFile());
758
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000759 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000760 Result kr = ks.apply(ki);
Nils Diewald277e9ce2014-11-06 03:42:11 +0000761
Nils Diewaldafab8f32015-01-26 19:11:32 +0000762 ObjectMapper mapper = new ObjectMapper();
763 JsonNode res = mapper.readTree(kr.toTokenListJsonString());
Nils Diewald277e9ce2014-11-06 03:42:11 +0000764
Akrond504f212015-06-20 00:27:54 +0200765 assertEquals(1, res.at("/meta/totalResults").asInt());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000766 assertEquals("{4: spanNext({1: spanNext({2: tokens:s:ins}, "
767 + "{3: tokens:s:Leben})}, tokens:s:gerufen)}",
Akrond504f212015-06-20 00:27:54 +0200768 res.at("/meta/serialQuery").asText());
769 assertEquals(0, res.at("/meta/startIndex").asInt());
770 assertEquals(25, res.at("/meta/itemsPerPage").asInt());
Nils Diewald277e9ce2014-11-06 03:42:11 +0000771
Nils Diewaldafab8f32015-01-26 19:11:32 +0000772 assertEquals("BZK_D59.00089", res.at("/matches/0/textSigle").asText());
773 assertEquals(328, res.at("/matches/0/tokens/0/0").asInt());
774 assertEquals(331, res.at("/matches/0/tokens/0/1").asInt());
775 assertEquals(332, res.at("/matches/0/tokens/1/0").asInt());
776 assertEquals(337, res.at("/matches/0/tokens/1/1").asInt());
777 assertEquals(338, res.at("/matches/0/tokens/2/0").asInt());
778 assertEquals(345, res.at("/matches/0/tokens/2/1").asInt());
Nils Diewald277e9ce2014-11-06 03:42:11 +0000779 };
Nils Diewaldc7d08d92014-11-05 21:30:05 +0000780
Nils Diewaldafab8f32015-01-26 19:11:32 +0000781
Nils Diewaldb84e7272014-11-07 01:27:38 +0000782 @Test
783 public void searchJSONmultitermRewriteBug () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000784 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000785 KrillIndex ki = new KrillIndex();
Nils Diewaldb84e7272014-11-07 01:27:38 +0000786
Nils Diewaldafab8f32015-01-26 19:11:32 +0000787 assertEquals(ki.numberOf("documents"), 0);
788
789 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000790 FieldDocument fd = ki.addDoc(1,
791 getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000792 ki.commit();
793
794 assertEquals(ki.numberOf("documents"), 1);
795 assertEquals("BZK", fd.getCorpusSigle());
796
797 // [tt/p="A.*"]{0,3}[tt/p="N.*"]
Nils Diewaldbb33da22015-03-04 16:24:25 +0000798 String json = getString(getClass().getResource(
799 "/queries/bugs/multiterm_rewrite.jsonld").getFile());
800
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000801 Krill ks = new Krill(json);
Nils Diewald2d5f8102015-02-26 21:07:54 +0000802 KrillCollection kc = ks.getCollection();
Nils Diewaldc471b182014-11-19 22:51:15 +0000803
Nils Diewaldafab8f32015-01-26 19:11:32 +0000804 // No index was set
805 assertEquals(-1, kc.numberOf("documents"));
806 kc.setIndex(ki);
Nils Diewaldc471b182014-11-19 22:51:15 +0000807
Nils Diewaldafab8f32015-01-26 19:11:32 +0000808 // Index was set but vc restricted to WPD
809 assertEquals(0, kc.numberOf("documents"));
Nils Diewaldc471b182014-11-19 22:51:15 +0000810
Akron176c9b12015-07-29 19:53:40 +0200811 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +0000812 kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
Akron176c9b12015-07-29 19:53:40 +0200813 */
814 CollectionBuilder cb = new CollectionBuilder();
Akron40550172015-08-04 03:06:12 +0200815 kc.fromBuilder(cb.orGroup().with(kc.getBuilder())
816 .with(cb.term("corpusSigle", "BZK")));
Akron176c9b12015-07-29 19:53:40 +0200817
Nils Diewaldafab8f32015-01-26 19:11:32 +0000818 ks.setCollection(kc);
819 assertEquals(1, kc.numberOf("documents"));
Nils Diewald1220e3e2014-11-08 03:18:58 +0000820
Nils Diewald884dbcf2015-02-27 17:02:28 +0000821 Result kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000822
823 assertEquals(kr.getSerialQuery(),
824 "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), "
825 + "spanNext(spanRepetition(SpanMultiTermQueryWrapper"
826 + "(tokens:/tt/p:A.*/){1,3}), "
827 + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])");
Nils Diewaldb84e7272014-11-07 01:27:38 +0000828
Nils Diewaldafab8f32015-01-26 19:11:32 +0000829 assertEquals(kr.getTotalResults(), 58);
830 assertEquals(0, kr.getStartIndex());
Nils Diewald5871e4d2014-11-07 03:48:25 +0000831
Nils Diewaldbb33da22015-03-04 16:24:25 +0000832 assertEquals(kr.getMatch(0).getSnippetBrackets(),
833 "[Saragat-Partei] zerfällt Rom (ADN) die von dem");
834 assertEquals(kr.getMatch(1).getSnippetBrackets(),
835 "[Saragat-Partei] zerfällt Rom (ADN) die von dem");
836 assertEquals(kr.getMatch(2).getSnippetBrackets(),
837 "Saragat-Partei zerfällt [Rom] (ADN) "
838 + "die von dem Rechtssozialisten Saragat");
839 assertEquals(kr.getMatch(3).getSnippetBrackets(),
840 "Saragat-Partei zerfällt Rom ([ADN]) "
841 + "die von dem Rechtssozialisten Saragat geführte");
Nils Diewald5871e4d2014-11-07 03:48:25 +0000842
Nils Diewaldbb33da22015-03-04 16:24:25 +0000843 assertEquals(kr.getMatch(23).getSnippetBrackets(),
844 "dem Namen \"Einheitsbewegung der sozialistischen "
845 + "Initiative\" [eine neue politische Gruppierung] "
846 + "ins Leben gerufen hatten. Pressemeldungen zufolge");
Nils Diewaldb84e7272014-11-07 01:27:38 +0000847 };
848
849
Nils Diewald56dc2582014-11-04 21:33:46 +0000850 @Test
Akrone4fdce42015-11-13 16:06:10 +0100851 public void searchJSONtokenDistanceSpanBug () throws IOException {
852 // Construct index
853 KrillIndex ki = new KrillIndex();
854 ki.addDoc(1, getClass().getResourceAsStream("/goe/AGX-00002.json"),
855 false);
856 ki.addDoc(2, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"),
857 true);
858 ki.commit();
859
860 // ({1:Sonne []* Erde} | {2: Erde []* Sonne})
861 String json = getString(getClass().getResource(
862 "/queries/bugs/tokendistancespan_bug.jsonld").getFile()
863 );
864
865 Krill ks = new Krill(json);
866 Result kr = ks.apply(ki);
867 ObjectMapper mapper = new ObjectMapper();
868 JsonNode res = mapper.readTree(kr.toJsonString());
869 assertTrue(res.at("/errors").isMissingNode());
870 };
871
872
873 @Test
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000874 public void searchJSONCollection () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000875 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000876 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000877 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000878 for (String i : new String[] { "00001", "00002", "00003", "00004",
879 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000880 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000881 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
882 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000883 };
884 ki.commit();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000885 String json = getString(getClass().getResource(
886 "/queries/metaquery8-nocollection.jsonld").getFile());
887
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000888 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000889 Result kr = ks.apply(ki);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000890 assertEquals(kr.getTotalResults(), 276);
891 assertEquals(0, kr.getStartIndex());
892 assertEquals(10, kr.getItemsPerPage());
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000893
Nils Diewaldbb33da22015-03-04 16:24:25 +0000894 json = getString(getClass().getResource("/queries/metaquery8.jsonld")
895 .getFile());
896
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000897 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000898 kr = ks.apply(ki);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000899
Nils Diewaldafab8f32015-01-26 19:11:32 +0000900 assertEquals(kr.getTotalResults(), 147);
901 assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
902 assertEquals(0, kr.getStartIndex());
903 assertEquals(10, kr.getItemsPerPage());
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000904
Nils Diewaldbb33da22015-03-04 16:24:25 +0000905 json = getString(getClass().getResource(
906 "/queries/metaquery8-filtered.jsonld").getFile());
907
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000908 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000909 kr = ks.apply(ki);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000910
Nils Diewaldafab8f32015-01-26 19:11:32 +0000911 assertEquals(kr.getTotalResults(), 28);
912 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
913 assertEquals(0, kr.getStartIndex());
914 assertEquals(10, kr.getItemsPerPage());
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000915
Nils Diewaldbb33da22015-03-04 16:24:25 +0000916 json = getString(getClass().getResource(
917 "/queries/metaquery8-filtered-further.jsonld").getFile());
918
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000919 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000920 kr = ks.apply(ki);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000921
Nils Diewaldafab8f32015-01-26 19:11:32 +0000922 assertEquals(kr.getTotalResults(), 0);
923 assertEquals(0, kr.getStartIndex());
924 assertEquals(10, kr.getItemsPerPage());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000925
Akron176c9b12015-07-29 19:53:40 +0200926
Nils Diewaldbb33da22015-03-04 16:24:25 +0000927 json = getString(getClass().getResource(
928 "/queries/metaquery8-filtered-nested.jsonld").getFile());
929
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000930 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000931 kr = ks.apply(ki);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000932
Akron176c9b12015-07-29 19:53:40 +0200933 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +0000934 assertEquals("filter with QueryWrapperFilter("
935 + "+(ID:WPD_AAA.00003 (+tokens:s:die"
936 + " +tokens:s:Schriftzeichen)))",
937 ks.getCollection().getFilter(1).toString());
Akron176c9b12015-07-29 19:53:40 +0200938 */
Akron40550172015-08-04 03:06:12 +0200939 assertEquals(
940 "AndGroup(OrGroup(ID:WPD_AAA.00001 ID:WPD_AAA.00002) OrGroup(ID:WPD_AAA.00003 AndGroup(tokens:s:die tokens:s:Schriftzeichen)))",
941 ks.getCollection().toString());
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000942
Nils Diewaldafab8f32015-01-26 19:11:32 +0000943 assertEquals(kr.getTotalResults(), 119);
944 assertEquals(0, kr.getStartIndex());
945 assertEquals(10, kr.getItemsPerPage());
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000946 };
947
Nils Diewald1e5d5942014-05-20 13:29:53 +0000948
949 @Test
950 public void searchJSONSentenceContext () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000951 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000952 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000953 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000954 for (String i : new String[] { "00001", "00002", "00003", "00004",
955 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +0000956 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +0000957 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
958 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +0000959 };
960 ki.commit();
Nils Diewald1e5d5942014-05-20 13:29:53 +0000961
Nils Diewaldbb33da22015-03-04 16:24:25 +0000962 String json = getString(getClass().getResource(
963 "/queries/bsp-context-2.jsonld").getFile());
964
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000965 Krill ks = new Krill(json);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000966 ks.getMeta().setCutOff(false);
967 SearchContext sc = ks.getMeta().getContext();
Nils Diewaldafab8f32015-01-26 19:11:32 +0000968 sc.left.setLength((short) 10);
969 sc.right.setLength((short) 10);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000970
Nils Diewald884dbcf2015-02-27 17:02:28 +0000971 Result kr = ks.apply(ki);
Akronfd05f502015-07-30 18:34:26 +0200972
Nils Diewaldbb33da22015-03-04 16:24:25 +0000973 assertEquals(kr.getMatch(1).getSnippetBrackets(),
974 "... dezimalen [Wert] 65 sowohl ...");
Nils Diewaldafab8f32015-01-26 19:11:32 +0000975 assertEquals(kr.getTotalResults(), 3);
976 assertEquals(0, kr.getStartIndex());
977 assertEquals(25, kr.getItemsPerPage());
978 assertFalse(kr.getContext().toJsonNode().toString().equals("\"s\""));
Nils Diewald1e5d5942014-05-20 13:29:53 +0000979
Nils Diewaldbb33da22015-03-04 16:24:25 +0000980 json = getString(getClass().getResource(
981 "/queries/bsp-context-sentence.jsonld").getFile());
Nils Diewald1e5d5942014-05-20 13:29:53 +0000982
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000983 kr = new Krill(json).apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000984 assertEquals(kr.getMatch(0).getSnippetBrackets(),
985 "steht a für den dezimalen [Wert] 97 sowohl im ASCII-"
986 + " als auch im Unicode-Zeichensatz");
987 assertEquals(kr.getMatch(1).getSnippetBrackets(),
988 "steht A für den dezimalen [Wert] 65 sowohl im ASCII-"
989 + " als auch im Unicode-Zeichensatz");
990 assertEquals(kr.getMatch(2).getSnippetBrackets(),
991 "In einem Zahlensystem mit einer Basis größer "
992 + "als 10 steht A oder a häufig für den dezimalen"
993 + " [Wert] 10, siehe auch Hexadezimalsystem.");
Nils Diewald1e5d5942014-05-20 13:29:53 +0000994
Nils Diewaldafab8f32015-01-26 19:11:32 +0000995 assertEquals(kr.getContext().toJsonNode().toString(), "\"s\"");
Nils Diewald1e5d5942014-05-20 13:29:53 +0000996 };
997
998
Nils Diewald2276e1c2014-04-10 15:01:59 +0000999 @Test
Nils Diewald54187632014-06-11 14:39:29 +00001000 public void searchJSONbug () throws IOException {
Nils Diewaldafab8f32015-01-26 19:11:32 +00001001 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +00001002 KrillIndex ki = new KrillIndex();
Nils Diewaldafab8f32015-01-26 19:11:32 +00001003 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +00001004 for (String i : new String[] { "00001", "00002", "00003", "00004",
1005 "00005", "00006", "02439" }) {
Nils Diewald50333552015-03-02 15:54:46 +00001006 ki.addDoc(
Nils Diewaldbb33da22015-03-04 16:24:25 +00001007 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
1008 true);
Nils Diewaldafab8f32015-01-26 19:11:32 +00001009 };
1010 ki.commit();
Nils Diewald54187632014-06-11 14:39:29 +00001011
Nils Diewaldbb33da22015-03-04 16:24:25 +00001012 String json = getString(getClass().getResource(
1013 "/queries/bsp-bug.jsonld").getFile());
Nils Diewald54187632014-06-11 14:39:29 +00001014
Nils Diewald884dbcf2015-02-27 17:02:28 +00001015 Result kr = new Krill(json).apply(ki);
Nils Diewaldc471b182014-11-19 22:51:15 +00001016
Nils Diewaldbb33da22015-03-04 16:24:25 +00001017 assertEquals(kr.getError(0).getMessage(),
1018 "Operation needs operand list");
Nils Diewald54187632014-06-11 14:39:29 +00001019 };
1020
Nils Diewaldafab8f32015-01-26 19:11:32 +00001021
Nils Diewaldef7124e2014-11-12 20:08:13 +00001022 /**
1023 * This is a breaking test for #179
1024 */
1025 @Test
1026 public void searchJSONexpansionBug () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +00001027 // Construct index
1028 KrillIndex ki = new KrillIndex();
1029 // Indexing test files
1030 ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
1031 ki.commit();
1032
1033 // Expansion bug
1034 // der alte Digraph Aa durch Å
1035 String json = getString(getClass().getResource(
1036 "/queries/bugs/expansion_bug_2.jsonld").getFile());
1037
1038 Result kr = new Krill(json).apply(ki);
1039 assertEquals("... Buchstabe des Alphabetes. In Dänemark ist "
1040 + "[der alte Digraph Aa durch Å] ersetzt worden, "
1041 + "in Eigennamen und Ortsnamen ...", kr.getMatch(0)
1042 .getSnippetBrackets());
1043 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
1044 assertEquals(kr.getTotalResults(), 1);
1045
1046 // der alte Digraph Aa durch []
1047 // Works with one document
1048 json = getString(getClass().getResource(
1049 "/queries/bugs/expansion_bug.jsonld").getFile());
1050
1051 kr = new Krill(json).apply(ki);
1052
1053 assertEquals("... Buchstabe des Alphabetes. In Dänemark ist "
1054 + "[der alte Digraph Aa durch Å] ersetzt worden, "
1055 + "in Eigennamen und Ortsnamen ...", kr.getMatch(0)
1056 .getSnippetBrackets());
1057 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
1058 assertEquals(kr.getTotalResults(), 1);
1059
1060 // Now try with one file ahead
1061 ki = new KrillIndex();
1062 for (String i : new String[] { "00001", "00002" }) {
1063 ki.addDoc(
1064 getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
1065 true);
1066 };
1067 ki.commit();
1068
1069 // Expansion bug
1070 // der alte Digraph Aa durch Å
1071 json = getString(getClass().getResource(
1072 "/queries/bugs/expansion_bug_2.jsonld").getFile());
1073
1074 kr = new Krill(json).apply(ki);
1075
1076 assertEquals("... Buchstabe des Alphabetes. In Dänemark ist "
1077 + "[der alte Digraph Aa durch Å] ersetzt worden, "
1078 + "in Eigennamen und Ortsnamen ...", kr.getMatch(0)
1079 .getSnippetBrackets());
1080 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
1081 assertEquals(kr.getTotalResults(), 1);
1082
1083 // der alte Digraph Aa durch []
1084 json = getString(getClass().getResource(
1085 "/queries/bugs/expansion_bug.jsonld").getFile());
1086
1087 kr = new Krill(json).apply(ki);
1088 assertEquals("... Buchstabe des Alphabetes. In Dänemark ist "
1089 + "[der alte Digraph Aa durch Å] ersetzt worden, "
1090 + "in Eigennamen und Ortsnamen ...", kr.getMatch(0)
1091 .getSnippetBrackets());
1092 assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
1093 assertEquals(kr.getTotalResults(), 1);
Nils Diewaldef7124e2014-11-12 20:08:13 +00001094 };
Nils Diewaldc925b492013-12-03 23:56:10 +00001095};