blob: 8d85d78dea91a6e78efbae1959c5dd6ad2e94074 [file] [log] [blame]
Nils Diewalde3645702014-11-07 21:15:20 +00001package de.ids_mannheim.korap.search;
2
3import java.util.*;
4import java.io.*;
5
6import static de.ids_mannheim.korap.TestSimple.*;
7
Nils Diewaldbbd39a52015-02-23 19:56:57 +00008import de.ids_mannheim.korap.Krill;
Nils Diewald2d5f8102015-02-26 21:07:54 +00009import de.ids_mannheim.korap.KrillCollection;
Nils Diewald0339d462015-02-26 14:53:56 +000010import de.ids_mannheim.korap.KrillQuery;
Nils Diewalda14ecd62015-02-26 21:00:20 +000011import de.ids_mannheim.korap.KrillIndex;
Nils Diewalde3645702014-11-07 21:15:20 +000012import de.ids_mannheim.korap.index.FieldDocument;
Nils Diewald884dbcf2015-02-27 17:02:28 +000013import de.ids_mannheim.korap.response.Result;
Nils Diewalde3645702014-11-07 21:15:20 +000014import java.nio.file.Files;
15import java.nio.file.FileSystem;
16import java.nio.file.Path;
17import java.nio.charset.StandardCharsets;
18import java.nio.ByteBuffer;
19
Akron932dd592021-07-27 12:52:46 +020020import org.apache.commons.lang3.StringUtils;
Akron484c3c12015-07-07 20:25:44 +020021import org.apache.lucene.analysis.Analyzer;
22import org.apache.lucene.analysis.TokenStream;
23import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
24
Nils Diewalde3645702014-11-07 21:15:20 +000025import com.fasterxml.jackson.databind.ObjectMapper;
26import com.fasterxml.jackson.databind.JsonNode;
27
28import static org.junit.Assert.*;
29import org.junit.Test;
30import org.junit.Ignore;
31import org.junit.runner.RunWith;
32import org.junit.runners.JUnit4;
33
34@RunWith(JUnit4.class)
35public class TestMetaFields {
Akron640458c2015-06-25 12:36:15 +020036
Nils Diewalde3645702014-11-07 21:15:20 +000037 @Test
38 public void searchMetaFields () throws IOException {
39
Nils Diewald3aa9e692015-02-20 22:20:11 +000040 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +000041 KrillIndex ki = new KrillIndex();
Nils Diewald3aa9e692015-02-20 22:20:11 +000042 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +000043 for (String i : new String[] { "00001", "00002" }) {
Eliza Margaretha6f989202016-10-14 21:48:29 +020044 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
Nils Diewaldbb33da22015-03-04 16:24:25 +000045 true);
Nils Diewald3aa9e692015-02-20 22:20:11 +000046 };
47 ki.commit();
Nils Diewalde3645702014-11-07 21:15:20 +000048
Eliza Margaretha6f989202016-10-14 21:48:29 +020049 String jsonString = getJsonString(getClass()
50 .getResource("/queries/metas/fields.jsonld").getFile());
Nils Diewaldbb33da22015-03-04 16:24:25 +000051
Nils Diewaldbbd39a52015-02-23 19:56:57 +000052 Krill ks = new Krill(jsonString);
Nils Diewalde3645702014-11-07 21:15:20 +000053
Nils Diewald884dbcf2015-02-27 17:02:28 +000054 Result kr = ks.apply(ki);
Nils Diewald3aa9e692015-02-20 22:20:11 +000055 assertEquals((long) 17, kr.getTotalResults());
56 assertEquals(0, kr.getStartIndex());
57 assertEquals(9, kr.getItemsPerPage());
Nils Diewaldbb33da22015-03-04 16:24:25 +000058
Nils Diewald3aa9e692015-02-20 22:20:11 +000059 ObjectMapper mapper = new ObjectMapper();
60 JsonNode res = mapper.readTree(kr.toJsonString());
Akronb1166442015-06-27 00:34:19 +020061
Akronb1166442015-06-27 00:34:19 +020062 // mirror fields
63 assertEquals(9, res.at("/meta/count").asInt());
64
65 if (res.at("/meta/fields/0").asText().equals("UID")) {
66 assertEquals("corpusID", res.at("/meta/fields/1").asText());
67 }
68 else {
69 assertEquals("corpusID", res.at("/meta/fields/0").asText());
70 assertEquals("UID", res.at("/meta/fields/1").asText());
71 };
72
Nils Diewald3aa9e692015-02-20 22:20:11 +000073 assertEquals(0, res.at("/matches/0/UID").asInt());
74 assertEquals("WPD", res.at("/matches/0/corpusID").asText());
Akron12f1f5b2015-06-24 15:56:52 +020075 assertTrue(res.at("/matches/0/docID").isMissingNode());
76 assertTrue(res.at("/matches/0/textSigle").isMissingNode());
77 assertTrue(res.at("/matches/0/ID").isMissingNode());
78 assertTrue(res.at("/matches/0/author").isMissingNode());
79 assertTrue(res.at("/matches/0/title").isMissingNode());
80 assertTrue(res.at("/matches/0/subTitle").isMissingNode());
81 assertTrue(res.at("/matches/0/textClass").isMissingNode());
82 assertTrue(res.at("/matches/0/pubPlace").isMissingNode());
83 assertTrue(res.at("/matches/0/pubDate").isMissingNode());
84 assertTrue(res.at("/matches/0/foundries").isMissingNode());
85 assertTrue(res.at("/matches/0/layerInfos").isMissingNode());
86 assertTrue(res.at("/matches/0/tokenization").isMissingNode());
Nils Diewalde3645702014-11-07 21:15:20 +000087
Eliza Margaretha6f989202016-10-14 21:48:29 +020088 jsonString = getJsonString(getClass()
89 .getResource("/queries/metas/fields_2.jsonld").getFile());
Nils Diewaldbbd39a52015-02-23 19:56:57 +000090 ks = new Krill(jsonString);
Nils Diewald3aa9e692015-02-20 22:20:11 +000091 kr = ks.apply(ki);
92 assertEquals((long) 17, kr.getTotalResults());
93 assertEquals(0, kr.getStartIndex());
94 assertEquals(2, kr.getItemsPerPage());
Nils Diewaldbb33da22015-03-04 16:24:25 +000095
Nils Diewald3aa9e692015-02-20 22:20:11 +000096 mapper = new ObjectMapper();
97 res = mapper.readTree(kr.toJsonString());
98 assertEquals(0, res.at("/matches/0/UID").asInt());
Akron12f1f5b2015-06-24 15:56:52 +020099 assertTrue(res.at("/matches/0/corpusID").isMissingNode());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200100 assertEquals("Ruru,Jens.Ol,Aglarech",
101 res.at("/matches/0/author").asText());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000102 assertEquals("A", res.at("/matches/0/title").asText());
103 assertEquals("WPD_AAA.00001", res.at("/matches/0/docID").asText());
Akron3e0403f2015-06-24 20:59:13 +0200104 assertTrue(res.at("/matches/0/textSigle").isMissingNode());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200105 assertEquals("match-WPD_AAA.00001-p6-7",
106 res.at("/matches/0/matchID").asText());
Akron48937e92015-06-26 01:49:02 +0200107 // assertEquals("p6-7", res.at("/matches/0/matchID").asText());
Akron32b95192019-01-11 13:58:55 +0100108 assertTrue(res.at("/matches/0/subTitle").isMissingNode());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000109 assertEquals("", res.at("/matches/0/subTitle").asText());
110 assertEquals("", res.at("/matches/0/textClass").asText());
111 assertEquals("", res.at("/matches/0/pubPlace").asText());
112 assertEquals("", res.at("/matches/0/pubDate").asText());
113 assertEquals("", res.at("/matches/0/foundries").asText());
114 assertEquals("", res.at("/matches/0/layerInfo").asText());
115 assertEquals("", res.at("/matches/0/tokenization").asText());
Nils Diewalde3645702014-11-07 21:15:20 +0000116 };
Akron3e0403f2015-06-24 20:59:13 +0200117
Akron640458c2015-06-25 12:36:15 +0200118
Akron3e0403f2015-06-24 20:59:13 +0200119 @Test
120 public void searchMetaFieldsNew () throws IOException {
121
122 // Construct index
123 KrillIndex ki = new KrillIndex();
Akron640458c2015-06-25 12:36:15 +0200124 ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false);
Akron3e0403f2015-06-24 20:59:13 +0200125 ki.commit();
126
Eliza Margaretha6f989202016-10-14 21:48:29 +0200127 String jsonString = getJsonString(getClass()
128 .getResource("/queries/metas/fields_no.jsonld").getFile());
Akron3e0403f2015-06-24 20:59:13 +0200129
130 Krill ks = new Krill(jsonString);
131 Result kr = ks.apply(ki);
132 ObjectMapper mapper = new ObjectMapper();
133 JsonNode res = mapper.readTree(kr.toJsonString());
Akronbe9638d2019-02-07 17:09:42 +0100134
Akron3e0403f2015-06-24 20:59:13 +0200135 assertEquals(0, res.at("/matches/0/UID").asInt());
136 assertEquals("GOE_AGX.00002", res.at("/matches/0/textSigle").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200137 assertEquals("Maximen und Reflexionen",
138 res.at("/matches/0/title").asText());
Akron3e0403f2015-06-24 20:59:13 +0200139 assertEquals("1982", res.at("/matches/0/pubDate").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200140 assertEquals("Goethe, Johann Wolfgang von",
141 res.at("/matches/0/author").asText());
Akron3e0403f2015-06-24 20:59:13 +0200142 assertEquals("GOE_AGX", res.at("/matches/0/docSigle").asText());
143 assertEquals("GOE", res.at("/matches/0/corpusSigle").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200144 assertEquals("Religion und Christentum",
145 res.at("/matches/0/subTitle").asText());
Akron3e0403f2015-06-24 20:59:13 +0200146 assertEquals("München", res.at("/matches/0/pubPlace").asText());
Akron640458c2015-06-25 12:36:15 +0200147 assertEquals(
148 "base/s=spans cnx/c=spans cnx/l=tokens cnx/m=tokens cnx/p=tokens cnx/s=spans cnx/syn=tokens corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans xip/c=spans xip/l=tokens xip/p=tokens xip/s=spans",
149 res.at("/matches/0/layerInfos").asText());
Akron3e0403f2015-06-24 20:59:13 +0200150 assertTrue(res.at("/matches/0/textType").isMissingNode());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200151 assertEquals("match-GOE_AGX.00002-p7-8",
152 res.at("/matches/0/matchID").asText());
Akron48937e92015-06-26 01:49:02 +0200153
Akronefbb7542025-06-18 10:36:24 +0200154 assertFalse(res.at("/meta/rewrites").isMissingNode());
155 assertEquals("Kustvakt", res.at("/meta/rewrites/0/src").asText());
156
157
Akron3e0403f2015-06-24 20:59:13 +0200158 // All fields
Eliza Margaretha6f989202016-10-14 21:48:29 +0200159 jsonString = getJsonString(getClass()
160 .getResource("/queries/metas/fields_all.jsonld").getFile());
Akron3e0403f2015-06-24 20:59:13 +0200161
162 ks = new Krill(jsonString);
163 kr = ks.apply(ki);
164 mapper = new ObjectMapper();
165 res = mapper.readTree(kr.toJsonString());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200166 assertEquals("Verlag C. H. Beck",
167 res.at("/matches/0/publisher").asText());
Akron3e0403f2015-06-24 20:59:13 +0200168 assertEquals("Aphorismus", res.at("/matches/0/textType").asText());
169 assertEquals("Aphorismen", res.at("/matches/0/textTypeRef").asText());
Akron640458c2015-06-25 12:36:15 +0200170 assertEquals(
171 "Goethe, Johann Wolfgang von: Maximen und Reflexionen. Religion und Christentum, [Aphorismen], (Erstveröffentlichung: Stuttgart ; Tübingen, 1827-1842), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 12, Schriften zur Kunst. Schriften zur Literatur. Maximen und Reflexionen, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 372-377",
172 res.at("/matches/0/reference").asText());
Akron3e0403f2015-06-24 20:59:13 +0200173 assertEquals("de", res.at("/matches/0/language").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200174 assertEquals("opennlp#tokens",
175 res.at("/matches/0/tokenSource").asText());
Akron640458c2015-06-25 12:36:15 +0200176 assertEquals(
177 "base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences",
178 res.at("/matches/0/foundries").asText());
Akron2b921a62019-01-14 18:52:45 +0100179
Eliza Margaretha6f989202016-10-14 21:48:29 +0200180 assertEquals("Goethe-Korpus",
181 res.at("/matches/0/corpusTitle").asText());
Akron7e3a10f2017-05-05 15:36:20 +0200182 assertEquals("QAO-NC", res.at("/matches/0/availability").asText());
Akron640458c2015-06-25 12:36:15 +0200183 assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
184 res.at("/matches/0/docTitle").asText());
Akron3e0403f2015-06-24 20:59:13 +0200185 assertEquals("1827", res.at("/matches/0/creationDate").asText());
Akron69b958c2017-02-15 22:49:45 +0100186 // assertEquals("372-377", res.at("/matches/0/pages").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200187 assertEquals("match-GOE_AGX.00002-p7-8",
188 res.at("/matches/0/matchID").asText());
Akronefbb7542025-06-18 10:36:24 +0200189 assertTrue(res.at("/meta/rewrites").isMissingNode());
Akron6590c322015-07-02 16:08:13 +0200190
191
192 // @All fields
Eliza Margaretha6f989202016-10-14 21:48:29 +0200193 jsonString = getJsonString(getClass()
194 .getResource("/queries/metas/fields_at_all.jsonld").getFile());
Akron6590c322015-07-02 16:08:13 +0200195
196 ks = new Krill(jsonString);
197 kr = ks.apply(ki);
198 mapper = new ObjectMapper();
199 res = mapper.readTree(kr.toJsonString());
200
Eliza Margaretha6f989202016-10-14 21:48:29 +0200201 assertEquals("Verlag C. H. Beck",
202 res.at("/matches/0/publisher").asText());
Akron6590c322015-07-02 16:08:13 +0200203 assertEquals("Aphorismus", res.at("/matches/0/textType").asText());
204 assertEquals("Aphorismen", res.at("/matches/0/textTypeRef").asText());
205 assertEquals(
206 "Goethe, Johann Wolfgang von: Maximen und Reflexionen. Religion und Christentum, [Aphorismen], (Erstveröffentlichung: Stuttgart ; Tübingen, 1827-1842), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 12, Schriften zur Kunst. Schriften zur Literatur. Maximen und Reflexionen, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 372-377",
207 res.at("/matches/0/reference").asText());
208 assertEquals("de", res.at("/matches/0/language").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200209 assertEquals("opennlp#tokens",
210 res.at("/matches/0/tokenSource").asText());
Akron6590c322015-07-02 16:08:13 +0200211 assertEquals(
212 "base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences",
213 res.at("/matches/0/foundries").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200214 assertEquals("Goethe-Korpus",
215 res.at("/matches/0/corpusTitle").asText());
Akrone64cc162019-01-08 18:40:37 +0100216 assertEquals("QAO-NC", res.at("/matches/0/availability").asText());
Akron6590c322015-07-02 16:08:13 +0200217 assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
218 res.at("/matches/0/docTitle").asText());
219 assertEquals("1827", res.at("/matches/0/creationDate").asText());
Akroneb8c02b2024-06-26 14:34:45 +0200220 assertTrue(res.at("/matches/0/pages").isMissingNode());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200221 assertEquals("match-GOE_AGX.00002-p7-8",
222 res.at("/matches/0/matchID").asText());
Akron379fed02024-06-24 10:02:29 +0200223
224
225 // Missing field
226 jsonString = getJsonString(getClass()
227 .getResource("/queries/metas/fields_missing.jsonld").getFile());
228
229 ks = new Krill(jsonString);
230 kr = ks.apply(ki);
231 mapper = new ObjectMapper();
232 res = mapper.readTree(kr.toJsonString());
233
234 assertTrue(res.at("/matches/0/publisher").isMissingNode());
235 assertEquals("Goethe-Korpus", res.at("/matches/0/corpusTitle").asText());
236 assertTrue(res.at("/matches/0/textType").isMissingNode());
237 assertEquals("", res.at("/matches/0/UID").asText());
238 assertTrue(res.at("/matches/0/namespace.new").isMissingNode());
Akron3e0403f2015-06-24 20:59:13 +0200239 };
Akron48937e92015-06-26 01:49:02 +0200240
Akrond34403f2024-06-24 11:44:12 +0200241 @Test
242 public void searchMetaFieldsWithPeriods () throws IOException {
243
244 // Construct index
245 KrillIndex ki = new KrillIndex();
246 FieldDocument fd = ki.addDoc(getClass().getResourceAsStream("/others/KED-KLX-03212.json.gz"), true);
247
248 ki.commit();
249
250 String jsonString = getJsonString(getClass()
251 .getResource("/queries/metas/fields_with_periods.jsonld").getFile());
252
253 Krill ks = new Krill(jsonString);
254 Result kr = ks.apply(ki);
255 ObjectMapper mapper = new ObjectMapper();
256 JsonNode res = mapper.readTree(kr.toJsonString());
257
258 String sv = fd.doc.getField("textSigle").stringValue();
259 assertEquals("KED/KLX/03212", sv);
260
261 sv = fd.doc.getField("KED.corpusRcpntLabel").stringValue();
262 assertEquals("data:,Kinder", sv);
263
264 assertEquals(1, res.at("/meta/totalResults").asInt());
265
266 assertEquals(0, res.at("/matches/0/UID").asInt());
267 assertEquals("KED/KLX/03212", res.at("/matches/0/textSigle").asText());
268 assertTrue(res.at("/matches/0/title").isMissingNode());
Akron74563e12024-06-24 18:00:57 +0200269 assertEquals("data:,Kinder", res.at("/matches/0/KED.corpusRcpntLabel").asText());
Akrond34403f2024-06-24 11:44:12 +0200270 assertFalse(res.at("/matches/0/fields").isMissingNode());
271
272 Iterator fieldIter = res.at("/matches/0/fields").elements();
273
274 int checkC = 0;
275 int checkF = 0;
276 while (fieldIter.hasNext()) {
277 JsonNode field = (JsonNode) fieldIter.next();
278
279 String key = field.at("/key").asText();
280
281 switch (key) {
282 case "KED.corpusRcpntLabel":
283 assertEquals("type:attachement", field.at("/type").asText());
284 assertEquals("koral:field", field.at("/@type").asText());
285 assertEquals("data:,Kinder", field.at("/value").asText());
286 checkC++;
287 break;
288 case "UID":
289 checkF++;
290 break;
291 case "textSigle":
292 assertEquals("type:string", field.at("/type").asText());
293 assertEquals("koral:field", field.at("/@type").asText());
294 assertEquals("KED/KLX/03212", field.at("/value").asText());
295 checkC++;
296 break;
297 default:
298 checkF++;
299 }
300 };
301
302 assertEquals(2, checkC);
303 assertEquals(0, checkF);
304 };
305
Akron40550172015-08-04 03:06:12 +0200306
Akron484c3c12015-07-07 20:25:44 +0200307 @Test
Akronbe9638d2019-02-07 17:09:42 +0100308 public void searchMetaFieldsDuplicateKeys () throws IOException {
309
310 // Construct index
311 KrillIndex ki = new KrillIndex();
312 ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false);
313 ki.commit();
314
315 String jsonString = getJsonString(getClass()
316 .getResource("/queries/metas/fields_single.jsonld").getFile());
317
318 Krill ks = new Krill(jsonString);
319 ks.getMeta().setLimit(1);
320 Result kr = ks.apply(ki);
321
322 String resultJson = kr.toJsonString();
323
324 assertTrue(resultJson.indexOf("\"textSigle\":\"GOE_AGX.00002\"") > 0);
325 assertTrue(resultJson.indexOf("\"docSigle\":\"GOE_AGX\"") > 0);
326 assertTrue(resultJson.indexOf("\"corpusSigle\":\"GOE\"") > 0);
Akrond475d992021-11-23 18:39:47 +0100327 // assertTrue(resultJson.indexOf("\"UID\":") > 0);
Akronbe9638d2019-02-07 17:09:42 +0100328 assertTrue(resultJson.indexOf("\"availability\":") > 0);
329
330 assertEquals(
331 resultJson.indexOf("\"textSigle\":\"GOE_AGX.00002\""),
332 resultJson.lastIndexOf("\"textSigle\":\"GOE_AGX.00002\"")
333 );
334 assertEquals(
335 resultJson.indexOf("\"docSigle\":\"GOE_AGX\""),
336 resultJson.lastIndexOf("\"docSigle\":\"GOE_AGX\"")
337 );
338 assertEquals(
339 resultJson.indexOf("\"corpusSigle\":\"GOE\""),
340 resultJson.lastIndexOf("\"corpusSigle\":\"GOE\"")
341 );
342 assertEquals(
343 resultJson.indexOf("\"UID\":0"),
344 resultJson.lastIndexOf("\"UID\":0")
345 );
346 assertEquals(
347 resultJson.indexOf("\"availability\":"),
348 resultJson.lastIndexOf("\"availability\":")
349 );
350 };
351
352 @Test
Akron484c3c12015-07-07 20:25:44 +0200353 public void searchCollectionFields () throws IOException {
354 KrillIndex ki = new KrillIndex();
355 FieldDocument fd = new FieldDocument();
356 fd.addString("corpusSigle", "ABC");
357 fd.addString("docSigle", "ABC-123");
358 fd.addString("textSigle", "ABC-123-0001");
359 fd.addText("title", "Die Wahlverwandschaften");
360 fd.addText("author", "Johann Wolfgang von Goethe");
Akrona6dabb72019-01-09 13:09:41 +0100361 fd.addKeywords("textClass", "reisen wissenschaft");
Akron484c3c12015-07-07 20:25:44 +0200362 fd.addInt("pubDate", 20130617);
363 fd.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
364 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]");
365 ki.addDoc(fd);
366
367 FieldDocument fd2 = new FieldDocument();
368 fd2.addString("corpusSigle", "ABC");
369 fd2.addString("docSigle", "ABC-125");
370 fd2.addString("textSigle", "ABC-125-0001");
371 fd2.addText("title", "Die Glocke");
372 fd2.addText("author", "Schiller, Friedrich");
Akrona6dabb72019-01-09 13:09:41 +0100373 fd2.addKeywords("textClass", "Reisen geschichte");
Akron484c3c12015-07-07 20:25:44 +0200374 fd2.addInt("pubDate", 20130203);
375 fd2.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
376 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]");
377 ki.addDoc(fd2);
378 ki.commit();
379
380 // textClass = reisen & wissenschaft
Eliza Margaretha6f989202016-10-14 21:48:29 +0200381 String jsonString = getJsonString(getClass()
382 .getResource("/queries/collections/collection_textClass.jsonld")
383 .getFile());
Akron484c3c12015-07-07 20:25:44 +0200384 Krill ks = new Krill(jsonString);
385 KrillCollection kc = ks.getCollection();
386 kc.setIndex(ki);
Akron484c3c12015-07-07 20:25:44 +0200387 assertEquals(1, kc.numberOf("documents"));
388
389 // textClass = reisen
Eliza Margaretha6f989202016-10-14 21:48:29 +0200390 jsonString = getJsonString(getClass()
391 .getResource(
392 "/queries/collections/collection_textClass_2.jsonld")
393 .getFile());
Akron484c3c12015-07-07 20:25:44 +0200394 ks = new Krill(jsonString);
395 kc = ks.getCollection();
396 kc.setIndex(ki);
Akron484c3c12015-07-07 20:25:44 +0200397 assertEquals(2, kc.numberOf("documents"));
398
399 /*
Akron484c3c12015-07-07 20:25:44 +0200400 TokenStream ts = fd2.doc.getField("author").tokenStream(
401 (Analyzer) ki.writer().getAnalyzer(),
402 (TokenStream) null
403 );
404 // OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);
405 CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200406
Akron484c3c12015-07-07 20:25:44 +0200407 ts.reset();
408 while (ts.incrementToken()) {
409 String term = charTermAttribute.toString();
410 System.err.println(">>" + term + "<<");
411 };
412 */
413
414 // author = wolfgang
Eliza Margaretha6f989202016-10-14 21:48:29 +0200415 jsonString = getJsonString(getClass()
416 .getResource("/queries/collections/collection_goethe.jsonld")
417 .getFile());
Akron484c3c12015-07-07 20:25:44 +0200418 ks = new Krill(jsonString);
419 kc = ks.getCollection();
420 kc.setIndex(ki);
Akron484c3c12015-07-07 20:25:44 +0200421 assertEquals(1, kc.numberOf("documents"));
422
423 // author = Wolfgang
Eliza Margaretha6f989202016-10-14 21:48:29 +0200424 jsonString = getJsonString(getClass()
425 .getResource("/queries/collections/collection_goethe_2.jsonld")
426 .getFile());
Akron484c3c12015-07-07 20:25:44 +0200427 ks = new Krill(jsonString);
428 kc = ks.getCollection();
429 kc.setIndex(ki);
Akron484c3c12015-07-07 20:25:44 +0200430 assertEquals(1, kc.numberOf("documents"));
431
432 Result kr = ks.apply(ki);
Akron40550172015-08-04 03:06:12 +0200433
Akron484c3c12015-07-07 20:25:44 +0200434 ObjectMapper mapper = new ObjectMapper();
435 JsonNode res = mapper.readTree(kr.toJsonString());
436 assertEquals(1, res.at("/meta/totalResults").asInt());
437 };
438
Akron48937e92015-06-26 01:49:02 +0200439
440 @Test
441 public void searchMetaContext () throws IOException {
442
443 // All fields
Eliza Margaretha6f989202016-10-14 21:48:29 +0200444 String jsonString = getJsonString(getClass()
445 .getResource("/queries/metas/context_paragraph.jsonld")
446 .getFile());
Akron48937e92015-06-26 01:49:02 +0200447
448 Krill ks = new Krill(jsonString);
449 assertTrue(ks.getMeta().getContext().isSpanDefined());
450 assertEquals("base/p", ks.getMeta().getContext().getSpanContext());
451 };
Akron8bb3bc32018-12-12 19:34:56 +0100452
Akron9de655e2021-07-05 15:23:31 +0200453
454 @Test
455 public void searchMetaAndSnippets () throws IOException {
456
457 // All fields
458 String jsonString = getJsonString(getClass()
459 .getResource("/queries/metas/no-snippets.jsonld")
460 .getFile());
461
462 Krill ks = new Krill(jsonString);
463 assertFalse(ks.getMeta().hasSnippets());
464 };
465
Akron8bb3bc32018-12-12 19:34:56 +0100466
467 @Test
468 public void searchMetaAssets () throws IOException {
469 KrillIndex ki = new KrillIndex();
470 FieldDocument fd = new FieldDocument();
471 fd.addString("textSigle", "ABC-123-0002");
472 fd.addText("title", "Die Wahlverwandtschaften");
473 fd.addText("author", "Johann Wolfgang von Goethe");
Akrona6dabb72019-01-09 13:09:41 +0100474 fd.addKeywords("textClass", "reisen wissenschaft");
Akronc7a2abc2019-01-17 14:21:34 +0100475 fd.addDate("pubDate", 20130617);
Akron8bb3bc32018-12-12 19:34:56 +0100476 fd.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
477 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]");
478 fd.addAttachement("WikiLink", "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
479 ki.addDoc(fd);
480 ki.commit();
481
482 assertEquals(fd.doc.getField("textSigle").stringValue(), "ABC-123-0002");
483 assertEquals(fd.doc.getField("title").stringValue(), "Die Wahlverwandtschaften");
484 assertEquals(fd.doc.getField("author").stringValue(), "Johann Wolfgang von Goethe");
485 assertEquals(fd.doc.getField("textClass").stringValue(), "reisen wissenschaft");
486 assertEquals(fd.doc.getField("pubDate").stringValue(), "20130617");
487 assertEquals(fd.doc.getField("WikiLink").stringValue(), "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
488 }
489
Nils Diewalde3645702014-11-07 21:15:20 +0000490};