| Eliza Margaretha | 6a78069 | 2014-01-15 09:45:42 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 4 | import static org.junit.Assert.assertTrue; |
| Akron | baeaf0e | 2019-06-19 15:04:41 +0200 | [diff] [blame] | 5 | import static org.junit.Assert.assertFalse; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 6 | import static org.junit.Assert.fail; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 7 | |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 8 | import java.util.*; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 9 | import java.io.BufferedReader; |
| 10 | import java.io.FileReader; |
| 11 | import java.io.IOException; |
| Eliza Margaretha | 805e27f | 2016-10-14 21:39:42 +0200 | [diff] [blame] | 12 | import java.net.URLDecoder; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 13 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 14 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 15 | import org.junit.Test; |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 16 | import org.junit.Ignore; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 17 | import org.junit.runner.RunWith; |
| 18 | import org.junit.runners.JUnit4; |
| 19 | |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 20 | import com.fasterxml.jackson.databind.JsonNode; |
| 21 | |
| 22 | import static de.ids_mannheim.korap.TestSimple.*; |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 23 | import de.ids_mannheim.korap.Krill; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 24 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 25 | import de.ids_mannheim.korap.KrillMeta; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 26 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 27 | import de.ids_mannheim.korap.query.QueryBuilder; |
| Nils Diewald | 92729ce | 2014-10-06 16:00:17 +0000 | [diff] [blame] | 28 | import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 29 | import de.ids_mannheim.korap.response.Match; |
| Akron | 685ec96 | 2019-02-25 19:04:46 +0100 | [diff] [blame] | 30 | import de.ids_mannheim.korap.response.MetaFields; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 31 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 32 | import de.ids_mannheim.korap.util.QueryException; |
| 33 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 34 | import org.apache.lucene.document.Document; |
| 35 | |
| 36 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 37 | @RunWith(JUnit4.class) |
| 38 | public class TestFieldDocument { |
| 39 | |
| 40 | @Test |
| 41 | public void indexExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 42 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 43 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 44 | fd.addString("corpusID", "WPD"); |
| 45 | fd.addString("ID", "WPD-AAA-00001"); |
| 46 | fd.addText("textClass", "music entertainment"); |
| 47 | fd.addText("author", "Peter Frankenfeld"); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 48 | fd.addDate("pubDate", 20130617); |
| 49 | fd.addInt("justanumber", 12345678); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 50 | fd.addText("title", "Wikipedia"); |
| 51 | fd.addText("subTitle", "Die freie Enzyklopädie"); |
| 52 | fd.addStored("layerInfo", "opennlp/p=pos"); |
| 53 | fd.addString("pubPlace", "Bochum"); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 54 | fd.addDate("lastModified", 20130717); |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 55 | fd.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]" |
| 56 | + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]"); |
| Akron | 8bb3bc3 | 2018-12-12 19:34:56 +0100 | [diff] [blame] | 57 | fd.addAttachement("Wikilink", "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 58 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 59 | Document doc = fd.compile(); |
| 60 | |
| 61 | assertEquals(doc.getField("title").name(), "title"); |
| 62 | assertEquals(doc.getField("title").stringValue(), "Wikipedia"); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 63 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 64 | assertEquals(doc.getField("corpusID").name(), "corpusID"); |
| 65 | assertEquals(doc.getField("corpusID").stringValue(), "WPD"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 66 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 67 | assertEquals(doc.getField("ID").name(), "ID"); |
| 68 | assertEquals(doc.getField("ID").stringValue(), "WPD-AAA-00001"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 69 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 70 | assertEquals(doc.getField("subTitle").name(), "subTitle"); |
| 71 | assertEquals(doc.getField("subTitle").stringValue(), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 72 | "Die freie Enzyklopädie"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 73 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 74 | assertEquals(doc.getField("pubPlace").name(), "pubPlace"); |
| 75 | assertEquals(doc.getField("pubPlace").stringValue(), "Bochum"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 76 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 77 | assertEquals(doc.getField("lastModified").name(), "lastModified"); |
| 78 | assertEquals(doc.getField("lastModified").stringValue(), "20130717"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 79 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 80 | assertEquals(doc.getField("tokens").name(), "tokens"); |
| 81 | assertEquals(doc.getField("tokens").stringValue(), "abc"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 82 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 83 | assertEquals(doc.getField("author").name(), "author"); |
| 84 | assertEquals(doc.getField("author").stringValue(), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 85 | "Peter Frankenfeld"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 86 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 87 | assertEquals(doc.getField("layerInfo").name(), "layerInfo"); |
| 88 | assertEquals(doc.getField("layerInfo").stringValue(), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 89 | "opennlp/p=pos"); |
| Nils Diewald | d4401ec | 2014-06-16 17:04:02 +0000 | [diff] [blame] | 90 | |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 91 | assertEquals(doc.getField("textClass").name(), "textClass"); |
| 92 | assertEquals(doc.getField("textClass").stringValue(), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 93 | "music entertainment"); |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 94 | assertEquals(doc.getField("Wikilink").name(), "Wikilink"); |
| 95 | assertEquals(doc.getField("Wikilink").stringValue(), |
| Akron | 8bb3bc3 | 2018-12-12 19:34:56 +0100 | [diff] [blame] | 96 | "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel" |
| 97 | ); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 98 | |
| 99 | assertEquals(doc.getField("justanumber").numericValue().intValue(), 12345678); |
| 100 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 101 | }; |
| 102 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 103 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 104 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 105 | public void indexExample2 () throws Exception { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 106 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 107 | String json = new String("{" + " \"fields\" : [" + " { " |
| 108 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 109 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 110 | + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"]," |
| 111 | + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ]," |
| 112 | + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]" + " ]" |
| 113 | + " }" + " ]," + " \"corpusID\" : \"WPD\"," |
| 114 | + " \"ID\" : \"WPD-AAA-00001\"," |
| 115 | + " \"textClass\" : \"music entertainment\"," |
| 116 | + " \"author\" : \"Peter Frankenfeld\"," |
| 117 | + " \"pubDate\" : 20130617," |
| 118 | + " \"title\" : \"Wikipedia\"," |
| 119 | + " \"subTitle\" : \"Die freie Enzyklopädie\"," |
| 120 | + " \"pubPlace\" : \"Bochum\"" + "}"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 121 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 122 | KrillIndex ki = new KrillIndex(); |
| 123 | FieldDocument fd = ki.addDoc(json); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 124 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 125 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 126 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 127 | assertEquals(fd.getPrimaryData(), "abc"); |
| 128 | assertEquals(fd.getCorpusID(), "WPD"); |
| 129 | assertEquals(fd.getID(), "WPD-AAA-00001"); |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 130 | assertEquals(fd.getFieldValue("textClass"), "music entertainment"); |
| 131 | assertEquals(fd.getFieldValue("author"), "Peter Frankenfeld"); |
| 132 | assertEquals(fd.getFieldValue("title"), "Wikipedia"); |
| 133 | assertEquals(fd.getFieldValue("subTitle"), "Die freie Enzyklopädie"); |
| 134 | assertEquals(fd.getFieldValue("pubPlace"), "Bochum"); |
| 135 | assertEquals(fd.getFieldValueAsDate("pubDate").toDisplay(), "2013-06-17"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 136 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 138 | Result kr = ki |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 139 | .search((SpanQuery) kq.seq(kq.nr(3, kq.seg("s:b"))).toQuery()); |
| Nils Diewald | 12f00d4 | 2013-12-12 18:47:59 +0000 | [diff] [blame] | 140 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 141 | Match km = kr.getMatch(0); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 142 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 143 | assertEquals(km.getPrimaryData(), "abc"); |
| 144 | assertEquals(km.getCorpusID(), "WPD"); |
| 145 | assertEquals(km.getDocID(), "WPD-AAA-00001"); |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 146 | assertEquals(km.getFieldValue("textClass"), "music entertainment"); |
| 147 | assertEquals(km.getFieldValue("author"), "Peter Frankenfeld"); |
| 148 | assertEquals(km.getFieldValue("title"), "Wikipedia"); |
| 149 | assertEquals(km.getFieldValue("subTitle"), "Die freie Enzyklopädie"); |
| 150 | assertEquals(km.getFieldValue("pubPlace"), "Bochum"); |
| 151 | assertEquals(km.getFieldValueAsDate("pubDate").toDisplay(), "2013-06-17"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 152 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 153 | assertEquals(km.getSnippetBrackets(), "a[[{3:b}]]c"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 154 | }; |
| 155 | |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 156 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 157 | @Test |
| 158 | public void indexExample3 () throws IOException { |
| 159 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 160 | // Construct index |
| 161 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 162 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 163 | // Indexing test files |
| 164 | for (String i : new String[] { "00001", "00002", "00003", "00004", |
| 165 | "00005", "00006", "02439" }) { |
| 166 | FieldDocument fd = ki.addDoc( |
| 167 | getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), |
| 168 | true); |
| 169 | }; |
| 170 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 171 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 173 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 174 | Krill ks; |
| 175 | Result kr; |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 176 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | // Start creating query |
| 178 | // within(<s>, {1: {2: [mate/p=ADJA & mate/m=number:sg]}[opennlp/p=NN & tt/p=NN]}) |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 179 | |
| Akron | 567b6fe | 2018-12-06 15:06:58 +0100 | [diff] [blame] | 180 | ks = new Krill(kq.contains(kq.tag("base/s:s"), kq.nr(1, |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 181 | kq.seq(kq.seg("mate/p:ADJA")).append(kq.seg("opennlp/p:NN"))))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 182 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 183 | KrillMeta meta = ks.getMeta(); |
| 184 | meta.setCount(1); |
| 185 | meta.setCutOff(true); |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 186 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 187 | meta.getContext().left.setCharacter(true).setLength(6); |
| 188 | meta.getContext().right.setToken(true).setLength(6); |
| 189 | |
| 190 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 191 | "... okal. [[Der Buchstabe A hat in {1:deutschen Texten} eine durchschnittliche Häufigkeit von 6,51 %.]] Er ist damit der sechsthäufigste Buchstabe ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 192 | ks.apply(ki).getMatch(0).getSnippetBrackets()); |
| Akron | fbc7616 | 2019-06-04 15:51:09 +0200 | [diff] [blame] | 193 | |
| 194 | |
| 195 | // Do not retrieve snippets |
| 196 | meta.setSnippets(false); |
| 197 | |
| 198 | Match km = ks.apply(ki).getMatch(0); |
| 199 | |
| 200 | assertEquals("Ruru,Jens.Ol,Aglarech", km.toJsonNode().get("author").asText()); |
| 201 | assertTrue(!km.toJsonNode().has("snippet")); |
| 202 | assertEquals("", km.getPrimaryData()); |
| Akron | baeaf0e | 2019-06-19 15:04:41 +0200 | [diff] [blame] | 203 | assertFalse(km.toJsonNode().has("startMore")); |
| 204 | assertFalse(km.toJsonNode().has("endMore")); |
| 205 | assertFalse(km.toJsonNode().has("endCutted")); |
| 206 | assertFalse(km.toJsonNode().has("snippet")); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 207 | }; |
| Nils Diewald | d0481e6 | 2014-02-15 23:55:10 +0000 | [diff] [blame] | 208 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 209 | |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 210 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 211 | public void queryJSONBsp18 () throws Exception { |
| Nils Diewald | d0481e6 | 2014-02-15 23:55:10 +0000 | [diff] [blame] | 212 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 213 | // Construct index |
| 214 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 215 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 216 | // Indexing test files |
| 217 | for (String i : new String[] { "00001", "00002", "00003", "00004", |
| 218 | "00005", "00006", "02439" }) { |
| 219 | FieldDocument fd = ki.addDoc( |
| 220 | getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), |
| 221 | true); |
| Akron | 91c6011 | 2015-09-24 22:05:40 +0200 | [diff] [blame] | 222 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 223 | }; |
| 224 | ki.commit(); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 225 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 226 | String jsonPath = URLDecoder.decode( |
| 227 | getClass().getResource("/queries/bsp18.jsonld").getFile(), |
| 228 | "UTF-8"); |
| 229 | |
| Akron | 8798be8 | 2016-06-23 23:10:25 +0200 | [diff] [blame] | 230 | // {1:der} \w0:5 nicht |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 231 | SpanQueryWrapper sqwi = getJsonQuery(jsonPath); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 232 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 233 | Result kr = ki.search(sqwi.toQuery(), 0, (short) 5, true, (short) 2, |
| 234 | false, (short) 5); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 235 | |
| Akron | 8798be8 | 2016-06-23 23:10:25 +0200 | [diff] [blame] | 236 | assertEquals(1, kr.getTotalResults()); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 237 | assertEquals( |
| 238 | "... bezeichnen, sofern [[{1:der} schwedische Buchstabe „Å“ nicht]] verfügbar ist im SI-Einheitensystem ist ...", |
| 239 | kr.getMatch(0).getSnippetBrackets()); |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 240 | }; |
| 241 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 242 | |
| Akron | a9d4c42 | 2017-04-28 21:18:59 +0200 | [diff] [blame] | 243 | @Test |
| 244 | public void indexNoValidDate () throws Exception { |
| 245 | |
| 246 | String json = new String("{" + " \"fields\" : [" + " { " |
| 247 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 248 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 249 | + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"]," |
| 250 | + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ]," |
| 251 | + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]" + " ]" |
| 252 | + " }" + " ]," + " \"corpusID\" : \"WPD\"," |
| 253 | + " \"ID\" : \"WPD-AAA-00001\"," |
| 254 | + " \"textClass\" : \"music entertainment\"," |
| 255 | + " \"author\" : \"Peter Frankenfeld\"," |
| 256 | + " \"pubDate\" : \"00000000\"," |
| 257 | + " \"title\" : \"Wikipedia\"," |
| 258 | + " \"subTitle\" : \"Die freie Enzyklopädie\"," |
| 259 | + " \"pubPlace\" : \"Bochum\"" + "}"); |
| 260 | |
| 261 | KrillIndex ki = new KrillIndex(); |
| 262 | FieldDocument fd = ki.addDoc(json); |
| 263 | |
| 264 | ki.commit(); |
| 265 | |
| 266 | assertEquals(fd.getPrimaryData(), "abc"); |
| 267 | assertEquals(fd.getCorpusID(), "WPD"); |
| 268 | assertEquals(fd.getID(), "WPD-AAA-00001"); |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 269 | assertEquals(fd.getFieldValue("textClass"), "music entertainment"); |
| 270 | assertEquals(fd.getFieldValue("author"), "Peter Frankenfeld"); |
| 271 | assertEquals(fd.getFieldValue("title"), "Wikipedia"); |
| 272 | assertEquals(fd.getFieldValue("subTitle"), "Die freie Enzyklopädie"); |
| 273 | assertEquals(fd.getFieldValue("pubPlace"), "Bochum"); |
| 274 | assertEquals(fd.getFieldValueAsDate("pubDate").toDisplay(), ""); |
| Akron | a9d4c42 | 2017-04-28 21:18:59 +0200 | [diff] [blame] | 275 | }; |
| Akron | 798e6a2 | 2018-06-18 15:29:35 +0200 | [diff] [blame] | 276 | |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 277 | @Test |
| 278 | public void indexNewMetaData () throws Exception { |
| 279 | |
| 280 | String json = new String( |
| 281 | "{" |
| Akron | 510ba0b | 2019-02-06 19:07:17 +0100 | [diff] [blame] | 282 | + " \"data\" : {" |
| 283 | + " \"text\" : \"abc\"," |
| 284 | + " \"name\" : \"tokens\"," |
| 285 | + " \"stream\" : [" |
| 286 | + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"]," |
| 287 | + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ]," |
| 288 | + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]" |
| 289 | + " ]" |
| 290 | + " }," |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 291 | + " \"fields\" : [" |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 292 | + " {" |
| 293 | + " \"@type\" : \"koral:field\"," |
| 294 | + " \"type\" : \"type:string\"," |
| 295 | + " \"key\" : \"corpusID\"," |
| 296 | + " \"value\" : \"WPD\"" |
| 297 | + " }," |
| 298 | + " {" |
| 299 | + " \"@type\" : \"koral:field\"," |
| 300 | + " \"type\" : \"type:string\"," |
| 301 | + " \"key\" : \"textSigle\"," |
| 302 | + " \"value\" : \"x/y/z\"" |
| 303 | + " }," |
| 304 | + " {" |
| 305 | + " \"@type\" : \"koral:field\"," |
| 306 | + " \"type\" : \"type:string\"," |
| 307 | + " \"key\" : \"ID\"," |
| 308 | + " \"value\" : \"WPD-AAA-00001\"" |
| 309 | + " }," |
| 310 | + " {" |
| 311 | + " \"@type\" : \"koral:field\"," |
| 312 | + " \"type\" : \"type:string\"," |
| 313 | + " \"key\" : \"textClass\"," |
| 314 | + " \"value\" : [\"music\",\"entertainment\"]" |
| 315 | + " }," |
| 316 | + " {" |
| 317 | + " \"@type\" : \"koral:field\"," |
| 318 | + " \"type\" : \"type:text\"," |
| 319 | + " \"key\" : \"author\"," |
| 320 | + " \"value\" : \"Peter Frankenfeld\"" |
| 321 | + " }," |
| 322 | + " {" |
| 323 | + " \"@type\" : \"koral:field\"," |
| 324 | + " \"type\" : \"type:date\"," |
| 325 | + " \"key\" : \"pubDate\"," |
| 326 | + " \"value\" : \"2015-05-01\"" |
| 327 | + " }," |
| 328 | + " {" |
| 329 | + " \"@type\" : \"koral:field\"," |
| 330 | + " \"type\" : \"type:text\"," |
| 331 | + " \"key\" : \"title\"," |
| 332 | + " \"value\" : \"Wikipedia\"" |
| 333 | + " }," |
| 334 | + " {" |
| 335 | + " \"@type\" : \"koral:field\"," |
| 336 | + " \"type\" : \"type:text\"," |
| 337 | + " \"key\" : \"subTitle\"," |
| 338 | + " \"value\" : \"Die freie Enzyklopädie\"" |
| 339 | + " }," |
| 340 | + " {" |
| 341 | + " \"@type\" : \"koral:field\"," |
| 342 | + " \"type\" : \"type:string\"," |
| 343 | + " \"key\" : \"pubPlace\"," |
| 344 | + " \"value\" : \"Bochum\"" |
| 345 | + " }," |
| 346 | + " {" |
| 347 | + " \"@type\" : \"koral:field\"," |
| 348 | + " \"type\" : \"type:attachement\"," |
| 349 | + " \"key\" : \"link\"," |
| 350 | + " \"value\" : \"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel\"" |
| 351 | + " }" |
| 352 | + " ]" |
| 353 | + "}"); |
| 354 | |
| 355 | KrillIndex ki = new KrillIndex(); |
| 356 | FieldDocument fd = ki.addDoc(json); |
| 357 | |
| 358 | ki.commit(); |
| 359 | |
| 360 | assertEquals(fd.getPrimaryData(), "abc"); |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 361 | // assertEquals(fd.doc.getField("corpusID").stringValue(), "WPD"); |
| Akron | fafde02 | 2018-12-14 14:17:05 +0100 | [diff] [blame] | 362 | assertEquals(fd.doc.getField("textSigle").stringValue(), "x/y/z"); |
| 363 | assertEquals(fd.doc.getField("ID").stringValue(), "WPD-AAA-00001"); |
| 364 | assertEquals(fd.doc.getField("textClass").stringValue(), "music entertainment"); |
| 365 | assertEquals(fd.doc.getField("author").stringValue(), "Peter Frankenfeld"); |
| 366 | assertEquals(fd.doc.getField("title").stringValue(), "Wikipedia"); |
| 367 | assertEquals(fd.doc.getField("subTitle").stringValue(), "Die freie Enzyklopädie"); |
| 368 | assertEquals(fd.doc.getField("pubPlace").stringValue(), "Bochum"); |
| 369 | assertEquals(fd.doc.getField("pubDate").stringValue(), "20150501"); |
| 370 | assertEquals(fd.doc.getField("link").stringValue(), "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel"); |
| 371 | |
| 372 | JsonNode res = ki.getFields("x/y/z").toJsonNode(); |
| 373 | |
| 374 | Iterator fieldIter = res.at("/document/fields").elements(); |
| 375 | |
| 376 | int checkC = 0; |
| 377 | while (fieldIter.hasNext()) { |
| 378 | JsonNode field = (JsonNode) fieldIter.next(); |
| 379 | |
| 380 | String key = field.at("/key").asText(); |
| 381 | |
| 382 | switch (key) { |
| 383 | case "corpusID": |
| 384 | assertEquals("type:string", field.at("/type").asText()); |
| 385 | assertEquals("koral:field", field.at("/@type").asText()); |
| 386 | assertEquals("WPD", field.at("/value").asText()); |
| 387 | checkC++; |
| 388 | break; |
| 389 | |
| 390 | case "textSigle": |
| 391 | assertEquals("type:string", field.at("/type").asText()); |
| 392 | assertEquals("koral:field", field.at("/@type").asText()); |
| 393 | assertEquals("x/y/z", field.at("/value").asText()); |
| 394 | checkC++; |
| 395 | break; |
| 396 | |
| 397 | case "ID": |
| 398 | assertEquals("type:string", field.at("/type").asText()); |
| 399 | assertEquals("koral:field", field.at("/@type").asText()); |
| 400 | assertEquals("WPD-AAA-00001", field.at("/value").asText()); |
| 401 | checkC++; |
| 402 | break; |
| 403 | |
| 404 | case "textClass": |
| 405 | assertEquals("type:keywords", field.at("/type").asText()); |
| 406 | assertEquals("koral:field", field.at("/@type").asText()); |
| 407 | assertEquals("music", field.at("/value/0").asText()); |
| 408 | assertEquals("entertainment", field.at("/value/1").asText()); |
| 409 | checkC++; |
| 410 | break; |
| 411 | |
| 412 | case "author": |
| 413 | assertEquals("type:text", field.at("/type").asText()); |
| 414 | assertEquals("koral:field", field.at("/@type").asText()); |
| 415 | assertEquals("Peter Frankenfeld", field.at("/value").asText()); |
| 416 | checkC++; |
| 417 | break; |
| 418 | |
| 419 | case "title": |
| 420 | assertEquals("type:text", field.at("/type").asText()); |
| 421 | assertEquals("koral:field", field.at("/@type").asText()); |
| 422 | assertEquals("Wikipedia", field.at("/value").asText()); |
| 423 | checkC++; |
| 424 | break; |
| 425 | |
| 426 | case "subTitle": |
| 427 | assertEquals("type:text", field.at("/type").asText()); |
| 428 | assertEquals("koral:field", field.at("/@type").asText()); |
| 429 | assertEquals("Die freie Enzyklopädie", field.at("/value").asText()); |
| 430 | checkC++; |
| 431 | break; |
| 432 | |
| 433 | case "pubPlace": |
| 434 | assertEquals("type:string", field.at("/type").asText()); |
| 435 | assertEquals("koral:field", field.at("/@type").asText()); |
| 436 | assertEquals("Bochum", field.at("/value").asText()); |
| 437 | checkC++; |
| 438 | break; |
| 439 | |
| 440 | case "pubDate": |
| 441 | assertEquals("type:date", field.at("/type").asText()); |
| 442 | assertEquals("koral:field", field.at("/@type").asText()); |
| 443 | assertEquals("2015-05-01", field.at("/value").asText()); |
| 444 | checkC++; |
| 445 | break; |
| 446 | |
| 447 | case "link": |
| 448 | assertEquals("type:attachement", field.at("/type").asText()); |
| 449 | assertEquals("koral:field", field.at("/@type").asText()); |
| 450 | assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", field.at("/value").asText()); |
| 451 | checkC++; |
| 452 | break; |
| Akron | 1a975d1 | 2019-02-05 13:13:06 +0100 | [diff] [blame] | 453 | |
| 454 | default: |
| 455 | fail("Unknown field: " + key); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 456 | }; |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 457 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 458 | }; |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 459 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 460 | |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 461 | @Test |
| 462 | public void indexArbitraryMetaData () throws Exception { |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 463 | String json = createDocString1(); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 464 | |
| 465 | KrillIndex ki = new KrillIndex(); |
| 466 | FieldDocument fd = ki.addDoc(json); |
| 467 | |
| 468 | ki.commit(); |
| 469 | |
| 470 | assertEquals(fd.getPrimaryData(), "abc"); |
| 471 | assertEquals(fd.doc.getField("alter").stringValue(), "40.0"); |
| 472 | assertEquals(fd.doc.getField("name").stringValue(), "Frank"); |
| 473 | assertEquals(fd.doc.getField("schluesselwoerter").stringValue(), "musik unterhaltung"); |
| 474 | assertEquals(fd.doc.getField("tags").stringValue(), "nachrichten feuilleton sport raetsel"); |
| 475 | assertEquals(fd.doc.getField("titel").stringValue(), "Der alte Baum"); |
| 476 | assertEquals(fd.doc.getField("anhang").stringValue(), "data:application/x.korap-link,http://spiegel.de/"); |
| 477 | assertEquals(fd.doc.getField("referenz").stringValue(), "So war das"); |
| 478 | assertEquals(fd.doc.getField("datum").stringValue(), "20180403"); |
| 479 | |
| 480 | JsonNode res = ki.getFields("aa/bb/cc").toJsonNode(); |
| 481 | |
| 482 | Iterator fieldIter = res.at("/document/fields").elements(); |
| 483 | |
| 484 | int checkC = 0; |
| 485 | while (fieldIter.hasNext()) { |
| 486 | JsonNode field = (JsonNode) fieldIter.next(); |
| 487 | |
| 488 | String key = field.at("/key").asText(); |
| 489 | |
| 490 | switch (key) { |
| 491 | case "textSigle": |
| 492 | assertEquals("type:string", field.at("/type").asText()); |
| 493 | assertEquals("koral:field", field.at("/@type").asText()); |
| 494 | assertEquals("aa/bb/cc", field.at("/value").asText()); |
| 495 | checkC++; |
| 496 | break; |
| 497 | |
| 498 | case "alter": |
| 499 | assertEquals("type:integer", field.at("/type").asText()); |
| 500 | assertEquals("koral:field", field.at("/@type").asText()); |
| 501 | assertEquals(40, field.at("/value").asInt()); |
| 502 | checkC++; |
| 503 | break; |
| 504 | |
| 505 | case "name": |
| 506 | assertEquals("type:string", field.at("/type").asText()); |
| 507 | assertEquals("koral:field", field.at("/@type").asText()); |
| 508 | assertEquals("Frank", field.at("/value").asText()); |
| 509 | checkC++; |
| 510 | break; |
| 511 | |
| 512 | case "schluesselwoerter": |
| 513 | assertEquals("type:keywords", field.at("/type").asText()); |
| 514 | assertEquals("koral:field", field.at("/@type").asText()); |
| 515 | assertEquals("musik", field.at("/value/0").asText()); |
| 516 | assertEquals("unterhaltung", field.at("/value/1").asText()); |
| 517 | checkC++; |
| 518 | break; |
| 519 | |
| 520 | case "tags": |
| 521 | assertEquals("type:keywords", field.at("/type").asText()); |
| 522 | assertEquals("koral:field", field.at("/@type").asText()); |
| 523 | assertEquals("nachrichten", field.at("/value/0").asText()); |
| 524 | assertEquals("feuilleton", field.at("/value/1").asText()); |
| 525 | assertEquals("sport", field.at("/value/2").asText()); |
| 526 | assertEquals("raetsel", field.at("/value/3").asText()); |
| 527 | checkC++; |
| 528 | break; |
| 529 | |
| 530 | case "titel": |
| 531 | assertEquals("type:text", field.at("/type").asText()); |
| 532 | assertEquals("koral:field", field.at("/@type").asText()); |
| 533 | assertEquals("Der alte Baum", field.at("/value").asText()); |
| 534 | checkC++; |
| 535 | break; |
| 536 | |
| 537 | case "anhang": |
| 538 | assertEquals("type:attachement", field.at("/type").asText()); |
| 539 | assertEquals("koral:field", field.at("/@type").asText()); |
| 540 | assertEquals("data:application/x.korap-link,http://spiegel.de/", field.at("/value").asText()); |
| 541 | checkC++; |
| 542 | break; |
| 543 | |
| 544 | case "referenz": |
| 545 | assertEquals("type:store", field.at("/type").asText()); |
| 546 | assertEquals("koral:field", field.at("/@type").asText()); |
| 547 | assertEquals("So war das", field.at("/value").asText()); |
| 548 | checkC++; |
| 549 | break; |
| 550 | |
| 551 | case "datum": |
| 552 | assertEquals("type:date", field.at("/type").asText()); |
| 553 | assertEquals("koral:field", field.at("/@type").asText()); |
| 554 | assertEquals("2018-04-03", field.at("/value").asText()); |
| 555 | checkC++; |
| 556 | break; |
| Akron | 1a975d1 | 2019-02-05 13:13:06 +0100 | [diff] [blame] | 557 | |
| 558 | default: |
| 559 | fail("Unknown field: " + key); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 560 | }; |
| 561 | }; |
| 562 | }; |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 563 | |
| 564 | @Test |
| 565 | public void indexArbitraryMetaDataPartial () throws Exception { |
| 566 | String json = createDocString1(); |
| 567 | |
| 568 | KrillIndex ki = new KrillIndex(); |
| 569 | FieldDocument fd = ki.addDoc(json); |
| 570 | |
| 571 | ki.commit(); |
| 572 | |
| 573 | ArrayList hs = new ArrayList<String>(); |
| 574 | hs.add("datum"); |
| 575 | hs.add("titel"); |
| 576 | JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode(); |
| 577 | assertEquals("type:date", res.at("/document/fields/0/type").asText()); |
| 578 | assertEquals("datum", res.at("/document/fields/0/key").asText()); |
| 579 | assertEquals("2018-04-03", res.at("/document/fields/0/value").asText()); |
| 580 | assertEquals("type:text", res.at("/document/fields/1/type").asText()); |
| 581 | assertEquals("titel", res.at("/document/fields/1/key").asText()); |
| 582 | assertEquals("Der alte Baum", res.at("/document/fields/1/value").asText()); |
| 583 | assertTrue(res.at("/document/fields/2").isMissingNode()); |
| 584 | }; |
| 585 | |
| 586 | @Test |
| 587 | public void indexArbitraryMetaDataSorted () throws Exception { |
| 588 | String json = createDocString1(); |
| 589 | |
| 590 | KrillIndex ki = new KrillIndex(); |
| 591 | FieldDocument fd = ki.addDoc(json); |
| 592 | |
| 593 | ki.commit(); |
| 594 | |
| 595 | ArrayList hs = new ArrayList<String>(); |
| 596 | hs.add("titel"); |
| 597 | hs.add("datum"); |
| 598 | JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode(); |
| 599 | assertEquals("type:text", res.at("/document/fields/0/type").asText()); |
| 600 | assertEquals("titel", res.at("/document/fields/0/key").asText()); |
| 601 | assertEquals("Der alte Baum", res.at("/document/fields/0/value").asText()); |
| 602 | assertEquals("type:date", res.at("/document/fields/1/type").asText()); |
| 603 | assertEquals("datum", res.at("/document/fields/1/key").asText()); |
| 604 | assertEquals("2018-04-03", res.at("/document/fields/1/value").asText()); |
| 605 | assertTrue(res.at("/document/fields/2").isMissingNode()); |
| 606 | }; |
| 607 | |
| 608 | @Test |
| 609 | public void indexArbitraryMetaDataEmpty () throws Exception { |
| 610 | String json = createDocString1(); |
| 611 | |
| 612 | KrillIndex ki = new KrillIndex(); |
| 613 | FieldDocument fd = ki.addDoc(json); |
| 614 | |
| 615 | ki.commit(); |
| 616 | |
| 617 | ArrayList hs = new ArrayList<String>(); |
| 618 | hs.add("titel"); |
| 619 | hs.add("frage"); |
| 620 | hs.add("datum"); |
| 621 | JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode(); |
| 622 | assertEquals("type:text", res.at("/document/fields/0/type").asText()); |
| 623 | assertEquals("titel", res.at("/document/fields/0/key").asText()); |
| 624 | assertEquals("Der alte Baum", res.at("/document/fields/0/value").asText()); |
| 625 | assertEquals("frage", res.at("/document/fields/1/key").asText()); |
| 626 | assertTrue(res.at("/document/fields/1/type").isMissingNode()); |
| 627 | assertEquals("type:date", res.at("/document/fields/2/type").asText()); |
| 628 | assertEquals("datum", res.at("/document/fields/2/key").asText()); |
| 629 | assertEquals("2018-04-03", res.at("/document/fields/2/value").asText()); |
| 630 | assertTrue(res.at("/document/fields/3").isMissingNode()); |
| 631 | }; |
| 632 | |
| Akron | 685ec96 | 2019-02-25 19:04:46 +0100 | [diff] [blame] | 633 | |
| 634 | @Test |
| 635 | public void indexUpsert () throws Exception { |
| 636 | KrillIndex ki = new KrillIndex(); |
| 637 | |
| 638 | // Add new document |
| 639 | FieldDocument fd = new FieldDocument(); |
| 640 | fd.addString("textSigle", "AAA/BBB/001"); |
| 641 | fd.addString("content", "Example1"); |
| 642 | ki.upsertDoc(fd); |
| 643 | ki.commit(); |
| 644 | |
| 645 | MetaFields mfs = ki.getFields("AAA/BBB/001"); |
| 646 | assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10); |
| 647 | assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}")); |
| 648 | assertEquals( |
| 649 | mfs.getFieldValue("indexCreationDate"), |
| 650 | mfs.getFieldValue("indexLastModified") |
| 651 | ); |
| 652 | assertEquals(mfs.getFieldValue("content"), "Example1"); |
| 653 | |
| 654 | |
| 655 | // Add new document |
| 656 | fd = new FieldDocument(); |
| 657 | fd.addString("textSigle", "AAA/BBB/002"); |
| 658 | fd.addString("content", "Example2"); |
| 659 | |
| 660 | ki.upsertDoc(fd); |
| 661 | ki.commit(); |
| 662 | |
| 663 | mfs = ki.getFields("AAA/BBB/002"); |
| 664 | assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10); |
| 665 | |
| 666 | assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}")); |
| 667 | assertEquals(mfs.getFieldValue("content"), "Example2"); |
| 668 | |
| 669 | fd = new FieldDocument(); |
| 670 | fd.addString("textSigle", "AAA/BBB/001"); |
| 671 | fd.addString("content", "Example3"); |
| 672 | |
| 673 | ki.upsertDoc(fd); |
| 674 | ki.commit(); |
| 675 | |
| 676 | mfs = ki.getFields("AAA/BBB/001"); |
| 677 | assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10); |
| 678 | assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}")); |
| 679 | assertEquals(mfs.getFieldValue("content"), "Example3"); |
| 680 | |
| 681 | assertEquals(ki.numberOf("documents"), 2); |
| Akron | f0e3653 | 2019-03-06 11:43:21 +0100 | [diff] [blame] | 682 | |
| 683 | // Test Inputstream method |
| 684 | ki.upsertDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true); |
| 685 | ki.commit(); |
| 686 | assertEquals(ki.numberOf("documents"), 3); |
| Akron | 81829f1 | 2019-04-09 23:06:34 +0200 | [diff] [blame] | 687 | |
| 688 | ki.close(); |
| 689 | |
| 690 | fd = new FieldDocument(); |
| 691 | fd.addString("textSigle", "AAA/DDD/005"); |
| 692 | fd.addString("content", "Example4"); |
| 693 | |
| 694 | ki.upsertDoc(fd); |
| 695 | ki.commit(); |
| 696 | |
| 697 | assertEquals(ki.numberOf("documents"), 4); |
| 698 | |
| Akron | 685ec96 | 2019-02-25 19:04:46 +0100 | [diff] [blame] | 699 | }; |
| 700 | |
| 701 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 702 | private static String createDocString1 () { |
| 703 | return new String( |
| 704 | "{" |
| Akron | 510ba0b | 2019-02-06 19:07:17 +0100 | [diff] [blame] | 705 | + " \"data\" : {" |
| 706 | + " \"text\" : \"abc\"," |
| 707 | + " \"name\" : \"tokens\"," |
| 708 | + " \"stream\" : [" |
| 709 | + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"]," |
| 710 | + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ]," |
| 711 | + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]" |
| 712 | + " ]" |
| 713 | + " }," |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 714 | + " \"fields\" : [" |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 715 | + " {" |
| 716 | + " \"@type\" : \"koral:field\"," |
| 717 | + " \"type\" : \"type:string\"," |
| 718 | + " \"key\" : \"textSigle\"," |
| 719 | + " \"value\" : \"aa/bb/cc\"" |
| 720 | + " }," |
| 721 | + " {" |
| 722 | + " \"@type\" : \"koral:field\"," |
| 723 | + " \"type\" : \"type:integer\"," |
| 724 | + " \"key\" : \"alter\"," |
| 725 | + " \"value\" : 40" |
| 726 | + " }," |
| 727 | + " {" |
| 728 | + " \"@type\" : \"koral:field\"," |
| 729 | + " \"type\" : \"type:string\"," |
| 730 | + " \"key\" : \"name\"," |
| 731 | + " \"value\" : \"Frank\"" |
| 732 | + " }," |
| 733 | + " {" |
| 734 | + " \"@type\" : \"koral:field\"," |
| 735 | + " \"type\" : \"type:string\"," |
| 736 | + " \"key\" : \"name\"," |
| 737 | + " \"value\" : \"Julian\"" |
| 738 | + " }," |
| 739 | + " {" |
| 740 | + " \"@type\" : \"koral:field\"," |
| 741 | + " \"type\" : \"type:string\"," |
| 742 | + " \"key\" : \"schluesselwoerter\"," |
| 743 | + " \"value\" : [\"musik\",\"unterhaltung\"]" |
| 744 | + " }," |
| 745 | + " {" |
| 746 | + " \"@type\" : \"koral:field\"," |
| 747 | + " \"type\" : \"type:keywords\"," |
| 748 | + " \"key\" : \"tags\"," |
| 749 | + " \"value\" : \"nachrichten feuilleton\"" |
| 750 | + " }," |
| 751 | + " {" |
| 752 | + " \"@type\" : \"koral:field\"," |
| 753 | + " \"type\" : \"type:keywords\"," |
| 754 | + " \"key\" : \"tags\"," |
| 755 | + " \"value\" : [\"sport\",\"raetsel\"]" |
| 756 | + " }," |
| 757 | + " {" |
| 758 | + " \"@type\" : \"koral:field\"," |
| 759 | + " \"type\" : \"type:text\"," |
| 760 | + " \"key\" : \"titel\"," |
| 761 | + " \"value\" : \"Der alte Baum\"" |
| 762 | + " }," |
| 763 | + " {" |
| 764 | + " \"@type\" : \"koral:field\"," |
| 765 | + " \"type\" : \"type:attachement\"," |
| 766 | + " \"key\" : \"anhang\"," |
| 767 | + " \"value\" : \"data:application/x.korap-link,http://spiegel.de/\"" |
| 768 | + " }," |
| 769 | + " {" |
| 770 | + " \"@type\" : \"koral:field\"," |
| 771 | + " \"type\" : \"type:store\"," |
| 772 | + " \"key\" : \"referenz\"," |
| 773 | + " \"value\" : \"So war das\"" |
| 774 | + " }," |
| 775 | + " {" |
| 776 | + " \"@type\" : \"koral:field\"," |
| 777 | + " \"type\" : \"type:date\"," |
| 778 | + " \"key\" : \"datum\"," |
| 779 | + " \"value\" : \"2018-04-03\"" |
| 780 | + " }" |
| 781 | + " ]" |
| 782 | + "}"); |
| 783 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 784 | }; |