blob: 050005006e2840ff97a08774633622b225075494 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
margaretha4f995582015-12-14 14:14:34 +01003import static org.junit.Assert.assertEquals;
4import static org.junit.Assert.assertNull;
5import static org.junit.Assert.assertTrue;
Nils Diewald68bb1f72014-01-07 14:07:05 +00006
margaretha4f995582015-12-14 14:14:34 +01007import java.io.IOException;
8import java.util.ArrayList;
9
Nils Diewald68bb1f72014-01-07 14:07:05 +000010import org.junit.Test;
Nils Diewald68bb1f72014-01-07 14:07:05 +000011import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
margaretha4f995582015-12-14 14:14:34 +010014import com.fasterxml.jackson.databind.JsonNode;
15import com.fasterxml.jackson.databind.ObjectMapper;
16
17import de.ids_mannheim.korap.Krill;
18import de.ids_mannheim.korap.KrillIndex;
19import de.ids_mannheim.korap.query.QueryBuilder;
20import de.ids_mannheim.korap.response.Match;
21import de.ids_mannheim.korap.response.Result;
Nils Diewaldff0f8742015-02-26 20:42:45 +000022import de.ids_mannheim.korap.response.match.MatchIdentifier;
23import de.ids_mannheim.korap.response.match.PosIdentifier;
Nils Diewald6e9eb4e2014-06-17 19:28:01 +000024import de.ids_mannheim.korap.util.QueryException;
25
Nils Diewald68bb1f72014-01-07 14:07:05 +000026@RunWith(JUnit4.class)
27public class TestMatchIdentifier {
28
Akron7d45e6b2015-06-26 17:23:42 +020029 ObjectMapper mapper = new ObjectMapper();
30
Akron13db6152016-02-19 14:08:38 +010031
Nils Diewald68bb1f72014-01-07 14:07:05 +000032 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +000033 public void identifierExample1 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
35 assertEquals(id.getCorpusID(), "c1");
36 assertEquals(id.getDocID(), "d1");
37 assertEquals(id.getStartPos(), 4);
38 assertEquals(id.getEndPos(), 20);
Nils Diewaldcde69082014-01-16 15:46:48 +000039
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 assertEquals(id.toString(), "match-c1!d1-p4-20");
41 id.addPos(10, 14, 2);
42 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14");
43 id.addPos(11, 12, 5);
44 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
45 // Ignore
46 id.addPos(11, 12, -8);
47 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
48 id.addPos(11, -12, 8);
49 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
50 id.addPos(-11, 12, 8);
51 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
Nils Diewaldcde69082014-01-16 15:46:48 +000052
Nils Diewaldbb33da22015-03-04 16:24:25 +000053 id = new MatchIdentifier("matc-c1!d1-p4-20");
54 assertNull(id.toString());
55 id = new MatchIdentifier("match-d1-p4-20");
56 assertNull(id.getCorpusID());
57 assertEquals(id.getDocID(), "d1");
58 id = new MatchIdentifier("match-p4-20");
59 assertNull(id.toString());
Nils Diewaldcde69082014-01-16 15:46:48 +000060
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 id = new MatchIdentifier("match-c1!d1-p4-20");
62 assertEquals(id.toString(), "match-c1!d1-p4-20");
Nils Diewaldcde69082014-01-16 15:46:48 +000063
Nils Diewaldbb33da22015-03-04 16:24:25 +000064 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8");
65 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000066
Nils Diewaldbb33da22015-03-04 16:24:25 +000067 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10");
68 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000069
Eliza Margaretha6f989202016-10-14 21:48:29 +020070 id = new MatchIdentifier(
71 "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6");
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4");
Nils Diewaldcde69082014-01-16 15:46:48 +000073
Nils Diewaldbb33da22015-03-04 16:24:25 +000074 id = new MatchIdentifier(
75 "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8");
76 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000077
Nils Diewaldbb33da22015-03-04 16:24:25 +000078 id = new MatchIdentifier(
79 "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8(5)9--10");
Akron1f619572015-07-08 17:33:47 +020080 assertEquals(4, id.getStartPos());
81 assertEquals(20, id.getEndPos());
82 assertEquals("c1", id.getCorpusID());
83 assertEquals("d1", id.getDocID());
84 assertEquals(null, id.getTextSigle());
Nils Diewaldbb33da22015-03-04 16:24:25 +000085 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000086
Akron40550172015-08-04 03:06:12 +020087 id = new MatchIdentifier("match-GOE!GOE_AGF.02286-p2105-2106");
Akron1f619572015-07-08 17:33:47 +020088 assertEquals(2105, id.getStartPos());
89 assertEquals(2106, id.getEndPos());
90 assertEquals(null, id.getCorpusID());
91 assertEquals(null, id.getDocID());
92 assertEquals("GOE_AGF.02286", id.getTextSigle());
93 assertEquals("match-GOE_AGF.02286-p2105-2106", id.toString());
Akronfc2625e2016-07-27 01:52:28 +020094
95 id = new MatchIdentifier("match-corpus-1/doc-1/text-1/p2105-2106");
96 assertEquals("match-corpus-1/doc-1/text-1-p2105-2106", id.toString());
97 assertEquals("corpus-1/doc-1/text-1", id.getTextSigle());
Akron1f619572015-07-08 17:33:47 +020098 };
Nils Diewaldbb33da22015-03-04 16:24:25 +000099
Akron40550172015-08-04 03:06:12 +0200100
Nils Diewaldcde69082014-01-16 15:46:48 +0000101 @Test
Nils Diewald345bdc02014-01-21 21:48:57 +0000102 public void posIdentifierExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000103 PosIdentifier id = new PosIdentifier();
104 id.setCorpusID("c1");
105 id.setDocID("d1");
106 id.setPos(8);
107 assertEquals(id.getCorpusID(), "c1");
108 assertEquals(id.getDocID(), "d1");
109 assertEquals(id.getPos(), 8);
110 assertEquals(id.toString(), "word-c1!d1-p8");
Nils Diewald345bdc02014-01-21 21:48:57 +0000111 };
112
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113
/**
 * Searches a one-document index for the sequence "s:b" followed by a
 * classed "s:a" and checks the single hit's positions, bracket
 * snippet and generated match identifier.
 */
@Test
public void indexExample1 () throws IOException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createSimpleFieldDoc());
    ki.commit();

    QueryBuilder kq = new QueryBuilder("tokens");
    // Outer class 2 wraps the whole sequence; the inner class has no
    // explicit number (it shows up as class 1 in the asserts below)
    Krill ks = new Krill(
            kq._(2, kq.seq(kq.seg("s:b")).append(kq._(kq.seg("s:a")))));
    Result kr = ki.search(ks);

    // JUnit expects (message, expected, actual)
    assertEquals("totalResults", 1, kr.getTotalResults());
    assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);

    Match km = kr.getMatch(0);

    assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c",
            km.getSnippetBrackets());
    assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID());
}
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000135
Nils Diewaldbb33da22015-03-04 16:24:25 +0000136
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000137 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000138 public void indexExample2 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139 KrillIndex ki = new KrillIndex();
140 ki.addDoc(createSimpleFieldDoc());
141 ki.commit();
Nils Diewaldcde69082014-01-16 15:46:48 +0000142
Nils Diewaldbb33da22015-03-04 16:24:25 +0000143 Match km = ki.getMatch("match-c1!d1-p7-9(0)8-8(2)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +0000144
Nils Diewaldbb33da22015-03-04 16:24:25 +0000145 assertEquals("StartPos (0)", 7, km.getStartPos());
146 assertEquals("EndPos (0)", 9, km.getEndPos());
Nils Diewaldcde69082014-01-16 15:46:48 +0000147
Akronf05fde62016-08-03 23:46:17 +0200148 assertEquals("SnippetBrackets (0)", "... [[{2:b{a}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000149 km.getSnippetBrackets());
Nils Diewald1e5d5942014-05-20 13:29:53 +0000150
Nils Diewaldbb33da22015-03-04 16:24:25 +0000151 assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000152
Eliza Margaretha6f989202016-10-14 21:48:29 +0200153 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", "m",
154 false, false);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000155
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156 assertEquals("SnippetBrackets (1)",
Eliza Margaretha6f989202016-10-14 21:48:29 +0200157 "... [[{f/m:acht:b}{f/m:neun:a}]] ...",
158 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000159
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000160
Akron7d45e6b2015-06-26 17:23:42 +0200161 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f",
162 null, false, false);
Akron48937e92015-06-26 01:49:02 +0200163 assertEquals("SnippetBrackets (1b)",
Akronf05fde62016-08-03 23:46:17 +0200164 "... [[{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...",
Akron48937e92015-06-26 01:49:02 +0200165 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000166
Eliza Margaretha6f989202016-10-14 21:48:29 +0200167 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", "m",
168 false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000169
Nils Diewaldbb33da22015-03-04 16:24:25 +0000170 assertEquals("SnippetBrackets (2)",
Akronf05fde62016-08-03 23:46:17 +0200171 "... [[{2:{f/m:acht:b}{{f/m:neun:a}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000173
Eliza Margaretha6f989202016-10-14 21:48:29 +0200174 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f", "m",
175 false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000176
Nils Diewaldbb33da22015-03-04 16:24:25 +0000177 assertEquals("SnippetBrackets (3)",
Akronf05fde62016-08-03 23:46:17 +0200178 "... [[{2:{f/m:acht:b}{4:{f/m:neun:a}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000180
Nils Diewaldbb33da22015-03-04 16:24:25 +0000181 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f",
182 null, false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000183
Eliza Margaretha6f989202016-10-14 21:48:29 +0200184 assertEquals("SnippetBrackets (4)",
Akronf05fde62016-08-03 23:46:17 +0200185 "... [[{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000186 km.getSnippetBrackets());
187
Eliza Margaretha6f989202016-10-14 21:48:29 +0200188 assertEquals("SnippetHTML (4)",
189 "<span class=\"context-left\">" + "<span class=\"more\">"
190 + "</span>" + "</span>" + "<span class=\"match\">"
191 + "<mark>" + "<mark class=\"class-2 level-0\">"
192 + "<span title=\"f/m:acht\">"
193 + "<span title=\"f/y:eight\">" + "b" + "</span>"
194 + "</span>" + "<mark class=\"class-4 level-1\">"
195 + "<span title=\"f/m:neun\">"
196 + "<span title=\"f/y:nine\">" + "a" + "</span>"
197 + "</span>" + "</mark>" + "</mark>" + "</mark>"
198 + "</span>" + "<span class=\"context-right\">"
199 + "<span class=\"more\">" + "</span>" + "</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000200 km.getSnippetHTML());
Akron48937e92015-06-26 01:49:02 +0200201
Akron48937e92015-06-26 01:49:02 +0200202 JsonNode res = mapper.readTree(km.toJsonString());
203 assertEquals("tokens", res.at("/field").asText());
204 assertTrue(res.at("/startMore").asBoolean());
205 assertTrue(res.at("/endMore").asBoolean());
206 assertEquals("c1", res.at("/corpusID").asText());
207 assertEquals("d1", res.at("/docID").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200208 assertEquals("match-c1!d1-p7-9(4)8-8(2)7-8",
209 res.at("/matchID").asText());
Akron48937e92015-06-26 01:49:02 +0200210 assertTrue(res.at("/pubDate").isMissingNode());
Nils Diewald345bdc02014-01-21 21:48:57 +0000211 };
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000212
Nils Diewald345bdc02014-01-21 21:48:57 +0000213
214 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000215 public void indexExample3 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000216 KrillIndex ki = new KrillIndex();
217 ki.addDoc(createSimpleFieldDoc());
218 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000219
Nils Diewaldbb33da22015-03-04 16:24:25 +0000220 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
221 null, null, false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000222
Nils Diewald345bdc02014-01-21 21:48:57 +0000223
Nils Diewaldbb33da22015-03-04 16:24:25 +0000224 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akron08f4ceb2016-08-03 23:53:32 +0200225 + "<span class=\"more\">" + "</span>" + "</span>"
226 + "<span class=\"match\">" + "<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000227 + "<mark class=\"class-2 level-0\">"
228 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
229 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
230 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
231 + "<mark class=\"class-4 level-1\">"
232 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
233 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
234 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
Akron08f4ceb2016-08-03 23:53:32 +0200235 + "</mark>" + "</mark>" + "</mark>" + "</span>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000236 + "<span class=\"context-right\">" + "<span class=\"more\">"
237 + "</span>" + "</span>", km.getSnippetHTML());
Nils Diewaldcde69082014-01-16 15:46:48 +0000238 };
239
Nils Diewaldbb33da22015-03-04 16:24:25 +0000240
Nils Diewald345bdc02014-01-21 21:48:57 +0000241 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000242 public void indexExample4 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 KrillIndex ki = new KrillIndex();
244 ki.addDoc(createSimpleFieldDoc());
245 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000246
Nils Diewaldbb33da22015-03-04 16:24:25 +0000247 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
248 null, null, false, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000249
250
Nils Diewaldbb33da22015-03-04 16:24:25 +0000251 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akron08f4ceb2016-08-03 23:53:32 +0200252 + "<span class=\"more\">" + "</span>" + "</span>"
253 + "<span class=\"match\">" + "<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000254 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
255 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
256 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
257 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
258 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
259 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200260 + "</mark>" + "</span>" + "<span class=\"context-right\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000261 + "<span class=\"more\">" + "</span>" + "</span>",
262 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000263 };
264
Akron40550172015-08-04 03:06:12 +0200265
Akron1f619572015-07-08 17:33:47 +0200266 @Test
267 public void indexNewStructure () throws IOException, QueryException {
268 KrillIndex ki = new KrillIndex();
269 ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false);
270 ki.commit();
271
Eliza Margaretha6f989202016-10-14 21:48:29 +0200272 Match km = ki.getMatchInfo("match-GOE!GOE_AGX.00002-p210-211", "tokens",
273 true, (String) null, (String) null, true, true, true);
Akron1f619572015-07-08 17:33:47 +0200274
275 JsonNode res = mapper.readTree(km.toJsonString());
276 assertEquals("tokens", res.at("/field").asText());
277 assertEquals("GOE_AGX.00002", res.at("/textSigle").asText());
278 assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText());
Akron1ad99882016-09-29 18:48:46 +0200279
280 /*
281 km = ki.getMatchInfo(
Akronf9def5e2016-10-10 21:26:46 +0200282 "match-GOE!GOE_AGX.00002-p10-20",
283 "tokens", true, (String) null, (String) null, true, true, false);
Akron1ad99882016-09-29 18:48:46 +0200284 assertEquals("", km.toJsonString());
Akronf9def5e2016-10-10 21:26:46 +0200285 */
Akron1f619572015-07-08 17:33:47 +0200286 };
287
Nils Diewaldbb33da22015-03-04 16:24:25 +0000288
Nils Diewald345bdc02014-01-21 21:48:57 +0000289 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000290 public void indexExample5Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000291 KrillIndex ki = new KrillIndex();
292 ki.addDoc(createSimpleFieldDoc());
293 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000294
Nils Diewaldbb33da22015-03-04 16:24:25 +0000295 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
296 null, null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000297
298
Eliza Margaretha6f989202016-10-14 21:48:29 +0200299 assertEquals("SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200300 "... [[{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000301 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000302 };
303
Nils Diewaldbb33da22015-03-04 16:24:25 +0000304
Nils Diewald345bdc02014-01-21 21:48:57 +0000305 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000306 public void indexExample6Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000307 KrillIndex ki = new KrillIndex();
308 ki.addDoc(createSimpleFieldDoc());
309 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000310
Nils Diewaldbb33da22015-03-04 16:24:25 +0000311 Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens",
312 null, null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000313
314
Eliza Margaretha6f989202016-10-14 21:48:29 +0200315 assertEquals("SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200316 "... [[{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000317 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000318 };
319
Nils Diewaldbb33da22015-03-04 16:24:25 +0000320
Nils Diewald345bdc02014-01-21 21:48:57 +0000321 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000322 public void indexExample7Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000323 KrillIndex ki = new KrillIndex();
324 ki.addDoc(createSimpleFieldDoc());
325 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000326
Nils Diewaldbb33da22015-03-04 16:24:25 +0000327 Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens",
328 null, null, true, true);
Nils Diewald345bdc02014-01-21 21:48:57 +0000329
330
Eliza Margaretha6f989202016-10-14 21:48:29 +0200331 assertEquals("SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200332 "... [[{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000333 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000334
Nils Diewaldbb33da22015-03-04 16:24:25 +0000335 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akron08f4ceb2016-08-03 23:53:32 +0200336 + "<span class=\"more\">" + "</span>" + "</span>"
Eliza Margaretha6f989202016-10-14 21:48:29 +0200337 + "<span class=\"match\">" + "<mark>" + "<span title=\"x/tag\">"
338 + "<mark class=\"class-2 level-0\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000339 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
340 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
341 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
342 + "<mark class=\"class-4 level-1\">"
343 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
344 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
345 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
346 + "</mark>" + "</mark>" + "<span title=\"f/m:zehn\">"
347 + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">"
348 + "<span title=\"x/o:zehntens\">" + "c" + "</span>" + "</span>"
Akron08f4ceb2016-08-03 23:53:32 +0200349 + "</span>" + "</span>" + "</span>" + "</mark>" + "</span>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000350 + "<span class=\"context-right\">" + "</span>",
351 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000352 };
353
Nils Diewaldbb33da22015-03-04 16:24:25 +0000354
Nils Diewald345bdc02014-01-21 21:48:57 +0000355 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000356 public void indexExample6Relations () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000357 KrillIndex ki = new KrillIndex();
358 ki.addDoc(createSimpleFieldDoc());
359 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000360
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361 Match km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8", "tokens",
362 "x", null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000363
Eliza Margaretha6f989202016-10-14 21:48:29 +0200364 assertEquals("SnippetBrackets (1)",
Akron417eaa92017-01-13 18:00:15 +0100365 "[[{x/o:erstens:{x/rel:a>3:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000366 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000367
Akron417eaa92017-01-13 18:00:15 +0100368 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
369 + "</span>" + "<span class=\"match\">"
370 + "<mark>"
371 + "<span title=\"x/o:erstens\">"
372 + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" "
373 + "xlink:href=\"#word-c1!d1-p3\">"
374 + "a" + "</span>"
375 + "</span>"
376 + "<span title=\"x/o:zweitens\">" + "b" + "</span>"
377 + "<span title=\"x/o:drittens\">" + "c" + "</span>"
378 + "<span xml:id=\"word-c1!d1-p3\">"
379 + "<span title=\"x/o:viertens\">" + "a" + "</span>"
380 + "</span>"
381 + "<span title=\"x/o:fünftens\">" + "b" + "</span>"
382 + "</mark>"
383 + "</span>"
384 + "<span class=\"context-right\">"
385 + "<span class=\"more\">"
386 + "</span>"
387 + "</span>",
Akron08f4ceb2016-08-03 23:53:32 +0200388 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000389
Nils Diewaldbb33da22015-03-04 16:24:25 +0000390 km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8", "tokens",
391 "x", null, true, true);
Nils Diewald345bdc02014-01-21 21:48:57 +0000392
Akron417eaa92017-01-13 18:00:15 +0100393 assertEquals("SnippetHTML (2)",
394 "<span class=\"context-left\">"
395 + "</span>" + "<span class=\"match\">"+"<mark>"
396 +"<span title=\"x/o:erstens\">"
397 +"<span xlink:title=\"x/rel:a\" " + "xlink:type=\"simple\" "
398 +"xlink:href=\"#word-c1!d1-p3\">a</span>"
399 +"</span>"
400 +"<span title=\"x/o:zweitens\">b</span>"
401 +"<mark class=\"class-7 level-0\">"
402 +"<span title=\"x/o:drittens\">c</span>"
403 +"<span xml:id=\"word-c1!d1-p3\">"
404 +"<span title=\"x/o:viertens\">a</span>"
405 +"</span>"
406 +"</mark>"
407 +"<span title=\"x/o:fünftens\">b</span>"
408 +"</mark>"
409 +"</span>"
410 +"<span class=\"context-right\">"
411 +"<span class=\"more\"></span>"
412 +"</span>",
413 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000414 };
415
416
Nils Diewalda1118032014-02-13 20:50:48 +0000417 @Test
Eliza Margaretha6f989202016-10-14 21:48:29 +0200418 public void indexExample7SentenceExpansion ()
419 throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000420 KrillIndex ki = new KrillIndex();
421 ki.addDoc(createSimpleFieldDoc());
422 ki.addDoc(createSimpleFieldDoc2());
423 ki.addDoc(createSimpleFieldDoc3());
424 ki.addDoc(createSimpleFieldDoc4());
425 ki.commit();
426 Match km;
Nils Diewalda1118032014-02-13 20:50:48 +0000427
Nils Diewaldbb33da22015-03-04 16:24:25 +0000428 km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false,
429 false);
Nils Diewalda1118032014-02-13 20:50:48 +0000430
Nils Diewaldbb33da22015-03-04 16:24:25 +0000431 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200432 "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000433 km.getSnippetBrackets());
Nils Diewalda1118032014-02-13 20:50:48 +0000434
Nils Diewald1e5d5942014-05-20 13:29:53 +0000435
Nils Diewaldbb33da22015-03-04 16:24:25 +0000436 km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false,
437 false, true); // extendToSentence
Nils Diewalda1118032014-02-13 20:50:48 +0000438
Akroncb1093a2016-07-28 16:27:59 +0200439 // This will
440 // [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]
Nils Diewaldbb33da22015-03-04 16:24:25 +0000441 assertEquals(
Akroncb1093a2016-07-28 16:27:59 +0200442 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000443 km.getSnippetBrackets());
Akroncb1093a2016-07-28 16:27:59 +0200444
Akron08f4ceb2016-08-03 23:53:32 +0200445 assertEquals("<span class=\"context-left\"></span>"
446 + "<span class=\"match\">" + "<span title=\"f/m:drei\">"
447 + "<span title=\"f/y:three\">" + "<span title=\"it/is:3\">"
448 + "<span title=\"x/o:drittens\">c</span>" + "</span>"
Eliza Margaretha6f989202016-10-14 21:48:29 +0200449 + "</span>" + "</span>" + "<mark>" + "<span title=\"f/m:vier\">"
450 + "<span title=\"f/y:four\">" + "<span title=\"it/is:4\">"
Akron08f4ceb2016-08-03 23:53:32 +0200451 + "<span title=\"x/o:viertens\">a</span>" + "</span>"
452 + "</span>" + "</span>" + "</mark>"
453 + "<span title=\"f/m:fuenf\">" + "<span title=\"f/y:five\">"
454 + "<span title=\"it/is:5\">"
455 + "<span title=\"x/o:fünftens\">b</span>" + "</span>"
456 + "</span>" + "</span>" + "</span>"
457 + "<span class=\"context-right\"></span>", km.getSnippetHTML());
Akroncb1093a2016-07-28 16:27:59 +0200458
459
460
Akron43cea662016-02-15 23:43:59 +0100461 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +0000462 km = ki.getMatchInfo("match-c1!d3-p3-4", "tokens", null, null, false,
463 false, true); // extendToSentence
Eliza Margaretha6f989202016-10-14 21:48:29 +0200464
Nils Diewaldbb33da22015-03-04 16:24:25 +0000465 assertEquals(
466 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:cc}}}} {f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:aa}}}} {f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:bb}}}}]",
467 km.getSnippetBrackets());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200468
469
Nils Diewaldbb33da22015-03-04 16:24:25 +0000470 km = ki.getMatchInfo("match-c1!d4-p4-6", "tokens", null, null, false,
471 false, true); // extendToSentence
472 assertEquals(
473 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}{f/m:sechs:{f/y:six:{it/is:6:{x/o:sechstens:c}}}}{f/m:sieben:{f/y:seven:{it/is:7:{x/o:siebtens:a}}}}]",
474 km.getSnippetBrackets());
Akron43cea662016-02-15 23:43:59 +0100475 */
Nils Diewalda1118032014-02-13 20:50:48 +0000476 };
Nils Diewald345bdc02014-01-21 21:48:57 +0000477
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000478
479 @Test
Eliza Margaretha6f989202016-10-14 21:48:29 +0200480 public void indexExample7Dependencies ()
481 throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000482 KrillIndex ki = new KrillIndex();
483 ki.addDoc(createSimpleFieldDoc2());
484 ki.commit();
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000485
Nils Diewaldbb33da22015-03-04 16:24:25 +0000486 Match km = ki.getMatchInfo("match-c1!d1-p0-4", "tokens", null, null,
Akron417eaa92017-01-13 18:00:15 +0100487 true, true);
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000488
Eliza Margaretha6f989202016-10-14 21:48:29 +0200489 assertEquals("SnippetHTML (2)", "<span class=\"context-left\">"
Akron417eaa92017-01-13 18:00:15 +0100490 + "</span>" + "<span class=\"match\">" + "<mark>"
491 + "<span title=\"f/m:eins\">" + "<span title=\"f/y:one\">"
492 + "<span title=\"it/is:1\">"
493 + "<span title=\"x/o:erstens\">"
494 + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
495 + "a</span>" + "</span>" + "</span>"
496 + "</span>" + "</span>"
497 + "<span title=\"f/m:zwei\">" + "<span title=\"f/y:two\">"
498 + "<span title=\"it/is:2\">"
499 + "<span title=\"x/o:zweitens\">"
500 + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
501 + "b</span>" + "</span>"
502 + "</span>" + "</span>" + "</span>"
503 + "<span title=\"f/m:drei\">" + "<span title=\"f/y:three\">"
504 + "<span title=\"it/is:3\">"
505 + "<span title=\"x/o:drittens\">c</span>" + "</span>"
506 + "</span>" + "</span>" + "<span xml:id=\"word-c1!d1-p3\">"
507 + "<span title=\"f/m:vier\">" + "<span title=\"f/y:four\">"
508 + "<span title=\"it/is:4\">"
509 + "<span title=\"x/o:viertens\">a</span>" + "</span>"
510 + "</span>" + "</span>" + "</span>" + "</mark>" + "</span>"
511 + "<span class=\"context-right\">" + "<span class=\"more\">"
512 + "</span>" + "</span>", km.getSnippetHTML());
Nils Diewaldcde69082014-01-16 15:46:48 +0000513 };
Nils Diewald50389b02014-04-11 16:27:52 +0000514
Nils Diewaldbb33da22015-03-04 16:24:25 +0000515
516 @Test
Eliza Margaretha6f989202016-10-14 21:48:29 +0200517 public void indexExampleMultipleFoundries ()
518 throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000519 KrillIndex ki = new KrillIndex();
520 ki.addDoc(createSimpleFieldDoc4());
521 ki.commit();
522
523 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", "m",
524 false, false);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200525 assertEquals("f:m info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200526 "... [[{f/m:vier:a}{f/m:fuenf:b}{f/m:sechs:c}{f/m:sieben:a}{f/m:acht:b}{f/m:neun:a}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000527
528 km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", null, false,
529 false);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200530 assertEquals("f info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200531 "... [[{f/m:vier:{f/y:four:a}}{f/m:fuenf:{f/y:five:b}}{f/m:sechs:{f/y:six:c}}{f/m:sieben:{f/y:seven:a}}{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000532
533
534 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", null, null, false,
535 false);
536 assertEquals("all info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200537 "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000538
539 ArrayList<String> foundryList = new ArrayList<>(2);
540 foundryList.add("f");
541 foundryList.add("x");
542
543 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
544 (ArrayList<String>) null, false, false, false);
545 assertEquals("f|x info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200546 "... [[{f/m:vier:{f/y:four:{x/o:viertens:a}}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000547
548 foundryList.clear();
549 foundryList.add("y");
550 foundryList.add("x");
551
552 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
553 (ArrayList<String>) null, false, false, false);
554 assertEquals("y|x info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200555 "... [[{x/o:viertens:a}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000556
557
558 foundryList.clear();
559 foundryList.add("f");
560 foundryList.add("it");
561
562 ArrayList<String> layerList = new ArrayList<>(2);
563 layerList.add("is");
564
565 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
566 layerList, false, false, false);
567 assertEquals("f|it/is", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200568 "... [[{it/is:4:a}]] ...");
Nils Diewald50389b02014-04-11 16:27:52 +0000569 };
Nils Diewald1e5d5942014-05-20 13:29:53 +0000570
Nils Diewaldbb33da22015-03-04 16:24:25 +0000571
Akron7d45e6b2015-06-26 17:23:42 +0200572 @Test
Eliza Margaretha6f989202016-10-14 21:48:29 +0200573 public void indexExampleFailingFoundry ()
574 throws IOException, QueryException {
Akron7d45e6b2015-06-26 17:23:42 +0200575 KrillIndex ki = new KrillIndex();
576 ki.addDoc(createSimpleFieldDoc4());
577 ki.commit();
578
579 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "*", "m",
580 false, false);
581 JsonNode res = mapper.readTree(km.toJsonString());
582 assertEquals("c1", res.at("/corpusID").asText());
583 assertEquals("d4", res.at("/docID").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200584 assertEquals("Invalid foundry requested",
585 res.at("/errors/0/1").asText());
Akron7d45e6b2015-06-26 17:23:42 +0200586 };
587
588
589 @Test
Akron13db6152016-02-19 14:08:38 +0100590 public void indexFailingMatchID () throws IOException, QueryException {
Akron8abefa12016-02-13 05:35:42 +0100591 KrillIndex ki = new KrillIndex();
Akron13db6152016-02-19 14:08:38 +0100592 Match km = ki.getMatchInfo(
593 "match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", "tokens",
594 "*", "m", false, false);
Akron8abefa12016-02-13 05:35:42 +0100595 JsonNode res = mapper.readTree(km.toJsonString());
596 assertEquals("730", res.at("/errors/0/0").asText());
597 };
598
Akron13db6152016-02-19 14:08:38 +0100599
Akron8abefa12016-02-13 05:35:42 +0100600 @Test
Akron7d45e6b2015-06-26 17:23:42 +0200601 public void indexExampleNullInfo () throws IOException, QueryException {
602 KrillIndex ki = new KrillIndex();
603 ki.addDoc(createSimpleFieldDoc4());
604 ki.commit();
605 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", null, null,
606 false, false);
607 JsonNode res = mapper.readTree(km.toJsonString());
608 assertEquals("tokens", res.at("/field").asText());
609 assertTrue(res.at("/startMore").asBoolean());
610 assertTrue(res.at("/endMore").asBoolean());
611 assertEquals("c1", res.at("/corpusID").asText());
612 assertEquals("d4", res.at("/docID").asText());
613 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200614 "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark><span title=\"f/m:vier\"><span title=\"f/y:four\"><span title=\"it/is:4\"><span title=\"x/o:viertens\">a</span></span></span></span><span title=\"f/m:fuenf\"><span title=\"f/y:five\"><span title=\"it/is:5\"><span title=\"x/o:fünftens\">b</span></span></span></span><span title=\"f/m:sechs\"><span title=\"f/y:six\"><span title=\"it/is:6\"><span title=\"x/o:sechstens\">c</span></span></span></span><span title=\"f/m:sieben\"><span title=\"f/y:seven\"><span title=\"it/is:7\"><span title=\"x/o:siebtens\">a</span></span></span></span><span title=\"f/m:acht\"><span title=\"f/y:eight\"><span title=\"it/is:8\"><span title=\"x/o:achtens\">b</span></span></span></span><span title=\"f/m:neun\"><span title=\"f/y:nine\"><span title=\"it/is:9\"><span title=\"x/o:neuntens\">a</span></span></span></span></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>",
Akron7d45e6b2015-06-26 17:23:42 +0200615 res.at("/snippet").asText());
616 assertEquals("match-c1!d4-p3-9", res.at("/matchID").asText());
617 assertTrue(res.at("/pubDate").isMissingNode());
618 };
619
620
Akronb35261a2016-02-10 20:24:24 +0100621 @Test
622 public void indexAttributeInfo () throws IOException, QueryException {
623 KrillIndex ki = new KrillIndex();
624 ki.addDoc(createAttributeFieldDoc());
625 ki.commit();
Akron13db6152016-02-19 14:08:38 +0100626 Match km = ki.getMatchInfo("match-ca1!da1-p7-10", "tokens", null, null,
627 false, false);
Akronb35261a2016-02-10 20:24:24 +0100628 JsonNode res = mapper.readTree(km.toJsonString());
629 assertEquals("tokens", res.at("/field").asText());
630 assertTrue(res.at("/startMore").asBoolean());
631 assertTrue(res.at("/endMore").asBoolean());
632 assertEquals("ca1", res.at("/corpusID").asText());
633 assertEquals("da1", res.at("/docID").asText());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200634 assertEquals("<span class=\"context-left\">" + "<span class=\"more\">"
635 + "</span>" + "</span>" + "<span class=\"match\"><mark>" +
Akron13db6152016-02-19 14:08:38 +0100636 // "<span title=\"@:x/s:key:value\">"+
Eliza Margaretha6f989202016-10-14 21:48:29 +0200637 "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
Akron13db6152016-02-19 14:08:38 +0100638 + "<span title=\"it/is:8\">"
Eliza Margaretha6f989202016-10-14 21:48:29 +0200639 + "<span title=\"x/o:achtens\">b</span>" +
Akron13db6152016-02-19 14:08:38 +0100640 // "</span>"+
641 "</span>" + "</span>" + "</span>" + "<span title=\"f/m:neun\">"
642 + "<span title=\"f/y:nine\">" + "<span title=\"it/is:9\">"
643 + "<span title=\"x/o:neuntens\">a</span>" + "</span>"
644 + "</span>" + "</span>" + "<span title=\"f/m:zehn\">"
645 + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">"
646 + "<span title=\"x/o:zehntens\">c</span>" + "</span>"
Akron08f4ceb2016-08-03 23:53:32 +0200647 + "</span>" + "</span>" + "</mark>" + "</span>"
Akron13db6152016-02-19 14:08:38 +0100648 + "<span class=\"context-right\">" + "</span>",
649 res.at("/snippet").asText());
Akronb35261a2016-02-10 20:24:24 +0100650 };
651
Akron394607a2017-05-29 13:27:37 +0200652 @Test
653 public void indexWithFieldInfo () throws IOException, QueryException {
654 KrillIndex ki = new KrillIndex();
655 ki.addDoc(createSimpleFieldDoc());
656 ki.commit();
657
658 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
659 null, null, false, false);
660 assertEquals(km.getAvailability(), "CC-BY-SA");
661 };
662
Akronb35261a2016-02-10 20:24:24 +0100663
Nils Diewaldbb33da22015-03-04 16:24:25 +0000664 private FieldDocument createSimpleFieldDoc () {
665 FieldDocument fd = new FieldDocument();
666 fd.addString("corpusID", "c1");
667 fd.addString("ID", "d1");
Akron394607a2017-05-29 13:27:37 +0200668 fd.addString("availability", "CC-BY-SA");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200669 fd.addTV("tokens", "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100670 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100671 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100672 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100673 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100674 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
675 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
676 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100677 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100678 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
679 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000680 return fd;
Nils Diewald1e5d5942014-05-20 13:29:53 +0000681 };
Nils Diewald84934372014-05-20 13:48:18 +0000682
Nils Diewaldbb33da22015-03-04 16:24:25 +0000683
684 private FieldDocument createSimpleFieldDoc2 () {
685 FieldDocument fd = new FieldDocument();
686 fd.addString("corpusID", "c1");
687 fd.addString("ID", "d1");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200688 fd.addTV("tokens", "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100689 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
690 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>4<s>0<s>0<s>0|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100691 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100692 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100693 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
694 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
695 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100696 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100697 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
698 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000699 return fd;
700 };
701
702
703 private FieldDocument createSimpleFieldDoc3 () {
704 FieldDocument fd = new FieldDocument();
705 fd.addString("corpusID", "c1");
706 fd.addString("ID", "d3");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200707 fd.addTV("tokens", "aa bb cc aa bb cc aa bb aa cc ",
Akron5f044032015-12-18 00:35:38 +0100708 "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100709 + "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]"
Akron43cea662016-02-15 23:43:59 +0100710 + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:base/s:s$<b>64<i>6<i>14<i>5]"
Akron5f044032015-12-18 00:35:38 +0100711 + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]"
margaretha4f995582015-12-14 14:14:34 +0100712 + "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]"
713 + "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]"
714 + "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]"
Akron5f044032015-12-18 00:35:38 +0100715 + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]"
margaretha4f995582015-12-14 14:14:34 +0100716 + "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]"
717 + "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000718 return fd;
719 };
720
721
722 private FieldDocument createSimpleFieldDoc4 () {
723 FieldDocument fd = new FieldDocument();
724 fd.addString("corpusID", "c1");
725 fd.addString("ID", "d4");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200726 fd.addTV("tokens", "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100727 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100728 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100729 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100730 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100731 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
Akron43cea662016-02-15 23:43:59 +0100732 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:base/s:s$<b>64<i>5<i>7<i>7]"
margaretha4f995582015-12-14 14:14:34 +0100733 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100734 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100735 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
736 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000737 return fd;
Nils Diewald84934372014-05-20 13:48:18 +0000738 };
739
Akron13db6152016-02-19 14:08:38 +0100740
Akronb35261a2016-02-10 20:24:24 +0100741 /*
742 Check for terms|spans|rels ...
743 */
744 private FieldDocument createAttributeFieldDoc () {
745 FieldDocument fd = new FieldDocument();
746 fd.addString("corpusID", "ca1");
747 fd.addString("ID", "da1");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200748 fd.addTV("tokens", "abcabcabac",
Akronb35261a2016-02-10 20:24:24 +0100749 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]"
750 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100751 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akronb35261a2016-02-10 20:24:24 +0100752 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]"
753 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
754 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
755 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
756 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]"
757 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
758 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
759 return fd;
760 };
761
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000762};