blob: ba1354407438e81423eda9f5a13a0183a9619159 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
margaretha4f995582015-12-14 14:14:34 +01003import static org.junit.Assert.assertEquals;
4import static org.junit.Assert.assertNull;
5import static org.junit.Assert.assertTrue;
Nils Diewald68bb1f72014-01-07 14:07:05 +00006
margaretha4f995582015-12-14 14:14:34 +01007import java.io.IOException;
8import java.util.ArrayList;
9
Nils Diewald68bb1f72014-01-07 14:07:05 +000010import org.junit.Test;
Nils Diewald68bb1f72014-01-07 14:07:05 +000011import org.junit.runner.RunWith;
12import org.junit.runners.JUnit4;
13
margaretha4f995582015-12-14 14:14:34 +010014import com.fasterxml.jackson.databind.JsonNode;
15import com.fasterxml.jackson.databind.ObjectMapper;
16
17import de.ids_mannheim.korap.Krill;
18import de.ids_mannheim.korap.KrillIndex;
19import de.ids_mannheim.korap.query.QueryBuilder;
20import de.ids_mannheim.korap.response.Match;
21import de.ids_mannheim.korap.response.Result;
Nils Diewaldff0f8742015-02-26 20:42:45 +000022import de.ids_mannheim.korap.response.match.MatchIdentifier;
23import de.ids_mannheim.korap.response.match.PosIdentifier;
Nils Diewald6e9eb4e2014-06-17 19:28:01 +000024import de.ids_mannheim.korap.util.QueryException;
25
Nils Diewald68bb1f72014-01-07 14:07:05 +000026@RunWith(JUnit4.class)
27public class TestMatchIdentifier {
28
// Jackson mapper used by the tests of this class to parse the JSON
// serialization of a match (mapper.readTree(km.toJsonString())).
Akron7d45e6b2015-06-26 17:23:42 +020029 ObjectMapper mapper = new ObjectMapper();
30
Akron13db6152016-02-19 14:08:38 +010031
Nils Diewald68bb1f72014-01-07 14:07:05 +000032 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +000033 public void identifierExample1 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +000034 MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
35 assertEquals(id.getCorpusID(), "c1");
36 assertEquals(id.getDocID(), "d1");
37 assertEquals(id.getStartPos(), 4);
38 assertEquals(id.getEndPos(), 20);
Nils Diewaldcde69082014-01-16 15:46:48 +000039
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 assertEquals(id.toString(), "match-c1!d1-p4-20");
41 id.addPos(10, 14, 2);
42 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14");
43 id.addPos(11, 12, 5);
44 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
45 // Ignore
46 id.addPos(11, 12, -8);
47 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
48 id.addPos(11, -12, 8);
49 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
50 id.addPos(-11, 12, 8);
51 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
Nils Diewaldcde69082014-01-16 15:46:48 +000052
Nils Diewaldbb33da22015-03-04 16:24:25 +000053 id = new MatchIdentifier("matc-c1!d1-p4-20");
54 assertNull(id.toString());
55 id = new MatchIdentifier("match-d1-p4-20");
56 assertNull(id.getCorpusID());
57 assertEquals(id.getDocID(), "d1");
58 id = new MatchIdentifier("match-p4-20");
59 assertNull(id.toString());
Nils Diewaldcde69082014-01-16 15:46:48 +000060
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 id = new MatchIdentifier("match-c1!d1-p4-20");
62 assertEquals(id.toString(), "match-c1!d1-p4-20");
Nils Diewaldcde69082014-01-16 15:46:48 +000063
Nils Diewaldbb33da22015-03-04 16:24:25 +000064 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8");
65 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000066
Nils Diewaldbb33da22015-03-04 16:24:25 +000067 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10");
68 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000069
Nils Diewaldbb33da22015-03-04 16:24:25 +000070 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6");
71 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4");
Nils Diewaldcde69082014-01-16 15:46:48 +000072
Nils Diewaldbb33da22015-03-04 16:24:25 +000073 id = new MatchIdentifier(
74 "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8");
75 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000076
Nils Diewaldbb33da22015-03-04 16:24:25 +000077 id = new MatchIdentifier(
78 "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8(5)9--10");
Akron1f619572015-07-08 17:33:47 +020079 assertEquals(4, id.getStartPos());
80 assertEquals(20, id.getEndPos());
81 assertEquals("c1", id.getCorpusID());
82 assertEquals("d1", id.getDocID());
83 assertEquals(null, id.getTextSigle());
Nils Diewaldbb33da22015-03-04 16:24:25 +000084 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +000085
Akron40550172015-08-04 03:06:12 +020086 id = new MatchIdentifier("match-GOE!GOE_AGF.02286-p2105-2106");
Akron1f619572015-07-08 17:33:47 +020087 assertEquals(2105, id.getStartPos());
88 assertEquals(2106, id.getEndPos());
89 assertEquals(null, id.getCorpusID());
90 assertEquals(null, id.getDocID());
91 assertEquals("GOE_AGF.02286", id.getTextSigle());
92 assertEquals("match-GOE_AGF.02286-p2105-2106", id.toString());
Akronfc2625e2016-07-27 01:52:28 +020093
94 id = new MatchIdentifier("match-corpus-1/doc-1/text-1/p2105-2106");
95 assertEquals("match-corpus-1/doc-1/text-1-p2105-2106", id.toString());
96 assertEquals("corpus-1/doc-1/text-1", id.getTextSigle());
Akron1f619572015-07-08 17:33:47 +020097 };
Nils Diewaldbb33da22015-03-04 16:24:25 +000098
Akron40550172015-08-04 03:06:12 +020099
Nils Diewaldcde69082014-01-16 15:46:48 +0000100 @Test
Nils Diewald345bdc02014-01-21 21:48:57 +0000101 public void posIdentifierExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000102 PosIdentifier id = new PosIdentifier();
103 id.setCorpusID("c1");
104 id.setDocID("d1");
105 id.setPos(8);
106 assertEquals(id.getCorpusID(), "c1");
107 assertEquals(id.getDocID(), "d1");
108 assertEquals(id.getPos(), 8);
109 assertEquals(id.toString(), "word-c1!d1-p8");
Nils Diewald345bdc02014-01-21 21:48:57 +0000110 };
111
Nils Diewaldbb33da22015-03-04 16:24:25 +0000112
Nils Diewald345bdc02014-01-21 21:48:57 +0000113 @Test
Nils Diewald68bb1f72014-01-07 14:07:05 +0000114 public void indexExample1 () throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000115 KrillIndex ki = new KrillIndex();
116 ki.addDoc(createSimpleFieldDoc());
117 ki.commit();
Nils Diewald68bb1f72014-01-07 14:07:05 +0000118
Nils Diewaldbb33da22015-03-04 16:24:25 +0000119 QueryBuilder kq = new QueryBuilder("tokens");
120 Krill ks = new Krill(kq._(2,
121 kq.seq(kq.seg("s:b")).append(kq._(kq.seg("s:a")))));
122 Result kr = ki.search(ks);
Nils Diewald68bb1f72014-01-07 14:07:05 +0000123
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124 assertEquals("totalResults", kr.getTotalResults(), 1);
125 assertEquals("StartPos (0)", kr.getMatch(0).startPos, 7);
126 assertEquals("EndPos (0)", kr.getMatch(0).endPos, 9);
Nils Diewald68bb1f72014-01-07 14:07:05 +0000127
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 Match km = kr.getMatch(0);
Nils Diewald68bb1f72014-01-07 14:07:05 +0000129
Akronf05fde62016-08-03 23:46:17 +0200130 assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000131 km.getSnippetBrackets());
132 assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID());
Nils Diewald68bb1f72014-01-07 14:07:05 +0000133 };
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000134
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000136 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000137 public void indexExample2 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000138 KrillIndex ki = new KrillIndex();
139 ki.addDoc(createSimpleFieldDoc());
140 ki.commit();
Nils Diewaldcde69082014-01-16 15:46:48 +0000141
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142 Match km = ki.getMatch("match-c1!d1-p7-9(0)8-8(2)7-8");
Nils Diewaldcde69082014-01-16 15:46:48 +0000143
Nils Diewaldbb33da22015-03-04 16:24:25 +0000144 assertEquals("StartPos (0)", 7, km.getStartPos());
145 assertEquals("EndPos (0)", 9, km.getEndPos());
Nils Diewaldcde69082014-01-16 15:46:48 +0000146
Akronf05fde62016-08-03 23:46:17 +0200147 assertEquals("SnippetBrackets (0)", "... [[{2:b{a}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148 km.getSnippetBrackets());
Nils Diewald1e5d5942014-05-20 13:29:53 +0000149
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000151
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f",
153 "m", false, false);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000154
Nils Diewaldbb33da22015-03-04 16:24:25 +0000155 assertEquals("SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200156 "... [[{f/m:acht:b}{f/m:neun:a}]] ...", km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000157
Nils Diewaldd0d6feb2014-02-26 18:51:08 +0000158
Akron7d45e6b2015-06-26 17:23:42 +0200159 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f",
160 null, false, false);
Akron48937e92015-06-26 01:49:02 +0200161 assertEquals("SnippetBrackets (1b)",
Akronf05fde62016-08-03 23:46:17 +0200162 "... [[{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...",
Akron48937e92015-06-26 01:49:02 +0200163 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000164
Nils Diewaldbb33da22015-03-04 16:24:25 +0000165 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f",
166 "m", false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000167
Nils Diewaldbb33da22015-03-04 16:24:25 +0000168 assertEquals("SnippetBrackets (2)",
Akronf05fde62016-08-03 23:46:17 +0200169 "... [[{2:{f/m:acht:b}{{f/m:neun:a}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000170 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000171
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f",
173 "m", false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000174
Nils Diewaldbb33da22015-03-04 16:24:25 +0000175 assertEquals("SnippetBrackets (3)",
Akronf05fde62016-08-03 23:46:17 +0200176 "... [[{2:{f/m:acht:b}{4:{f/m:neun:a}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000177 km.getSnippetBrackets());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000178
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f",
180 null, false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000181
Nils Diewaldbb33da22015-03-04 16:24:25 +0000182 assertEquals(
183 "SnippetBrackets (4)",
Akronf05fde62016-08-03 23:46:17 +0200184 "... [[{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000185 km.getSnippetBrackets());
186
187 assertEquals("SnippetHTML (4)", "<span class=\"context-left\">"
Akronf05fde62016-08-03 23:46:17 +0200188 + "<span class=\"more\">" + "</span>" + "</span>" +
189 "<span class=\"match\">" + "<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 + "<mark class=\"class-2 level-0\">"
191 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
192 + "b" + "</span>" + "</span>"
193 + "<mark class=\"class-4 level-1\">"
194 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
195 + "a" + "</span>" + "</span>" + "</mark>" + "</mark>"
Akronf05fde62016-08-03 23:46:17 +0200196 + "</mark>" + "</span>" + "<span class=\"context-right\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000197 + "<span class=\"more\">" + "</span>" + "</span>",
198 km.getSnippetHTML());
Akron48937e92015-06-26 01:49:02 +0200199
Akron48937e92015-06-26 01:49:02 +0200200 JsonNode res = mapper.readTree(km.toJsonString());
201 assertEquals("tokens", res.at("/field").asText());
202 assertTrue(res.at("/startMore").asBoolean());
203 assertTrue(res.at("/endMore").asBoolean());
204 assertEquals("c1", res.at("/corpusID").asText());
205 assertEquals("d1", res.at("/docID").asText());
Akron7d45e6b2015-06-26 17:23:42 +0200206 assertEquals("match-c1!d1-p7-9(4)8-8(2)7-8", res.at("/matchID")
207 .asText());
Akron48937e92015-06-26 01:49:02 +0200208 assertTrue(res.at("/pubDate").isMissingNode());
Nils Diewald345bdc02014-01-21 21:48:57 +0000209 };
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000210
Nils Diewald345bdc02014-01-21 21:48:57 +0000211
212 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000213 public void indexExample3 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000214 KrillIndex ki = new KrillIndex();
215 ki.addDoc(createSimpleFieldDoc());
216 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000217
Nils Diewaldbb33da22015-03-04 16:24:25 +0000218 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
219 null, null, false, true);
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000220
Nils Diewald345bdc02014-01-21 21:48:57 +0000221
Nils Diewaldbb33da22015-03-04 16:24:25 +0000222 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akronf05fde62016-08-03 23:46:17 +0200223 + "<span class=\"more\">" + "</span>" + "</span>" + "<span class=\"match\">" + "<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000224 + "<mark class=\"class-2 level-0\">"
225 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
226 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
227 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
228 + "<mark class=\"class-4 level-1\">"
229 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
230 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
231 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200232 + "</mark>" + "</mark>" + "</mark>" + "</span>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000233 + "<span class=\"context-right\">" + "<span class=\"more\">"
234 + "</span>" + "</span>", km.getSnippetHTML());
Nils Diewaldcde69082014-01-16 15:46:48 +0000235 };
236
Nils Diewaldbb33da22015-03-04 16:24:25 +0000237
Nils Diewald345bdc02014-01-21 21:48:57 +0000238 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000239 public void indexExample4 () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000240 KrillIndex ki = new KrillIndex();
241 ki.addDoc(createSimpleFieldDoc());
242 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000243
Nils Diewaldbb33da22015-03-04 16:24:25 +0000244 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
245 null, null, false, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000246
247
Nils Diewaldbb33da22015-03-04 16:24:25 +0000248 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akronf05fde62016-08-03 23:46:17 +0200249 + "<span class=\"more\">" + "</span>" + "</span>" + "<span class=\"match\">" + "<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
251 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
252 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
253 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
254 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
255 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200256 + "</mark>" + "</span>" + "<span class=\"context-right\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000257 + "<span class=\"more\">" + "</span>" + "</span>",
258 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000259 };
260
Akron40550172015-08-04 03:06:12 +0200261
Akron1f619572015-07-08 17:33:47 +0200262 @Test
263 public void indexNewStructure () throws IOException, QueryException {
264 KrillIndex ki = new KrillIndex();
265 ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false);
266 ki.commit();
267
Akron40550172015-08-04 03:06:12 +0200268 Match km = ki.getMatchInfo("match-GOE!GOE_AGX.00002-p210-211",
269 "tokens", true, (String) null, (String) null, true, true, true);
Akron1f619572015-07-08 17:33:47 +0200270
271 JsonNode res = mapper.readTree(km.toJsonString());
272 assertEquals("tokens", res.at("/field").asText());
273 assertEquals("GOE_AGX.00002", res.at("/textSigle").asText());
274 assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText());
275 };
276
Nils Diewaldbb33da22015-03-04 16:24:25 +0000277
Nils Diewald345bdc02014-01-21 21:48:57 +0000278 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000279 public void indexExample5Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000280 KrillIndex ki = new KrillIndex();
281 ki.addDoc(createSimpleFieldDoc());
282 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000283
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens",
285 null, null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000286
287
Nils Diewaldbb33da22015-03-04 16:24:25 +0000288 assertEquals(
289 "SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200290 "... [[{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000291 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000292 };
293
Nils Diewaldbb33da22015-03-04 16:24:25 +0000294
Nils Diewald345bdc02014-01-21 21:48:57 +0000295 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000296 public void indexExample6Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000297 KrillIndex ki = new KrillIndex();
298 ki.addDoc(createSimpleFieldDoc());
299 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000300
Nils Diewaldbb33da22015-03-04 16:24:25 +0000301 Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens",
302 null, null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000303
304
Nils Diewaldbb33da22015-03-04 16:24:25 +0000305 assertEquals(
306 "SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200307 "... [[{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000308 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000309 };
310
Nils Diewaldbb33da22015-03-04 16:24:25 +0000311
Nils Diewald345bdc02014-01-21 21:48:57 +0000312 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000313 public void indexExample7Spans () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000314 KrillIndex ki = new KrillIndex();
315 ki.addDoc(createSimpleFieldDoc());
316 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000317
Nils Diewaldbb33da22015-03-04 16:24:25 +0000318 Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens",
319 null, null, true, true);
Nils Diewald345bdc02014-01-21 21:48:57 +0000320
321
Nils Diewaldbb33da22015-03-04 16:24:25 +0000322 assertEquals(
323 "SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200324 "... [[{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000325 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000326
Nils Diewaldbb33da22015-03-04 16:24:25 +0000327 assertEquals("SnippetHTML (1)", "<span class=\"context-left\">"
Akronf05fde62016-08-03 23:46:17 +0200328 + "<span class=\"more\">" + "</span>" + "</span>" +
329 "<span class=\"match\">" +"<mark>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000330 + "<span title=\"x/tag\">" + "<mark class=\"class-2 level-0\">"
331 + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">"
332 + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">"
333 + "b" + "</span>" + "</span>" + "</span>" + "</span>"
334 + "<mark class=\"class-4 level-1\">"
335 + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">"
336 + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">"
337 + "a" + "</span>" + "</span>" + "</span>" + "</span>"
338 + "</mark>" + "</mark>" + "<span title=\"f/m:zehn\">"
339 + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">"
340 + "<span title=\"x/o:zehntens\">" + "c" + "</span>" + "</span>"
341 + "</span>" + "</span>" + "</span>" + "</mark>"
Akronf05fde62016-08-03 23:46:17 +0200342 + "</span>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000343 + "<span class=\"context-right\">" + "</span>",
344 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000345 };
346
Nils Diewaldbb33da22015-03-04 16:24:25 +0000347
Nils Diewald345bdc02014-01-21 21:48:57 +0000348 @Test
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000349 public void indexExample6Relations () throws IOException, QueryException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000350 KrillIndex ki = new KrillIndex();
351 ki.addDoc(createSimpleFieldDoc());
352 ki.commit();
Nils Diewald345bdc02014-01-21 21:48:57 +0000353
Nils Diewaldbb33da22015-03-04 16:24:25 +0000354 Match km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8", "tokens",
355 "x", null, true, false);
Nils Diewald345bdc02014-01-21 21:48:57 +0000356
Nils Diewaldbb33da22015-03-04 16:24:25 +0000357 assertEquals(
358 "SnippetBrackets (1)",
Akronf05fde62016-08-03 23:46:17 +0200359 "[[{x/rel:a>3:{x/o:erstens:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000360 km.getSnippetBrackets());
Nils Diewald345bdc02014-01-21 21:48:57 +0000361
Akronf05fde62016-08-03 23:46:17 +0200362 assertEquals("SnippetBrackets (1)",
363 "<span class=\"context-left\">"
364 + "</span>"
365 + "<span class=\"match\">"
366 + "<mark>"
367 + "<span xlink:title=\"x/rel:a\" "
368 + "xlink:type=\"simple\" " + "xlink:href=\"#word-c1!d1-p3\">"
369 + "<span title=\"x/o:erstens\">" + "a" + "</span>" + "</span>"
370 + "<span title=\"x/o:zweitens\">" + "b" + "</span>"
371 + "<span title=\"x/o:drittens\">" + "c" + "</span>"
372 + "<span xml:id=\"word-c1!d1-p3\">"
373 + "<span title=\"x/o:viertens\">" + "a" + "</span>"
374 + "</span>"
375 + "<span title=\"x/o:fünftens\">" + "b" + "</span>"
376 + "</mark>"
377 + "</span>"
378 + "<span class=\"context-right\">" + "<span class=\"more\">"
379 + "</span>" +
380 "</span>", km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000381
Nils Diewaldbb33da22015-03-04 16:24:25 +0000382 km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8", "tokens",
383 "x", null, true, true);
Nils Diewald345bdc02014-01-21 21:48:57 +0000384
Nils Diewaldbb33da22015-03-04 16:24:25 +0000385 assertEquals("SnippetBrackets (1)", "<span class=\"context-left\">"
Akronf05fde62016-08-03 23:46:17 +0200386 + "</span>" +
387 "<span class=\"match\">" +
388 "<mark>" + "<span xlink:title=\"x/rel:a\" "
Nils Diewaldbb33da22015-03-04 16:24:25 +0000389 + "xlink:type=\"simple\" " + "xlink:href=\"#word-c1!d1-p3\">"
390 + "<span title=\"x/o:erstens\">" + "a" + "</span>" + "</span>"
391 + "<span title=\"x/o:zweitens\">" + "b" + "</span>"
392 + "<mark class=\"class-7 level-0\">"
393 + "<span title=\"x/o:drittens\">" + "c" + "</span>"
394 + "<span xml:id=\"word-c1!d1-p3\">"
395 + "<span title=\"x/o:viertens\">" + "a" + "</span>" + "</span>"
396 + "</mark>" + "<span title=\"x/o:fünftens\">" + "b" + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200397 + "</mark>" +
398"</span>" +
399 "<span class=\"context-right\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000400 + "<span class=\"more\">" + "</span>" + "</span>",
401 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000402 };
403
404
Nils Diewalda1118032014-02-13 20:50:48 +0000405 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000406 public void indexExample7SentenceExpansion () throws IOException,
407 QueryException {
408 KrillIndex ki = new KrillIndex();
409 ki.addDoc(createSimpleFieldDoc());
410 ki.addDoc(createSimpleFieldDoc2());
411 ki.addDoc(createSimpleFieldDoc3());
412 ki.addDoc(createSimpleFieldDoc4());
413 ki.commit();
414 Match km;
Nils Diewalda1118032014-02-13 20:50:48 +0000415
Nils Diewaldbb33da22015-03-04 16:24:25 +0000416 km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false,
417 false);
Nils Diewalda1118032014-02-13 20:50:48 +0000418
Nils Diewaldbb33da22015-03-04 16:24:25 +0000419 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200420 "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000421 km.getSnippetBrackets());
Nils Diewalda1118032014-02-13 20:50:48 +0000422
Nils Diewald1e5d5942014-05-20 13:29:53 +0000423
Nils Diewaldbb33da22015-03-04 16:24:25 +0000424 km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false,
425 false, true); // extendToSentence
Nils Diewalda1118032014-02-13 20:50:48 +0000426
Akroncb1093a2016-07-28 16:27:59 +0200427 // This will
428 // [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]
Nils Diewaldbb33da22015-03-04 16:24:25 +0000429 assertEquals(
Akroncb1093a2016-07-28 16:27:59 +0200430 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000431 km.getSnippetBrackets());
Akroncb1093a2016-07-28 16:27:59 +0200432
433 assertEquals(
434 "<span class=\"context-left\"></span>"+
Akronf05fde62016-08-03 23:46:17 +0200435 "<span class=\"match\">" +
Akroncb1093a2016-07-28 16:27:59 +0200436 "<span title=\"f/m:drei\">"+
437 "<span title=\"f/y:three\">"+
438 "<span title=\"it/is:3\">"+
439 "<span title=\"x/o:drittens\">c</span>"+
440 "</span>"+
441 "</span>"+
442 "</span>"+
443 "<mark>"+
444 "<span title=\"f/m:vier\">"+
445 "<span title=\"f/y:four\">"+
446 "<span title=\"it/is:4\">"+
447 "<span title=\"x/o:viertens\">a</span>"+
448 "</span>"+
449 "</span>"+
450 "</span>"+
451 "</mark>"+
452 "<span title=\"f/m:fuenf\">"+
453 "<span title=\"f/y:five\">"+
454 "<span title=\"it/is:5\">"+
455 "<span title=\"x/o:fünftens\">b</span>"+
456 "</span>"+
457 "</span>"+
458 "</span>"+
Akronf05fde62016-08-03 23:46:17 +0200459 "</span>" +
Akroncb1093a2016-07-28 16:27:59 +0200460 "<span class=\"context-right\"></span>",
461 km.getSnippetHTML());
462
463
464
465
Akron43cea662016-02-15 23:43:59 +0100466 /*
Nils Diewaldbb33da22015-03-04 16:24:25 +0000467 km = ki.getMatchInfo("match-c1!d3-p3-4", "tokens", null, null, false,
468 false, true); // extendToSentence
Nils Diewald1e5d5942014-05-20 13:29:53 +0000469
Nils Diewaldbb33da22015-03-04 16:24:25 +0000470 assertEquals(
471 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:cc}}}} {f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:aa}}}} {f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:bb}}}}]",
472 km.getSnippetBrackets());
Nils Diewald1e5d5942014-05-20 13:29:53 +0000473
Nils Diewald84934372014-05-20 13:48:18 +0000474
Nils Diewaldbb33da22015-03-04 16:24:25 +0000475 km = ki.getMatchInfo("match-c1!d4-p4-6", "tokens", null, null, false,
476 false, true); // extendToSentence
477 assertEquals(
478 "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}{f/m:sechs:{f/y:six:{it/is:6:{x/o:sechstens:c}}}}{f/m:sieben:{f/y:seven:{it/is:7:{x/o:siebtens:a}}}}]",
479 km.getSnippetBrackets());
Akron43cea662016-02-15 23:43:59 +0100480 */
Nils Diewalda1118032014-02-13 20:50:48 +0000481 };
Nils Diewald345bdc02014-01-21 21:48:57 +0000482
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000483
484 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000485 public void indexExample7Dependencies () throws IOException, QueryException {
486 KrillIndex ki = new KrillIndex();
487 ki.addDoc(createSimpleFieldDoc2());
488 ki.commit();
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000489
Nils Diewaldbb33da22015-03-04 16:24:25 +0000490 Match km = ki.getMatchInfo("match-c1!d1-p0-4", "tokens", null, null,
491 true, true);
Nils Diewald6e9eb4e2014-06-17 19:28:01 +0000492
Nils Diewaldbb33da22015-03-04 16:24:25 +0000493 assertEquals(
494 "SnippetHTML (2)",
495 "<span class=\"context-left\">"
496 + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200497 + "<span class=\"match\">"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000498 + "<mark>"
499 + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
500 + "<span title=\"f/m:eins\">"
501 + "<span title=\"f/y:one\">"
502 + "<span title=\"it/is:1\">"
503 + "<span title=\"x/o:erstens\">a</span>"
504 + "</span>"
505 + "</span>"
506 + "</span>"
507 + "</span>"
508 + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
509 + "<span title=\"f/m:zwei\">"
510 + "<span title=\"f/y:two\">"
511 + "<span title=\"it/is:2\">"
512 + "<span title=\"x/o:zweitens\">b</span>" + "</span>"
513 + "</span>" + "</span>" + "</span>"
514 + "<span title=\"f/m:drei\">"
515 + "<span title=\"f/y:three\">"
516 + "<span title=\"it/is:3\">"
517 + "<span title=\"x/o:drittens\">c</span>" + "</span>"
518 + "</span>" + "</span>"
519 + "<span xml:id=\"word-c1!d1-p3\">"
520 + "<span title=\"f/m:vier\">"
521 + "<span title=\"f/y:four\">"
522 + "<span title=\"it/is:4\">"
523 + "<span title=\"x/o:viertens\">a</span>" + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200524 + "</span>" + "</span>" + "</span>" + "</mark>"
525 + "</span>"
Nils Diewaldbb33da22015-03-04 16:24:25 +0000526 + "<span class=\"context-right\">"
527 + "<span class=\"more\">" + "</span>" + "</span>",
528 km.getSnippetHTML());
Nils Diewaldcde69082014-01-16 15:46:48 +0000529 };
Nils Diewald50389b02014-04-11 16:27:52 +0000530
Nils Diewaldbb33da22015-03-04 16:24:25 +0000531
532 @Test
533 public void indexExampleMultipleFoundries () throws IOException,
534 QueryException {
535 KrillIndex ki = new KrillIndex();
536 ki.addDoc(createSimpleFieldDoc4());
537 ki.commit();
538
539 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", "m",
540 false, false);
541 assertEquals(
542 "f:m info",
543 km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200544 "... [[{f/m:vier:a}{f/m:fuenf:b}{f/m:sechs:c}{f/m:sieben:a}{f/m:acht:b}{f/m:neun:a}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000545
546 km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", null, false,
547 false);
548 assertEquals(
549 "f info",
550 km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200551 "... [[{f/m:vier:{f/y:four:a}}{f/m:fuenf:{f/y:five:b}}{f/m:sechs:{f/y:six:c}}{f/m:sieben:{f/y:seven:a}}{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000552
553
554 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", null, null, false,
555 false);
556 assertEquals("all info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200557 "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000558
559 ArrayList<String> foundryList = new ArrayList<>(2);
560 foundryList.add("f");
561 foundryList.add("x");
562
563 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
564 (ArrayList<String>) null, false, false, false);
565 assertEquals("f|x info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200566 "... [[{f/m:vier:{f/y:four:{x/o:viertens:a}}}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000567
568 foundryList.clear();
569 foundryList.add("y");
570 foundryList.add("x");
571
572 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
573 (ArrayList<String>) null, false, false, false);
574 assertEquals("y|x info", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200575 "... [[{x/o:viertens:a}]] ...");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000576
577
578 foundryList.clear();
579 foundryList.add("f");
580 foundryList.add("it");
581
582 ArrayList<String> layerList = new ArrayList<>(2);
583 layerList.add("is");
584
585 km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList,
586 layerList, false, false, false);
587 assertEquals("f|it/is", km.getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200588 "... [[{it/is:4:a}]] ...");
Nils Diewald50389b02014-04-11 16:27:52 +0000589 };
Nils Diewald1e5d5942014-05-20 13:29:53 +0000590
Nils Diewaldbb33da22015-03-04 16:24:25 +0000591
Akron7d45e6b2015-06-26 17:23:42 +0200592 @Test
593 public void indexExampleFailingFoundry () throws IOException,
594 QueryException {
595 KrillIndex ki = new KrillIndex();
596 ki.addDoc(createSimpleFieldDoc4());
597 ki.commit();
598
599 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "*", "m",
600 false, false);
601 JsonNode res = mapper.readTree(km.toJsonString());
602 assertEquals("c1", res.at("/corpusID").asText());
603 assertEquals("d4", res.at("/docID").asText());
604 assertEquals("Invalid foundry requested", res.at("/errors/0/1")
605 .asText());
606 };
607
608
609 @Test
Akron13db6152016-02-19 14:08:38 +0100610 public void indexFailingMatchID () throws IOException, QueryException {
Akron8abefa12016-02-13 05:35:42 +0100611 KrillIndex ki = new KrillIndex();
Akron13db6152016-02-19 14:08:38 +0100612 Match km = ki.getMatchInfo(
613 "match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", "tokens",
614 "*", "m", false, false);
Akron8abefa12016-02-13 05:35:42 +0100615 JsonNode res = mapper.readTree(km.toJsonString());
616 assertEquals("730", res.at("/errors/0/0").asText());
617 };
618
Akron13db6152016-02-19 14:08:38 +0100619
Akron8abefa12016-02-13 05:35:42 +0100620 @Test
Akron7d45e6b2015-06-26 17:23:42 +0200621 public void indexExampleNullInfo () throws IOException, QueryException {
622 KrillIndex ki = new KrillIndex();
623 ki.addDoc(createSimpleFieldDoc4());
624 ki.commit();
625 Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", null, null,
626 false, false);
627 JsonNode res = mapper.readTree(km.toJsonString());
628 assertEquals("tokens", res.at("/field").asText());
629 assertTrue(res.at("/startMore").asBoolean());
630 assertTrue(res.at("/endMore").asBoolean());
631 assertEquals("c1", res.at("/corpusID").asText());
632 assertEquals("d4", res.at("/docID").asText());
633 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200634 "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark><span title=\"f/m:vier\"><span title=\"f/y:four\"><span title=\"it/is:4\"><span title=\"x/o:viertens\">a</span></span></span></span><span title=\"f/m:fuenf\"><span title=\"f/y:five\"><span title=\"it/is:5\"><span title=\"x/o:fünftens\">b</span></span></span></span><span title=\"f/m:sechs\"><span title=\"f/y:six\"><span title=\"it/is:6\"><span title=\"x/o:sechstens\">c</span></span></span></span><span title=\"f/m:sieben\"><span title=\"f/y:seven\"><span title=\"it/is:7\"><span title=\"x/o:siebtens\">a</span></span></span></span><span title=\"f/m:acht\"><span title=\"f/y:eight\"><span title=\"it/is:8\"><span title=\"x/o:achtens\">b</span></span></span></span><span title=\"f/m:neun\"><span title=\"f/y:nine\"><span title=\"it/is:9\"><span title=\"x/o:neuntens\">a</span></span></span></span></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>",
Akron7d45e6b2015-06-26 17:23:42 +0200635 res.at("/snippet").asText());
636 assertEquals("match-c1!d4-p3-9", res.at("/matchID").asText());
637 assertTrue(res.at("/pubDate").isMissingNode());
638 };
639
640
Akronb35261a2016-02-10 20:24:24 +0100641 @Test
642 public void indexAttributeInfo () throws IOException, QueryException {
643 KrillIndex ki = new KrillIndex();
644 ki.addDoc(createAttributeFieldDoc());
645 ki.commit();
Akron13db6152016-02-19 14:08:38 +0100646 Match km = ki.getMatchInfo("match-ca1!da1-p7-10", "tokens", null, null,
647 false, false);
Akronb35261a2016-02-10 20:24:24 +0100648 JsonNode res = mapper.readTree(km.toJsonString());
649 assertEquals("tokens", res.at("/field").asText());
650 assertTrue(res.at("/startMore").asBoolean());
651 assertTrue(res.at("/endMore").asBoolean());
652 assertEquals("ca1", res.at("/corpusID").asText());
653 assertEquals("da1", res.at("/docID").asText());
Akron13db6152016-02-19 14:08:38 +0100654 assertEquals("<span class=\"context-left\">"
655 + "<span class=\"more\">"
656 + "</span>"
657 + "</span>"
Akronf05fde62016-08-03 23:46:17 +0200658 + "<span class=\"match\"><mark>"
Akron13db6152016-02-19 14:08:38 +0100659 +
660 // "<span title=\"@:x/s:key:value\">"+
661 "<span title=\"f/m:acht\">"
662 + "<span title=\"f/y:eight\">"
663 + "<span title=\"it/is:8\">"
664 + "<span title=\"x/o:achtens\">b</span>"
665 +
666 // "</span>"+
667 "</span>" + "</span>" + "</span>" + "<span title=\"f/m:neun\">"
668 + "<span title=\"f/y:nine\">" + "<span title=\"it/is:9\">"
669 + "<span title=\"x/o:neuntens\">a</span>" + "</span>"
670 + "</span>" + "</span>" + "<span title=\"f/m:zehn\">"
671 + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">"
672 + "<span title=\"x/o:zehntens\">c</span>" + "</span>"
673 + "</span>" + "</span>" + "</mark>"
Akronf05fde62016-08-03 23:46:17 +0200674 + "</span>"
Akron13db6152016-02-19 14:08:38 +0100675 + "<span class=\"context-right\">" + "</span>",
676 res.at("/snippet").asText());
Akronb35261a2016-02-10 20:24:24 +0100677 };
678
679
Nils Diewaldbb33da22015-03-04 16:24:25 +0000680 private FieldDocument createSimpleFieldDoc () {
681 FieldDocument fd = new FieldDocument();
682 fd.addString("corpusID", "c1");
683 fd.addString("ID", "d1");
684 fd.addTV(
685 "tokens",
686 "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100687 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100688 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100689 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100690 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100691 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
692 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
693 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100694 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100695 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
696 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000697 return fd;
Nils Diewald1e5d5942014-05-20 13:29:53 +0000698 };
Nils Diewald84934372014-05-20 13:48:18 +0000699
Nils Diewaldbb33da22015-03-04 16:24:25 +0000700
701 private FieldDocument createSimpleFieldDoc2 () {
702 FieldDocument fd = new FieldDocument();
703 fd.addString("corpusID", "c1");
704 fd.addString("ID", "d1");
705 fd.addTV(
706 "tokens",
707 "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100708 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
709 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>4<s>0<s>0<s>0|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100710 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100711 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100712 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
713 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
714 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100715 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100716 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
717 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000718 return fd;
719 };
720
721
722 private FieldDocument createSimpleFieldDoc3 () {
723 FieldDocument fd = new FieldDocument();
724 fd.addString("corpusID", "c1");
725 fd.addString("ID", "d3");
726 fd.addTV(
727 "tokens",
728 "aa bb cc aa bb cc aa bb aa cc ",
Akron5f044032015-12-18 00:35:38 +0100729 "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100730 + "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]"
Akron43cea662016-02-15 23:43:59 +0100731 + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:base/s:s$<b>64<i>6<i>14<i>5]"
Akron5f044032015-12-18 00:35:38 +0100732 + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]"
margaretha4f995582015-12-14 14:14:34 +0100733 + "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]"
734 + "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]"
735 + "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]"
Akron5f044032015-12-18 00:35:38 +0100736 + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]"
margaretha4f995582015-12-14 14:14:34 +0100737 + "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]"
738 + "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000739 return fd;
740 };
741
742
743 private FieldDocument createSimpleFieldDoc4 () {
744 FieldDocument fd = new FieldDocument();
745 fd.addString("corpusID", "c1");
746 fd.addString("ID", "d4");
747 fd.addTV(
748 "tokens",
749 "abcabcabac",
Akron5f044032015-12-18 00:35:38 +0100750 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
margaretha4f995582015-12-14 14:14:34 +0100751 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100752 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akron5f044032015-12-18 00:35:38 +0100753 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
margaretha4f995582015-12-14 14:14:34 +0100754 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
Akron43cea662016-02-15 23:43:59 +0100755 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:base/s:s$<b>64<i>5<i>7<i>7]"
margaretha4f995582015-12-14 14:14:34 +0100756 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
Akron5f044032015-12-18 00:35:38 +0100757 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
margaretha4f995582015-12-14 14:14:34 +0100758 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
759 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000760 return fd;
Nils Diewald84934372014-05-20 13:48:18 +0000761 };
762
Akron13db6152016-02-19 14:08:38 +0100763
Akronb35261a2016-02-10 20:24:24 +0100764 /*
765 Check for terms|spans|rels ...
766 */
767 private FieldDocument createAttributeFieldDoc () {
768 FieldDocument fd = new FieldDocument();
769 fd.addString("corpusID", "ca1");
770 fd.addString("ID", "da1");
771 fd.addTV(
772 "tokens",
773 "abcabcabac",
774 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]"
775 + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
Akron43cea662016-02-15 23:43:59 +0100776 + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
Akronb35261a2016-02-10 20:24:24 +0100777 + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]"
778 + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
779 + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
780 + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
781 + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]"
782 + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
783 + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
784 return fd;
785 };
786
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000787};