blob: 7a5c550676e19650f7c92c0d4da515cb9193640f [file] [log] [blame]
Eliza Margaretha269e5a62014-09-30 16:58:23 +00001package de.ids_mannheim.korap.highlight;
2
Nils Diewaldf399a672013-11-18 17:55:22 +00003import java.util.*;
4import java.io.IOException;
5
Nils Diewald3caa00d2013-12-13 02:24:04 +00006import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
Nils Diewalda14ecd62015-02-26 21:00:20 +00008import de.ids_mannheim.korap.KrillIndex;
Nils Diewald0339d462015-02-26 14:53:56 +00009import de.ids_mannheim.korap.KrillQuery;
Nils Diewald8904c1d2015-02-26 16:13:18 +000010import de.ids_mannheim.korap.query.QueryBuilder;
Nils Diewald884dbcf2015-02-27 17:02:28 +000011import de.ids_mannheim.korap.response.Result;
Nils Diewaldbbd39a52015-02-23 19:56:57 +000012import de.ids_mannheim.korap.Krill;
Nils Diewald392bcf32015-02-26 20:01:17 +000013import de.ids_mannheim.korap.response.Match;
Nils Diewald3caa00d2013-12-13 02:24:04 +000014import de.ids_mannheim.korap.index.FieldDocument;
Nils Diewaldf399a672013-11-18 17:55:22 +000015
Nils Diewaldbe5943e2014-10-21 19:35:34 +000016import de.ids_mannheim.korap.util.QueryException;
17
Nils Diewald52bd1cd2014-11-06 20:44:24 +000018import static de.ids_mannheim.korap.TestSimple.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
20import static org.junit.Assert.*;
21import org.junit.Test;
22import org.junit.Ignore;
23import org.junit.runner.RunWith;
24import org.junit.runners.JUnit4;
25
26@RunWith(JUnit4.class)
27public class TestHighlight { // extends LuceneTestCase {
28
Nils Diewaldf399a672013-11-18 17:55:22 +000029 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000030 public void checkHighlights () throws IOException, QueryException {
Nils Diewaldf399a672013-11-18 17:55:22 +000031
Nils Diewalda14ecd62015-02-26 21:00:20 +000032 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 String json = new String("{" + " \"fields\" : [" + " { "
34 + " \"primaryData\" : \"abc\"" + " }," + " {"
35 + " \"name\" : \"tokens\"," + " \"data\" : ["
36 + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"],"
37 + " [ \"s:b\", \"i:b\", \"_1#1-2\" ],"
38 + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]"
39 + " }" + " ]" + "}");
Nils Diewaldf399a672013-11-18 17:55:22 +000040
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000041 FieldDocument fd = ki.addDoc(json);
42 ki.commit();
Nils Diewaldf399a672013-11-18 17:55:22 +000043
Nils Diewaldf399a672013-11-18 17:55:22 +000044
Nils Diewald8904c1d2015-02-26 16:13:18 +000045 QueryBuilder kq = new QueryBuilder("tokens");
Eliza Margaretha6f989202016-10-14 21:48:29 +020046 Result kr = ki
Akron4f52a632018-02-09 19:02:40 +010047 .search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b"))).toQuery());
Nils Diewald392bcf32015-02-26 20:01:17 +000048 Match km = kr.getMatch(0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000049 assertEquals(km.getStartPos(), 1);
50 assertEquals(km.getEndPos(), 2);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000051 assertEquals(km.getStartPos(1), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +000052 assertEquals(km.getEndPos(1), 2);
53 assertEquals(
Akronf05fde62016-08-03 23:46:17 +020054 "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark></mark></span><span class=\"context-right\">c</span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +000055 km.getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000056
Akron4f52a632018-02-09 19:02:40 +010057 kr = ki.search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b")))
58 .append(kq.nr(2, kq.seg("s:c"))).toQuery());
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000059 km = kr.getMatch(0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000060 assertEquals(km.getStartPos(), 1);
61 assertEquals(km.getEndPos(), 3);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000062 assertEquals(km.getStartPos(1), 1);
Nils Diewaldbb33da22015-03-04 16:24:25 +000063 assertEquals(km.getEndPos(1), 2);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000064 assertEquals(km.getStartPos(2), 2);
Nils Diewaldbb33da22015-03-04 16:24:25 +000065 assertEquals(km.getEndPos(2), 3);
66 assertEquals(
Akronf05fde62016-08-03 23:46:17 +020067 "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +000068 km.getSnippetHTML());
Nils Diewaldf399a672013-11-18 17:55:22 +000069
Nils Diewald8c221782013-12-13 19:52:58 +000070
Nils Diewaldbb33da22015-03-04 16:24:25 +000071 kr = ki.search((SpanQuery) kq
Akron4f52a632018-02-09 19:02:40 +010072 .seq(kq.nr(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b"))))
73 .append(kq.nr(2, kq.seg("s:c"))).toQuery());
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000074 km = kr.getMatch(0);
75 assertEquals(km.getStartPos(), 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000076 assertEquals(km.getEndPos(), 3);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000077 assertEquals(km.getStartPos(1), 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000078 assertEquals(km.getEndPos(1), 2);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000079 assertEquals(km.getStartPos(2), 2);
Nils Diewaldbb33da22015-03-04 16:24:25 +000080 assertEquals(km.getEndPos(2), 3);
81 assertEquals(
Akronf05fde62016-08-03 23:46:17 +020082 "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-1 level-0\">ab</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +000083 km.getSnippetHTML());
Nils Diewald833fe7e2013-12-14 16:06:33 +000084
85
Eliza Margaretha6f989202016-10-14 21:48:29 +020086 kr = ki.search(
Akron4f52a632018-02-09 19:02:40 +010087 (SpanQuery) kq.nr(
Eliza Margaretha6f989202016-10-14 21:48:29 +020088 3, kq
Akron4f52a632018-02-09 19:02:40 +010089 .seq(kq.nr(1,
Eliza Margaretha6f989202016-10-14 21:48:29 +020090 kq.seq(kq.seg("s:a"))
91 .append(kq.seg("s:b"))))
Akron4f52a632018-02-09 19:02:40 +010092 .append(kq.nr(2, kq.seg("s:c"))))
Eliza Margaretha6f989202016-10-14 21:48:29 +020093 .toQuery());
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000094 km = kr.getMatch(0);
95 assertEquals(km.getStartPos(), 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 assertEquals(km.getEndPos(), 3);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000097 assertEquals(km.getStartPos(1), 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 assertEquals(km.getEndPos(1), 2);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +000099 assertEquals(km.getStartPos(2), 2);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100 assertEquals(km.getEndPos(2), 3);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000101 assertEquals(km.getStartPos(3), 0);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000102 assertEquals(km.getEndPos(3), 3);
103 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200104 "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-3 level-0\"><mark class=\"class-1 level-1\">ab</mark><mark class=\"class-2 level-1\">c</mark></mark></mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000105 km.getSnippetHTML());
Nils Diewald833fe7e2013-12-14 16:06:33 +0000106 };
107
Nils Diewaldbb33da22015-03-04 16:24:25 +0000108
Nils Diewald833fe7e2013-12-14 16:06:33 +0000109 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000110 public void checkHighlightsManually () throws IOException, QueryException {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000111
Nils Diewalda14ecd62015-02-26 21:00:20 +0000112 KrillIndex ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 String json = new String("{" + " \"fields\" : [" + " { "
114 + " \"primaryData\" : \"abc\"" + " }," + " {"
115 + " \"name\" : \"tokens\"," + " \"data\" : ["
116 + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"],"
117 + " [ \"s:b\", \"i:b\", \"_1#1-2\" ],"
118 + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]"
119 + " }" + " ]" + "}");
Nils Diewald833fe7e2013-12-14 16:06:33 +0000120
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000121 FieldDocument fd = ki.addDoc(json);
122 ki.commit();
Nils Diewald833fe7e2013-12-14 16:06:33 +0000123
Nils Diewald8904c1d2015-02-26 16:13:18 +0000124 QueryBuilder kq = new QueryBuilder("tokens");
Nils Diewald833fe7e2013-12-14 16:06:33 +0000125
Nils Diewaldbb33da22015-03-04 16:24:25 +0000126 Result kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a"))
127 .append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery());
Nils Diewald392bcf32015-02-26 20:01:17 +0000128 Match km = kr.getMatch(0);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000129 km.addHighlight(0, 1, (short) 7);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000130 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200131 "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">ab</mark>c</mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000132 km.getSnippetHTML());
Nils Diewald833fe7e2013-12-14 16:06:33 +0000133
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000134 km.addHighlight(1, 2, (short) 6);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 assertEquals(
Akronf05fde62016-08-03 23:46:17 +0200136 "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">a<mark class=\"class-6 level-1\">b</mark></mark><mark class=\"class-6 level-1\">c</mark></mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000137 km.getSnippetHTML());
Nils Diewald833fe7e2013-12-14 16:06:33 +0000138
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000139 km.addHighlight(0, 1, (short) 5);
Akron417eaa92017-01-13 18:00:15 +0100140 assertEquals("[[{5:{7:a{6:b}}}{6:c}]]", km.getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 assertEquals(
Akron417eaa92017-01-13 18:00:15 +0100142 "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-5 level-0\"><mark class=\"class-7 level-1\">a<mark class=\"class-6 level-2\">b</mark></mark></mark><mark class=\"class-6 level-2\">c</mark></mark></span><span class=\"context-right\"></span>",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000143 km.getSnippetHTML());
Nils Diewald833fe7e2013-12-14 16:06:33 +0000144
Nils Diewaldf399a672013-11-18 17:55:22 +0000145 };
Nils Diewald67f54042014-09-27 14:53:38 +0000146
Nils Diewaldbb33da22015-03-04 16:24:25 +0000147
Nils Diewald67f54042014-09-27 14:53:38 +0000148 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000149 public void highlightMissingBug () throws IOException, QueryException {
Nils Diewalda14ecd62015-02-26 21:00:20 +0000150 KrillIndex ki = new KrillIndex();
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000151 FieldDocument fd = new FieldDocument();
152 fd.addString("ID", "doc-1");
153 fd.addString("UID", "1");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200154 fd.addTV("base", "abab",
155 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]"
156 + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]");
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000157 ki.addDoc(fd);
158 fd = new FieldDocument();
159 fd.addString("ID", "doc-2");
160 fd.addString("UID", "2");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000161 fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
162 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]");
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000163 ki.addDoc(fd);
Nils Diewald67f54042014-09-27 14:53:38 +0000164
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000165 // Commit!
166 ki.commit();
167 fd = new FieldDocument();
168 fd.addString("ID", "doc-3");
169 fd.addString("UID", "3");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200170 fd.addTV("base", "abab",
171 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]"
172 + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]");
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000173 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000174
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000175 // Commit!
176 ki.commit();
177 fd = new FieldDocument();
178 fd.addString("ID", "doc-4");
179 fd.addString("UID", "4");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000180 fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
181 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]");
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000182 ki.addDoc(fd);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000183
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000184 // Commit!
185 ki.commit();
Nils Diewald67f54042014-09-27 14:53:38 +0000186
Nils Diewald8904c1d2015-02-26 16:13:18 +0000187 QueryBuilder kq = new QueryBuilder("base");
Akron4f52a632018-02-09 19:02:40 +0100188 SpanQuery q = (SpanQuery) kq.or(kq.nr(1, kq.seg("s:a")))
189 .or(kq.nr(2, kq.seg("s:b"))).toQuery();
Nils Diewald884dbcf2015-02-27 17:02:28 +0000190 Result kr = ki.search(q);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000191 assertEquals((long) 14, kr.getTotalResults());
Akronf05fde62016-08-03 23:46:17 +0200192 assertEquals("[[{1:a}]]bab", kr.getMatch(0).getSnippetBrackets());
Akronb98c2662017-02-14 19:38:05 +0100193
Akronf05fde62016-08-03 23:46:17 +0200194 assertEquals("a[[{2:b}]]ab", kr.getMatch(1).getSnippetBrackets());
195 assertEquals("ab[[{1:a}]]b", kr.getMatch(2).getSnippetBrackets());
196 assertEquals("aba[[{2:b}]]", kr.getMatch(3).getSnippetBrackets());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000197
Akronf05fde62016-08-03 23:46:17 +0200198 assertEquals("[[{1:a}]]ba", kr.getMatch(4).getSnippetBrackets());
199 assertEquals("a[[{2:b}]]a", kr.getMatch(5).getSnippetBrackets());
200 assertEquals("ab[[{1:a}]]", kr.getMatch(6).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000201
Akronf05fde62016-08-03 23:46:17 +0200202 assertEquals("[[{1:a}]]bab", kr.getMatch(7).getSnippetBrackets());
203 assertEquals("a[[{2:b}]]ab", kr.getMatch(8).getSnippetBrackets());
204 assertEquals("ab[[{1:a}]]b", kr.getMatch(9).getSnippetBrackets());
205 assertEquals("aba[[{2:b}]]", kr.getMatch(10).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000206
Akronf05fde62016-08-03 23:46:17 +0200207 assertEquals("[[{1:a}]]ba", kr.getMatch(11).getSnippetBrackets());
208 assertEquals("a[[{2:b}]]a", kr.getMatch(12).getSnippetBrackets());
209 assertEquals("ab[[{1:a}]]", kr.getMatch(13).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000210
Nils Diewald8904c1d2015-02-26 16:13:18 +0000211 kq = new QueryBuilder("base");
Akron4f52a632018-02-09 19:02:40 +0100212 q = (SpanQuery) kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c")))
Eliza Margaretha6f989202016-10-14 21:48:29 +0200213 .toQuery();
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000214 Krill qs = new Krill(q);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000215 qs.getMeta().getContext().left.setToken(true).setLength((short) 1);
216 qs.getMeta().getContext().right.setToken(true).setLength((short) 1);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000217 kr = ki.search(qs);
218 assertEquals((long) 10, kr.getTotalResults());
Nils Diewald67f54042014-09-27 14:53:38 +0000219
Akronf05fde62016-08-03 23:46:17 +0200220 assertEquals("[[{1:a}]]b ...", kr.getMatch(0).getSnippetBrackets());
221 assertEquals("... b[[{2:a}]]b", kr.getMatch(1).getSnippetBrackets());
222 assertEquals("... a[[{1:b}]]", kr.getMatch(2).getSnippetBrackets());
223 assertEquals("[[{1:a}]]b ...", kr.getMatch(3).getSnippetBrackets());
224 assertEquals("... b[[{2:a}]]", kr.getMatch(4).getSnippetBrackets());
225 assertEquals("[[{1:a}]]b ...", kr.getMatch(5).getSnippetBrackets());
226 assertEquals("... b[[{2:a}]]b", kr.getMatch(6).getSnippetBrackets());
227 assertEquals("... a[[{1:b}]]", kr.getMatch(7).getSnippetBrackets());
228 assertEquals("[[{1:a}]]b ...", kr.getMatch(8).getSnippetBrackets());
229 assertEquals("... b[[{2:a}]]", kr.getMatch(9).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000230
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000231 qs.getMeta().getContext().left.setToken(true).setLength((short) 0);
232 qs.getMeta().getContext().right.setToken(true).setLength((short) 0);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000233 kr = ki.search(qs);
234 assertEquals((long) 10, kr.getTotalResults());
Nils Diewald67f54042014-09-27 14:53:38 +0000235
Akronf05fde62016-08-03 23:46:17 +0200236 assertEquals("[[{1:a}]] ...", kr.getMatch(0).getSnippetBrackets());
237 assertEquals("... [[{2:a}]] ...", kr.getMatch(1).getSnippetBrackets());
238 assertEquals("... [[{1:b}]]", kr.getMatch(2).getSnippetBrackets());
239 assertEquals("[[{1:a}]] ...", kr.getMatch(3).getSnippetBrackets());
240 assertEquals("... [[{2:a}]]", kr.getMatch(4).getSnippetBrackets());
241 assertEquals("[[{1:a}]] ...", kr.getMatch(5).getSnippetBrackets());
242 assertEquals("... [[{2:a}]] ...", kr.getMatch(6).getSnippetBrackets());
243 assertEquals("... [[{1:b}]]", kr.getMatch(7).getSnippetBrackets());
244 assertEquals("[[{1:a}]] ...", kr.getMatch(8).getSnippetBrackets());
245 assertEquals("... [[{2:a}]]", kr.getMatch(9).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000246
Eliza Margaretha6f989202016-10-14 21:48:29 +0200247 q = (SpanQuery) kq
Akron4f52a632018-02-09 19:02:40 +0100248 .nr(3, kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c"))))
Nils Diewaldbb33da22015-03-04 16:24:25 +0000249 .toQuery();
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000250 qs = new Krill(q);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000251 qs.getMeta().getContext().left.setToken(true).setLength((short) 0);
252 qs.getMeta().getContext().right.setToken(true).setLength((short) 0);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000253 kr = ki.search(qs);
254 assertEquals((long) 10, kr.getTotalResults());
Nils Diewald67f54042014-09-27 14:53:38 +0000255
Akron417eaa92017-01-13 18:00:15 +0100256 assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(0).getSnippetBrackets());
257 assertEquals("... [[{2:{3:a}}]] ...",
Eliza Margaretha6f989202016-10-14 21:48:29 +0200258 kr.getMatch(1).getSnippetBrackets());
Akron417eaa92017-01-13 18:00:15 +0100259 assertEquals("... [[{1:{3:b}}]]", kr.getMatch(2).getSnippetBrackets());
260 assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(3).getSnippetBrackets());
261 assertEquals("... [[{2:{3:a}}]]", kr.getMatch(4).getSnippetBrackets());
262 assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(5).getSnippetBrackets());
263 assertEquals("... [[{2:{3:a}}]] ...",
Eliza Margaretha6f989202016-10-14 21:48:29 +0200264 kr.getMatch(6).getSnippetBrackets());
Akron417eaa92017-01-13 18:00:15 +0100265 assertEquals("... [[{1:{3:b}}]]", kr.getMatch(7).getSnippetBrackets());
266 assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(8).getSnippetBrackets());
267 assertEquals("... [[{2:{3:a}}]]", kr.getMatch(9).getSnippetBrackets());
Nils Diewald67f54042014-09-27 14:53:38 +0000268 };
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000269
Nils Diewaldbb33da22015-03-04 16:24:25 +0000270
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000271 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000272 public void highlightGreaterClassBug () throws IOException, QueryException {
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000273
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000274 // Construct index
Nils Diewalda14ecd62015-02-26 21:00:20 +0000275 KrillIndex ki = new KrillIndex();
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000276 // Indexing test files
Nils Diewaldbb33da22015-03-04 16:24:25 +0000277 for (String i : new String[] { "00001", "00002" }) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200278 ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
Nils Diewaldbb33da22015-03-04 16:24:25 +0000279 true);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000280 };
281 ki.commit();
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000282
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000283 // 15
Eliza Margaretha6f989202016-10-14 21:48:29 +0200284 String json = getJsonString(getClass()
285 .getResource("/queries/bugs/greater_highlights_15.jsonld")
286 .getFile());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000287
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000288 Krill ks = new Krill(json);
Nils Diewald884dbcf2015-02-27 17:02:28 +0000289 Result kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000290 assertEquals(kr.getSerialQuery(), "{15: tokens:s:Alphabet}");
291 assertEquals(kr.getTotalResults(), 7);
292 assertEquals(kr.getStartIndex(), 0);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200293 assertEquals(kr.getMatch(0).getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200294 "... 2. Herkunft Die aus dem proto-semitischen [[{15:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ...");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200295 assertEquals(kr.getMatch(0).getSnippetHTML(),
Akronf05fde62016-08-03 23:46:17 +0200296 "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-15 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000297
Eliza Margaretha6f989202016-10-14 21:48:29 +0200298 json = getJsonString(getClass()
299 .getResource("/queries/bugs/greater_highlights_16.jsonld")
300 .getFile());
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000301
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000302 // 16
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000303 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000304 kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000305 assertEquals(kr.getSerialQuery(), "{16: tokens:s:Alphabet}");
306 assertEquals(kr.getTotalResults(), 7);
307 assertEquals(kr.getStartIndex(), 0);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200308 assertEquals(kr.getMatch(0).getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200309 "... 2. Herkunft Die aus dem proto-semitischen [[{16:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ...");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200310 assertEquals(kr.getMatch(0).getSnippetHTML(),
Akronf05fde62016-08-03 23:46:17 +0200311 "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-16 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000312
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000313 // 127
Eliza Margaretha6f989202016-10-14 21:48:29 +0200314 json = getJsonString(getClass()
315 .getResource("/queries/bugs/greater_highlights_127.jsonld")
316 .getFile());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000317
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000318 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000319 kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000320 assertEquals(kr.getSerialQuery(), "{127: tokens:s:Alphabet}");
321 assertEquals(kr.getTotalResults(), 7);
322 assertEquals(kr.getStartIndex(), 0);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200323 assertEquals(kr.getMatch(0).getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200324 "... 2. Herkunft Die aus dem proto-semitischen [[{127:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ...");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200325 assertEquals(kr.getMatch(0).getSnippetHTML(),
Akronf05fde62016-08-03 23:46:17 +0200326 "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-127 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000327
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000328 // 255
Eliza Margaretha6f989202016-10-14 21:48:29 +0200329 json = getJsonString(getClass()
330 .getResource("/queries/bugs/greater_highlights_255.jsonld")
331 .getFile());
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000332
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000333 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000334 kr = ks.apply(ki);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000335 assertEquals(kr.getSerialQuery(), "{255: tokens:s:Alphabet}");
336 assertEquals(kr.getTotalResults(), 7);
337 assertEquals(kr.getStartIndex(), 0);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200338 assertEquals(kr.getMatch(0).getSnippetBrackets(),
Akronf05fde62016-08-03 23:46:17 +0200339 "... 2. Herkunft Die aus dem proto-semitischen [[Alphabet]] stammende Urform des Buchstaben ist wahrscheinlich ...");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200340 assertEquals(kr.getMatch(0).getSnippetHTML(),
Akronf05fde62016-08-03 23:46:17 +0200341 "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark>Alphabet</mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000342
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000343 // 300
Eliza Margaretha6f989202016-10-14 21:48:29 +0200344 json = getJsonString(getClass()
345 .getResource("/queries/bugs/greater_highlights_300.jsonld")
346 .getFile());
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000347
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000348 ks = new Krill(json);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000349 kr = ks.apply(ki);
Nils Diewaldc99ed5b2015-01-21 22:08:53 +0000350 assertEquals(709, kr.getError(0).getCode());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200351 assertEquals("Valid class numbers exceeded",
352 kr.getError(0).getMessage());
Nils Diewaldc471b182014-11-19 22:51:15 +0000353
Nils Diewaldbb33da22015-03-04 16:24:25 +0000354 assertEquals(kr.getError(0).getMessage(),
355 "Valid class numbers exceeded");
Nils Diewald52bd1cd2014-11-06 20:44:24 +0000356 };
Akronfc2625e2016-07-27 01:52:28 +0200357
Akron08f4ceb2016-08-03 23:53:32 +0200358
Akronfc2625e2016-07-27 01:52:28 +0200359 @Test
360 public void highlightEscapes () throws IOException, QueryException {
361 KrillIndex ki = new KrillIndex();
362 FieldDocument fd = new FieldDocument();
363 fd.addString("ID", "doc-1");
364 fd.addString("UID", "1");
365 fd.addString("textSigle", "c1/d1/1");
366
367 // Make this clean for HTML and Brackets!
368
Eliza Margaretha6f989202016-10-14 21:48:29 +0200369 fd.addTV("base", "Mit \"Mann\" & {Ma\\us}",
Akron08f4ceb2016-08-03 23:53:32 +0200370 "[(0-3)s:Mit|i:mit|_0#0-3|-:t$<i>4|<>:base/t:t$<b>64<i>0<i>20<i>4<b>0]"
371 + "[(4-10)s:\"Mann\"|i:\"mann\"|base/l:\"Mann\"|_1#4-10]"
372 + "[(11-12)s:&|i:&|base/l:&|_2#11-12]"
373 + "[(13-20)s:{Ma\\us}|i:{ma\\us}|_3#13-20]");
Akronfc2625e2016-07-27 01:52:28 +0200374 ki.addDoc(fd);
375
376 // Commit!
377 ki.commit();
378 QueryBuilder kq = new QueryBuilder("base");
379 SpanQuery q = (SpanQuery) kq.tag("base/t:t").toQuery();
380
381 Krill qs = new Krill(q);
382 qs.getMeta().getContext().left.setToken(true).setLength((short) 0);
383 qs.getMeta().getContext().right.setToken(true).setLength((short) 0);
384
385 Result kr = ki.search(qs);
386 assertEquals((long) 1, kr.getTotalResults());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200387 assertEquals("[[Mit \"Mann\" & \\{Ma\\\\us\\}]]",
388 kr.getMatch(0).getSnippetBrackets());
Akron08f4ceb2016-08-03 23:53:32 +0200389 assertEquals(
390 "<span class=\"context-left\"></span><span class=\"match\"><mark>Mit &quot;Mann&quot; &amp; {Ma\\us}</mark></span><span class=\"context-right\"></span>",
391 kr.getMatch(0).getSnippetHTML());
Akronfc2625e2016-07-27 01:52:28 +0200392 assertEquals("match-c1/d1/1-p0-4", kr.getMatch(0).getID());
393
Akron08f4ceb2016-08-03 23:53:32 +0200394 Match km = ki.getMatchInfo("match-c1/d1/1-p0-4", "base", true,
395 (ArrayList) null, (ArrayList) null, true, true, false);
Akronfc2625e2016-07-27 01:52:28 +0200396 assertEquals(0, km.getStartPos());
Eliza Margaretha6f989202016-10-14 21:48:29 +0200397 assertEquals(
398 "<span class=\"context-left\"></span>"
399 + "<span class=\"match\"><mark><span title=\"base/t:t\">"
400 + "Mit " + "<span title=\"base/l:&quot;Mann&quot;\">"
401 + "&quot;Mann&quot;" + "</span>" + " "
402 + "<span title=\"base/l:&amp;\">&amp;</span>" + " "
403 + "{Ma\\us}" + "</span>" + "</mark></span>"
404 + "<span class=\"context-right\"></span>",
405 km.getSnippetHTML());
Akronfc2625e2016-07-27 01:52:28 +0200406 };
Akron8288ad02016-11-11 19:23:05 +0100407
408
409 @Test
410 public void highlightEmptySpan () throws IOException, QueryException {
411
412 KrillIndex ki = new KrillIndex();
413
414 // <>:s$<b>65<i>38<b>0
415 // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
416 FieldDocument fd = new FieldDocument();
417 fd.addTV("base", "x y z h i j h i j h i j ",
418 "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
419 + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]"
420 + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6|<>:a$<b>65<i>6]"
421 + "[(9-12)s:h<b>0]" + "[(12-15)s:i]" + "[(15-18)s:j]"
422 + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]"
423 + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]");
424 ki.addDoc(fd);
425
426 // Commit!
427 ki.commit();
428 QueryBuilder kq = new QueryBuilder("base");
429 SpanQuery q = (SpanQuery) kq.tag("a").toQuery();
430
431 Krill qs = new Krill(q);
432 qs.getMeta().getContext().left.setToken(true).setLength((short) 5);
433 qs.getMeta().getContext().right.setToken(true).setLength((short) 5);
434
435 Result kr = ki.search(qs);
436 assertEquals((long) 4, kr.getTotalResults());
437
438 Match km = kr.getMatch(2);
439 assertEquals(
440 "<span class=\"context-left\">"+
441 "</span>"+
442 "<span class=\"match\">"+
443 "<mark>x y z </mark>"+
444 "</span><span class=\"context-right\">h i j h i j h i j </span>",
445 km.getSnippetHTML());
446
447 km = kr.getMatch(3);
448 assertEquals(
Akron1dd062d2016-11-11 23:21:46 +0100449 "<span class=\"context-left\"><span class=\"match\"></span></span>",
Akron8288ad02016-11-11 19:23:05 +0100450 km.getSnippetHTML());
451
452 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000453};