| Eliza Margaretha | 269e5a6 | 2014-09-30 16:58:23 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.highlight; |
| 2 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 3 | import java.util.*; |
| 4 | import java.io.IOException; |
| 5 | |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 6 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 7 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 8 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 9 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 10 | import de.ids_mannheim.korap.query.QueryBuilder; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 11 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.Krill; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.index.FieldDocument; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 15 | |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.util.QueryException; |
| 17 | |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 18 | import static de.ids_mannheim.korap.TestSimple.*; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 19 | |
| 20 | import static org.junit.Assert.*; |
| 21 | import org.junit.Test; |
| 22 | import org.junit.Ignore; |
| 23 | import org.junit.runner.RunWith; |
| 24 | import org.junit.runners.JUnit4; |
| 25 | |
| 26 | @RunWith(JUnit4.class) |
| 27 | public class TestHighlight { // extends LuceneTestCase { |
| 28 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 29 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 30 | public void checkHighlights () throws IOException, QueryException { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 31 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 32 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 33 | String json = new String("{" + " \"fields\" : [" + " { " |
| 34 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 35 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 36 | + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," |
| 37 | + " [ \"s:b\", \"i:b\", \"_1#1-2\" ]," |
| 38 | + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]" |
| 39 | + " }" + " ]" + "}"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 40 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 41 | FieldDocument fd = ki.addDoc(json); |
| 42 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 43 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 44 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 45 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 46 | Result kr = ki |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 47 | .search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b"))).toQuery()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 48 | Match km = kr.getMatch(0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 49 | assertEquals(km.getStartPos(), 1); |
| 50 | assertEquals(km.getEndPos(), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 51 | assertEquals(km.getStartPos(1), 1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 52 | assertEquals(km.getEndPos(1), 2); |
| 53 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 54 | "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark></mark></span><span class=\"context-right\">c</span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 55 | km.getSnippetHTML()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 56 | |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 57 | kr = ki.search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b"))) |
| 58 | .append(kq.nr(2, kq.seg("s:c"))).toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 59 | km = kr.getMatch(0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 60 | assertEquals(km.getStartPos(), 1); |
| 61 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 62 | assertEquals(km.getStartPos(1), 1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 64 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 65 | assertEquals(km.getEndPos(2), 3); |
| 66 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 67 | "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 68 | km.getSnippetHTML()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | 8c22178 | 2013-12-13 19:52:58 +0000 | [diff] [blame] | 70 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 71 | kr = ki.search((SpanQuery) kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 72 | .seq(kq.nr(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))) |
| 73 | .append(kq.nr(2, kq.seg("s:c"))).toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 74 | km = kr.getMatch(0); |
| 75 | assertEquals(km.getStartPos(), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 77 | assertEquals(km.getStartPos(1), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 78 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 79 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 80 | assertEquals(km.getEndPos(2), 3); |
| 81 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 82 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-1 level-0\">ab</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 83 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 84 | |
| 85 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 86 | kr = ki.search( |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 87 | (SpanQuery) kq.nr( |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 88 | 3, kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 89 | .seq(kq.nr(1, |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 90 | kq.seq(kq.seg("s:a")) |
| 91 | .append(kq.seg("s:b")))) |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 92 | .append(kq.nr(2, kq.seg("s:c")))) |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 93 | .toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 94 | km = kr.getMatch(0); |
| 95 | assertEquals(km.getStartPos(), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 96 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 97 | assertEquals(km.getStartPos(1), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 99 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 100 | assertEquals(km.getEndPos(2), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 101 | assertEquals(km.getStartPos(3), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 102 | assertEquals(km.getEndPos(3), 3); |
| 103 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 104 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-3 level-0\"><mark class=\"class-1 level-1\">ab</mark><mark class=\"class-2 level-1\">c</mark></mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 105 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 106 | }; |
| 107 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 109 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | public void checkHighlightsManually () throws IOException, QueryException { |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 111 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 112 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | String json = new String("{" + " \"fields\" : [" + " { " |
| 114 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 115 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 116 | + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," |
| 117 | + " [ \"s:b\", \"i:b\", \"_1#1-2\" ]," |
| 118 | + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]" |
| 119 | + " }" + " ]" + "}"); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 120 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 121 | FieldDocument fd = ki.addDoc(json); |
| 122 | ki.commit(); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 123 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 124 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 125 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 126 | Result kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a")) |
| 127 | .append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 128 | Match km = kr.getMatch(0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 129 | km.addHighlight(0, 1, (short) 7); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 131 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">ab</mark>c</mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 133 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 134 | km.addHighlight(1, 2, (short) 6); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 136 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">a<mark class=\"class-6 level-1\">b</mark></mark><mark class=\"class-6 level-1\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 138 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 139 | km.addHighlight(0, 1, (short) 5); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 140 | assertEquals("[[{5:{7:a{6:b}}}{6:c}]]", km.getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 141 | assertEquals( |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 142 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-5 level-0\"><mark class=\"class-7 level-1\">a<mark class=\"class-6 level-2\">b</mark></mark></mark><mark class=\"class-6 level-2\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 143 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 144 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | }; |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 146 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 147 | |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 148 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | public void highlightMissingBug () throws IOException, QueryException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 150 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 151 | FieldDocument fd = new FieldDocument(); |
| 152 | fd.addString("ID", "doc-1"); |
| 153 | fd.addString("UID", "1"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 154 | fd.addTV("base", "abab", |
| 155 | "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]" |
| 156 | + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 157 | ki.addDoc(fd); |
| 158 | fd = new FieldDocument(); |
| 159 | fd.addString("ID", "doc-2"); |
| 160 | fd.addString("UID", "2"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" |
| 162 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 163 | ki.addDoc(fd); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 164 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 165 | // Commit! |
| 166 | ki.commit(); |
| 167 | fd = new FieldDocument(); |
| 168 | fd.addString("ID", "doc-3"); |
| 169 | fd.addString("UID", "3"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 170 | fd.addTV("base", "abab", |
| 171 | "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]" |
| 172 | + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 173 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 174 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 175 | // Commit! |
| 176 | ki.commit(); |
| 177 | fd = new FieldDocument(); |
| 178 | fd.addString("ID", "doc-4"); |
| 179 | fd.addString("UID", "4"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 180 | fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" |
| 181 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 182 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 183 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 184 | // Commit! |
| 185 | ki.commit(); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 186 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 187 | QueryBuilder kq = new QueryBuilder("base"); |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 188 | SpanQuery q = (SpanQuery) kq.or(kq.nr(1, kq.seg("s:a"))) |
| 189 | .or(kq.nr(2, kq.seg("s:b"))).toQuery(); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 190 | Result kr = ki.search(q); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 191 | assertEquals((long) 14, kr.getTotalResults()); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 192 | assertEquals("[[{1:a}]]bab", kr.getMatch(0).getSnippetBrackets()); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 193 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 194 | assertEquals("a[[{2:b}]]ab", kr.getMatch(1).getSnippetBrackets()); |
| 195 | assertEquals("ab[[{1:a}]]b", kr.getMatch(2).getSnippetBrackets()); |
| 196 | assertEquals("aba[[{2:b}]]", kr.getMatch(3).getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 197 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 198 | assertEquals("[[{1:a}]]ba", kr.getMatch(4).getSnippetBrackets()); |
| 199 | assertEquals("a[[{2:b}]]a", kr.getMatch(5).getSnippetBrackets()); |
| 200 | assertEquals("ab[[{1:a}]]", kr.getMatch(6).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 201 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 202 | assertEquals("[[{1:a}]]bab", kr.getMatch(7).getSnippetBrackets()); |
| 203 | assertEquals("a[[{2:b}]]ab", kr.getMatch(8).getSnippetBrackets()); |
| 204 | assertEquals("ab[[{1:a}]]b", kr.getMatch(9).getSnippetBrackets()); |
| 205 | assertEquals("aba[[{2:b}]]", kr.getMatch(10).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 206 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 207 | assertEquals("[[{1:a}]]ba", kr.getMatch(11).getSnippetBrackets()); |
| 208 | assertEquals("a[[{2:b}]]a", kr.getMatch(12).getSnippetBrackets()); |
| 209 | assertEquals("ab[[{1:a}]]", kr.getMatch(13).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 210 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 211 | kq = new QueryBuilder("base"); |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 212 | q = (SpanQuery) kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c"))) |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 213 | .toQuery(); |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 214 | Krill qs = new Krill(q); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 215 | qs.getMeta().getContext().left.setToken(true).setLength((short) 1); |
| 216 | qs.getMeta().getContext().right.setToken(true).setLength((short) 1); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 217 | kr = ki.search(qs); |
| 218 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 219 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 220 | assertEquals("[[{1:a}]]b ...", kr.getMatch(0).getSnippetBrackets()); |
| 221 | assertEquals("... b[[{2:a}]]b", kr.getMatch(1).getSnippetBrackets()); |
| 222 | assertEquals("... a[[{1:b}]]", kr.getMatch(2).getSnippetBrackets()); |
| 223 | assertEquals("[[{1:a}]]b ...", kr.getMatch(3).getSnippetBrackets()); |
| 224 | assertEquals("... b[[{2:a}]]", kr.getMatch(4).getSnippetBrackets()); |
| 225 | assertEquals("[[{1:a}]]b ...", kr.getMatch(5).getSnippetBrackets()); |
| 226 | assertEquals("... b[[{2:a}]]b", kr.getMatch(6).getSnippetBrackets()); |
| 227 | assertEquals("... a[[{1:b}]]", kr.getMatch(7).getSnippetBrackets()); |
| 228 | assertEquals("[[{1:a}]]b ...", kr.getMatch(8).getSnippetBrackets()); |
| 229 | assertEquals("... b[[{2:a}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 230 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 231 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 232 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 233 | kr = ki.search(qs); |
| 234 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 235 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 236 | assertEquals("[[{1:a}]] ...", kr.getMatch(0).getSnippetBrackets()); |
| 237 | assertEquals("... [[{2:a}]] ...", kr.getMatch(1).getSnippetBrackets()); |
| 238 | assertEquals("... [[{1:b}]]", kr.getMatch(2).getSnippetBrackets()); |
| 239 | assertEquals("[[{1:a}]] ...", kr.getMatch(3).getSnippetBrackets()); |
| 240 | assertEquals("... [[{2:a}]]", kr.getMatch(4).getSnippetBrackets()); |
| 241 | assertEquals("[[{1:a}]] ...", kr.getMatch(5).getSnippetBrackets()); |
| 242 | assertEquals("... [[{2:a}]] ...", kr.getMatch(6).getSnippetBrackets()); |
| 243 | assertEquals("... [[{1:b}]]", kr.getMatch(7).getSnippetBrackets()); |
| 244 | assertEquals("[[{1:a}]] ...", kr.getMatch(8).getSnippetBrackets()); |
| 245 | assertEquals("... [[{2:a}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 246 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 247 | q = (SpanQuery) kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 248 | .nr(3, kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c")))) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 249 | .toQuery(); |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 250 | qs = new Krill(q); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 251 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 252 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 253 | kr = ki.search(qs); |
| 254 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 255 | |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 256 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(0).getSnippetBrackets()); |
| 257 | assertEquals("... [[{2:{3:a}}]] ...", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 258 | kr.getMatch(1).getSnippetBrackets()); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 259 | assertEquals("... [[{1:{3:b}}]]", kr.getMatch(2).getSnippetBrackets()); |
| 260 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(3).getSnippetBrackets()); |
| 261 | assertEquals("... [[{2:{3:a}}]]", kr.getMatch(4).getSnippetBrackets()); |
| 262 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(5).getSnippetBrackets()); |
| 263 | assertEquals("... [[{2:{3:a}}]] ...", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 264 | kr.getMatch(6).getSnippetBrackets()); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 265 | assertEquals("... [[{1:{3:b}}]]", kr.getMatch(7).getSnippetBrackets()); |
| 266 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(8).getSnippetBrackets()); |
| 267 | assertEquals("... [[{2:{3:a}}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 268 | }; |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 269 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 270 | |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 271 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 272 | public void highlightGreaterClassBug () throws IOException, QueryException { |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 273 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 274 | // Construct index |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 275 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 276 | // Indexing test files |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 277 | for (String i : new String[] { "00001", "00002" }) { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 278 | ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 279 | true); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 280 | }; |
| 281 | ki.commit(); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 282 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 283 | // 15 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 284 | String json = getJsonString(getClass() |
| 285 | .getResource("/queries/bugs/greater_highlights_15.jsonld") |
| 286 | .getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 287 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 288 | Krill ks = new Krill(json); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 289 | Result kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 290 | assertEquals(kr.getSerialQuery(), "{15: tokens:s:Alphabet}"); |
| 291 | assertEquals(kr.getTotalResults(), 7); |
| 292 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 293 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 294 | "... 2. Herkunft Die aus dem proto-semitischen [[{15:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 295 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 296 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-15 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 297 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 298 | json = getJsonString(getClass() |
| 299 | .getResource("/queries/bugs/greater_highlights_16.jsonld") |
| 300 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 301 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 302 | // 16 |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 303 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 304 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 305 | assertEquals(kr.getSerialQuery(), "{16: tokens:s:Alphabet}"); |
| 306 | assertEquals(kr.getTotalResults(), 7); |
| 307 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 308 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 309 | "... 2. Herkunft Die aus dem proto-semitischen [[{16:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 310 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 311 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-16 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 312 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 313 | // 127 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 314 | json = getJsonString(getClass() |
| 315 | .getResource("/queries/bugs/greater_highlights_127.jsonld") |
| 316 | .getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 317 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 318 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 319 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 320 | assertEquals(kr.getSerialQuery(), "{127: tokens:s:Alphabet}"); |
| 321 | assertEquals(kr.getTotalResults(), 7); |
| 322 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 323 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 324 | "... 2. Herkunft Die aus dem proto-semitischen [[{127:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 325 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 326 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-127 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 327 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 328 | // 255 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 329 | json = getJsonString(getClass() |
| 330 | .getResource("/queries/bugs/greater_highlights_255.jsonld") |
| 331 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 332 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 333 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 334 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 335 | assertEquals(kr.getSerialQuery(), "{255: tokens:s:Alphabet}"); |
| 336 | assertEquals(kr.getTotalResults(), 7); |
| 337 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 338 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 339 | "... 2. Herkunft Die aus dem proto-semitischen [[Alphabet]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 340 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 341 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark>Alphabet</mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 342 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 343 | // 300 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 344 | json = getJsonString(getClass() |
| 345 | .getResource("/queries/bugs/greater_highlights_300.jsonld") |
| 346 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 347 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 348 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 349 | kr = ks.apply(ki); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 350 | assertEquals(709, kr.getError(0).getCode()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 351 | assertEquals("Valid class numbers exceeded", |
| 352 | kr.getError(0).getMessage()); |
| Nils Diewald | c471b18 | 2014-11-19 22:51:15 +0000 | [diff] [blame] | 353 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | assertEquals(kr.getError(0).getMessage(), |
| 355 | "Valid class numbers exceeded"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 356 | }; |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 357 | |
| Akron | 75d0f38 | 2019-03-15 14:56:03 +0100 | [diff] [blame] | 358 | @Test |
| 359 | public void highlightSnippetOffsetBug () throws IOException, QueryException { |
| 360 | KrillIndex ki = new KrillIndex(); |
| 361 | ki.addDoc(getClass().getResourceAsStream("/wiki/WUD17-G97-20422.json.gz"), true); |
| 362 | ki.commit(); |
| 363 | |
| 364 | /* |
| 365 | QueryBuilder kq = new QueryBuilder("tokens"); |
| 366 | SpanQuery q = (SpanQuery) kq.seg("s:Sockenpuppe").toQuery(); |
| 367 | |
| 368 | Krill qs = new Krill(q); |
| 369 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 370 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| 371 | Result kr = ki.search(qs); |
| 372 | */ |
| 373 | Match km; |
| 374 | |
| 375 | km = ki.getMatch("match-WUD17/G97/20422-p1020-1021"); |
| 376 | assertEquals(km.getSnippetBrackets(), "... [[Madonna]] ..."); |
| 377 | |
| 378 | km = ki.getMatch("match-WUD17/G97/20422-p1030-1031"); |
| 379 | assertEquals(km.getSnippetBrackets(), "... [[Kurier]] ..."); |
| 380 | |
| 381 | km = ki.getMatch("match-WUD17/G97/20422-p1032-1033"); |
| 382 | assertEquals(km.getSnippetBrackets(), "... [[Spalte]] ..."); |
| 383 | |
| 384 | // There is a surrogate between 6500, 6600 that makes the substring |
| 385 | // broken, as the original substring works on utf-8, but Java works on utf-16 |
| 386 | |
| 387 | km = ki.getMatch("match-WUD17/G97/20422-p1033-1034"); |
| 388 | assertEquals(km.getSnippetBrackets(), "... [[Neue]] ..."); |
| 389 | |
| 390 | km = ki.getMatch("match-WUD17/G97/20422-p1034-1035"); |
| 391 | assertEquals(km.getSnippetBrackets(), "... [[Artikel]] ..."); |
| 392 | |
| 393 | km = ki.getMatch("match-WUD17/G97/20422-p5707-5708"); |
| 394 | assertEquals(km.getSnippetBrackets(), "... [[Sockenpuppe]] ..."); |
| 395 | } |
| 396 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 397 | |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 398 | @Test |
| 399 | public void highlightEscapes () throws IOException, QueryException { |
| 400 | KrillIndex ki = new KrillIndex(); |
| 401 | FieldDocument fd = new FieldDocument(); |
| 402 | fd.addString("ID", "doc-1"); |
| 403 | fd.addString("UID", "1"); |
| 404 | fd.addString("textSigle", "c1/d1/1"); |
| 405 | |
| 406 | // Make this clean for HTML and Brackets! |
| 407 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 408 | fd.addTV("base", "Mit \"Mann\" & {Ma\\us}", |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 409 | "[(0-3)s:Mit|i:mit|_0#0-3|-:t$<i>4|<>:base/t:t$<b>64<i>0<i>20<i>4<b>0]" |
| 410 | + "[(4-10)s:\"Mann\"|i:\"mann\"|base/l:\"Mann\"|_1#4-10]" |
| 411 | + "[(11-12)s:&|i:&|base/l:&|_2#11-12]" |
| 412 | + "[(13-20)s:{Ma\\us}|i:{ma\\us}|_3#13-20]"); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 413 | ki.addDoc(fd); |
| 414 | |
| 415 | // Commit! |
| 416 | ki.commit(); |
| 417 | QueryBuilder kq = new QueryBuilder("base"); |
| 418 | SpanQuery q = (SpanQuery) kq.tag("base/t:t").toQuery(); |
| 419 | |
| 420 | Krill qs = new Krill(q); |
| 421 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 422 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| 423 | |
| 424 | Result kr = ki.search(qs); |
| 425 | assertEquals((long) 1, kr.getTotalResults()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 426 | assertEquals("[[Mit \"Mann\" & \\{Ma\\\\us\\}]]", |
| 427 | kr.getMatch(0).getSnippetBrackets()); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 428 | assertEquals( |
| 429 | "<span class=\"context-left\"></span><span class=\"match\"><mark>Mit "Mann" & {Ma\\us}</mark></span><span class=\"context-right\"></span>", |
| 430 | kr.getMatch(0).getSnippetHTML()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 431 | assertEquals("match-c1/d1/1-p0-4", kr.getMatch(0).getID()); |
| 432 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 433 | Match km = ki.getMatchInfo("match-c1/d1/1-p0-4", "base", true, |
| 434 | (ArrayList) null, (ArrayList) null, true, true, false); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 435 | assertEquals(0, km.getStartPos()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 436 | assertEquals( |
| 437 | "<span class=\"context-left\"></span>" |
| 438 | + "<span class=\"match\"><mark><span title=\"base/t:t\">" |
| 439 | + "Mit " + "<span title=\"base/l:"Mann"\">" |
| 440 | + ""Mann"" + "</span>" + " " |
| 441 | + "<span title=\"base/l:&\">&</span>" + " " |
| 442 | + "{Ma\\us}" + "</span>" + "</mark></span>" |
| 443 | + "<span class=\"context-right\"></span>", |
| 444 | km.getSnippetHTML()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 445 | }; |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 446 | |
| Akron | 9afad92 | 2020-03-04 07:18:12 +0100 | [diff] [blame] | 447 | |
| 448 | @Test |
| 449 | public void checkSpanHighlights () throws IOException, QueryException { |
| 450 | |
| 451 | KrillIndex ki = new KrillIndex(); |
| 452 | |
| 453 | FieldDocument fd = new FieldDocument(); |
| 454 | fd.addString("ID", "doc-1"); |
| 455 | fd.addString("UID", "1"); |
| 456 | fd.addString("textSigle", "c1/d1/1"); |
| 457 | fd.addTV("base", "abc", |
| 458 | "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3|<>:base/t:t$<b>64<i>0<i>3<i>3<b>0]" + |
| 459 | "[(1-2)s:b|i:b|base/l:B|_1#1-2|<>:corenlp/x:a$<b>64<i>1<i>2<i>2<b0>]" + |
| 460 | "[(2-3)s:c|i:c|base/l:C|_2#2-3]"); |
| 461 | ki.addDoc(fd); |
| 462 | ki.commit(); |
| 463 | |
| 464 | QueryBuilder kq = new QueryBuilder("base"); |
| 465 | Result kr = ki |
| 466 | .search((SpanQuery) kq.tag("base/t:t").toQuery()); |
| 467 | |
| 468 | Match km = kr.getMatch(0); |
| 469 | assertEquals(km.getStartPos(), 0); |
| 470 | assertEquals(km.getEndPos(), 3); |
| 471 | assertEquals("match-c1/d1/1-p0-3",km.getID()); |
| 472 | |
| 473 | km = ki.getMatchInfo("match-c1/d1/1-p0-3", "base", true, |
| 474 | (ArrayList) null, (ArrayList) null, true, true, false); |
| 475 | assertEquals(0, km.getStartPos()); |
| 476 | assertEquals(3, km.getEndPos()); |
| 477 | assertEquals("<span class=\"context-left\"></span>" + |
| 478 | "<span class=\"match\">"+ |
| 479 | "<mark>"+ |
| 480 | "<span title=\"base/t:t\">a"+ |
| 481 | "<span title=\"base/l:B\">"+ |
| 482 | "<span title=\"corenlp/x:a\">b</span>"+ |
| 483 | "</span>"+ |
| 484 | "<span title=\"base/l:C\">c</span>"+ |
| 485 | "</span>"+ |
| 486 | "</mark>"+ |
| 487 | "</span>"+ |
| 488 | "<span class=\"context-right\"></span>", km.getSnippetHTML()); |
| 489 | }; |
| 490 | |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 491 | |
| 492 | @Test |
| 493 | public void highlightEmptySpan () throws IOException, QueryException { |
| 494 | |
| 495 | KrillIndex ki = new KrillIndex(); |
| 496 | |
| 497 | // <>:s$<b>65<i>38<b>0 |
| 498 | // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a> |
| 499 | FieldDocument fd = new FieldDocument(); |
| 500 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 501 | "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" |
| 502 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" |
| 503 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6|<>:a$<b>65<i>6]" |
| 504 | + "[(9-12)s:h<b>0]" + "[(12-15)s:i]" + "[(15-18)s:j]" |
| 505 | + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" |
| 506 | + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| 507 | ki.addDoc(fd); |
| 508 | |
| 509 | // Commit! |
| 510 | ki.commit(); |
| 511 | QueryBuilder kq = new QueryBuilder("base"); |
| 512 | SpanQuery q = (SpanQuery) kq.tag("a").toQuery(); |
| 513 | |
| 514 | Krill qs = new Krill(q); |
| 515 | qs.getMeta().getContext().left.setToken(true).setLength((short) 5); |
| 516 | qs.getMeta().getContext().right.setToken(true).setLength((short) 5); |
| 517 | |
| 518 | Result kr = ki.search(qs); |
| 519 | assertEquals((long) 4, kr.getTotalResults()); |
| 520 | |
| 521 | Match km = kr.getMatch(2); |
| 522 | assertEquals( |
| 523 | "<span class=\"context-left\">"+ |
| 524 | "</span>"+ |
| 525 | "<span class=\"match\">"+ |
| 526 | "<mark>x y z </mark>"+ |
| 527 | "</span><span class=\"context-right\">h i j h i j h i j </span>", |
| 528 | km.getSnippetHTML()); |
| 529 | |
| 530 | km = kr.getMatch(3); |
| 531 | assertEquals( |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 532 | "<span class=\"context-left\"><span class=\"match\"></span></span>", |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 533 | km.getSnippetHTML()); |
| 534 | |
| 535 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 536 | }; |