| Eliza Margaretha | 269e5a6 | 2014-09-30 16:58:23 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.highlight; |
| 2 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 3 | import java.util.*; |
| 4 | import java.io.IOException; |
| 5 | |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 6 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 7 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 8 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 9 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 10 | import de.ids_mannheim.korap.query.QueryBuilder; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 11 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.Krill; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.index.FieldDocument; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 15 | |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.util.QueryException; |
| 17 | |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 18 | import static de.ids_mannheim.korap.TestSimple.*; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 19 | |
| 20 | import static org.junit.Assert.*; |
| 21 | import org.junit.Test; |
| 22 | import org.junit.Ignore; |
| 23 | import org.junit.runner.RunWith; |
| 24 | import org.junit.runners.JUnit4; |
| 25 | |
| 26 | @RunWith(JUnit4.class) |
| 27 | public class TestHighlight { // extends LuceneTestCase { |
| 28 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 29 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 30 | public void checkHighlights () throws IOException, QueryException { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 31 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 32 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 33 | String json = new String("{" + " \"fields\" : [" + " { " |
| 34 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 35 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 36 | + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," |
| 37 | + " [ \"s:b\", \"i:b\", \"_1#1-2\" ]," |
| 38 | + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]" |
| 39 | + " }" + " ]" + "}"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 40 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 41 | FieldDocument fd = ki.addDoc(json); |
| 42 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 43 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 44 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 45 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 46 | Result kr = ki |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 47 | .search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b"))).toQuery()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 48 | Match km = kr.getMatch(0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 49 | assertEquals(km.getStartPos(), 1); |
| 50 | assertEquals(km.getEndPos(), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 51 | assertEquals(km.getStartPos(1), 1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 52 | assertEquals(km.getEndPos(1), 2); |
| 53 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 54 | "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark></mark></span><span class=\"context-right\">c</span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 55 | km.getSnippetHTML()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 56 | |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 57 | kr = ki.search((SpanQuery) kq.seq(kq.nr(1, kq.seg("s:b"))) |
| 58 | .append(kq.nr(2, kq.seg("s:c"))).toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 59 | km = kr.getMatch(0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 60 | assertEquals(km.getStartPos(), 1); |
| 61 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 62 | assertEquals(km.getStartPos(1), 1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 64 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 65 | assertEquals(km.getEndPos(2), 3); |
| 66 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 67 | "<span class=\"context-left\">a</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">b</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 68 | km.getSnippetHTML()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | 8c22178 | 2013-12-13 19:52:58 +0000 | [diff] [blame] | 70 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 71 | kr = ki.search((SpanQuery) kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 72 | .seq(kq.nr(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))) |
| 73 | .append(kq.nr(2, kq.seg("s:c"))).toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 74 | km = kr.getMatch(0); |
| 75 | assertEquals(km.getStartPos(), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 77 | assertEquals(km.getStartPos(1), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 78 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 79 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 80 | assertEquals(km.getEndPos(2), 3); |
| 81 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 82 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-1 level-0\">ab</mark><mark class=\"class-2 level-0\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 83 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 84 | |
| 85 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 86 | kr = ki.search( |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 87 | (SpanQuery) kq.nr( |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 88 | 3, kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 89 | .seq(kq.nr(1, |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 90 | kq.seq(kq.seg("s:a")) |
| 91 | .append(kq.seg("s:b")))) |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 92 | .append(kq.nr(2, kq.seg("s:c")))) |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 93 | .toQuery()); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 94 | km = kr.getMatch(0); |
| 95 | assertEquals(km.getStartPos(), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 96 | assertEquals(km.getEndPos(), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 97 | assertEquals(km.getStartPos(1), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | assertEquals(km.getEndPos(1), 2); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 99 | assertEquals(km.getStartPos(2), 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 100 | assertEquals(km.getEndPos(2), 3); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 101 | assertEquals(km.getStartPos(3), 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 102 | assertEquals(km.getEndPos(3), 3); |
| 103 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 104 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-3 level-0\"><mark class=\"class-1 level-1\">ab</mark><mark class=\"class-2 level-1\">c</mark></mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 105 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 106 | }; |
| 107 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 109 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | public void checkHighlightsManually () throws IOException, QueryException { |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 111 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 112 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | String json = new String("{" + " \"fields\" : [" + " { " |
| 114 | + " \"primaryData\" : \"abc\"" + " }," + " {" |
| 115 | + " \"name\" : \"tokens\"," + " \"data\" : [" |
| 116 | + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," |
| 117 | + " [ \"s:b\", \"i:b\", \"_1#1-2\" ]," |
| 118 | + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]" |
| 119 | + " }" + " ]" + "}"); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 120 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 121 | FieldDocument fd = ki.addDoc(json); |
| 122 | ki.commit(); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 123 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 124 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 125 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 126 | Result kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a")) |
| 127 | .append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 128 | Match km = kr.getMatch(0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 129 | km.addHighlight(0, 1, (short) 7); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 131 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">ab</mark>c</mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 133 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 134 | km.addHighlight(1, 2, (short) 6); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 136 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">a<mark class=\"class-6 level-1\">b</mark></mark><mark class=\"class-6 level-1\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 138 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 139 | km.addHighlight(0, 1, (short) 5); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 140 | assertEquals("[[{5:{7:a{6:b}}}{6:c}]]", km.getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 141 | assertEquals( |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 142 | "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-5 level-0\"><mark class=\"class-7 level-1\">a<mark class=\"class-6 level-2\">b</mark></mark></mark><mark class=\"class-6 level-2\">c</mark></mark></span><span class=\"context-right\"></span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 143 | km.getSnippetHTML()); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 144 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | }; |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 146 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 147 | |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 148 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | public void highlightMissingBug () throws IOException, QueryException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 150 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 151 | FieldDocument fd = new FieldDocument(); |
| 152 | fd.addString("ID", "doc-1"); |
| 153 | fd.addString("UID", "1"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 154 | fd.addTV("base", "abab", |
| 155 | "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]" |
| 156 | + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 157 | ki.addDoc(fd); |
| 158 | fd = new FieldDocument(); |
| 159 | fd.addString("ID", "doc-2"); |
| 160 | fd.addString("UID", "2"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" |
| 162 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 163 | ki.addDoc(fd); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 164 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 165 | // Commit! |
| 166 | ki.commit(); |
| 167 | fd = new FieldDocument(); |
| 168 | fd.addString("ID", "doc-3"); |
| 169 | fd.addString("UID", "3"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 170 | fd.addTV("base", "abab", |
| 171 | "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" + "[(1-2)s:b|i:b|_1#1-2]" |
| 172 | + "[(2-3)s:a|i:c|_2#2-3]" + "[(3-4)s:b|i:a|_3#3-4]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 173 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 174 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 175 | // Commit! |
| 176 | ki.commit(); |
| 177 | fd = new FieldDocument(); |
| 178 | fd.addString("ID", "doc-4"); |
| 179 | fd.addString("UID", "4"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 180 | fd.addTV("base", "aba", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" |
| 181 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:a|i:c|_2#2-3]"); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 182 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 183 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 184 | // Commit! |
| 185 | ki.commit(); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 186 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 187 | QueryBuilder kq = new QueryBuilder("base"); |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 188 | SpanQuery q = (SpanQuery) kq.or(kq.nr(1, kq.seg("s:a"))) |
| 189 | .or(kq.nr(2, kq.seg("s:b"))).toQuery(); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 190 | Result kr = ki.search(q); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 191 | assertEquals((long) 14, kr.getTotalResults()); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 192 | assertEquals("[[{1:a}]]bab", kr.getMatch(0).getSnippetBrackets()); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 193 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 194 | assertEquals("a[[{2:b}]]ab", kr.getMatch(1).getSnippetBrackets()); |
| 195 | assertEquals("ab[[{1:a}]]b", kr.getMatch(2).getSnippetBrackets()); |
| 196 | assertEquals("aba[[{2:b}]]", kr.getMatch(3).getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 197 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 198 | assertEquals("[[{1:a}]]ba", kr.getMatch(4).getSnippetBrackets()); |
| 199 | assertEquals("a[[{2:b}]]a", kr.getMatch(5).getSnippetBrackets()); |
| 200 | assertEquals("ab[[{1:a}]]", kr.getMatch(6).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 201 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 202 | assertEquals("[[{1:a}]]bab", kr.getMatch(7).getSnippetBrackets()); |
| 203 | assertEquals("a[[{2:b}]]ab", kr.getMatch(8).getSnippetBrackets()); |
| 204 | assertEquals("ab[[{1:a}]]b", kr.getMatch(9).getSnippetBrackets()); |
| 205 | assertEquals("aba[[{2:b}]]", kr.getMatch(10).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 206 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 207 | assertEquals("[[{1:a}]]ba", kr.getMatch(11).getSnippetBrackets()); |
| 208 | assertEquals("a[[{2:b}]]a", kr.getMatch(12).getSnippetBrackets()); |
| 209 | assertEquals("ab[[{1:a}]]", kr.getMatch(13).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 210 | |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 211 | kq = new QueryBuilder("base"); |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 212 | q = (SpanQuery) kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c"))) |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 213 | .toQuery(); |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 214 | Krill qs = new Krill(q); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 215 | qs.getMeta().getContext().left.setToken(true).setLength((short) 1); |
| 216 | qs.getMeta().getContext().right.setToken(true).setLength((short) 1); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 217 | kr = ki.search(qs); |
| 218 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 219 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 220 | assertEquals("[[{1:a}]]b ...", kr.getMatch(0).getSnippetBrackets()); |
| 221 | assertEquals("... b[[{2:a}]]b", kr.getMatch(1).getSnippetBrackets()); |
| 222 | assertEquals("... a[[{1:b}]]", kr.getMatch(2).getSnippetBrackets()); |
| 223 | assertEquals("[[{1:a}]]b ...", kr.getMatch(3).getSnippetBrackets()); |
| 224 | assertEquals("... b[[{2:a}]]", kr.getMatch(4).getSnippetBrackets()); |
| 225 | assertEquals("[[{1:a}]]b ...", kr.getMatch(5).getSnippetBrackets()); |
| 226 | assertEquals("... b[[{2:a}]]b", kr.getMatch(6).getSnippetBrackets()); |
| 227 | assertEquals("... a[[{1:b}]]", kr.getMatch(7).getSnippetBrackets()); |
| 228 | assertEquals("[[{1:a}]]b ...", kr.getMatch(8).getSnippetBrackets()); |
| 229 | assertEquals("... b[[{2:a}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 230 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 231 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 232 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 233 | kr = ki.search(qs); |
| 234 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 235 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 236 | assertEquals("[[{1:a}]] ...", kr.getMatch(0).getSnippetBrackets()); |
| 237 | assertEquals("... [[{2:a}]] ...", kr.getMatch(1).getSnippetBrackets()); |
| 238 | assertEquals("... [[{1:b}]]", kr.getMatch(2).getSnippetBrackets()); |
| 239 | assertEquals("[[{1:a}]] ...", kr.getMatch(3).getSnippetBrackets()); |
| 240 | assertEquals("... [[{2:a}]]", kr.getMatch(4).getSnippetBrackets()); |
| 241 | assertEquals("[[{1:a}]] ...", kr.getMatch(5).getSnippetBrackets()); |
| 242 | assertEquals("... [[{2:a}]] ...", kr.getMatch(6).getSnippetBrackets()); |
| 243 | assertEquals("... [[{1:b}]]", kr.getMatch(7).getSnippetBrackets()); |
| 244 | assertEquals("[[{1:a}]] ...", kr.getMatch(8).getSnippetBrackets()); |
| 245 | assertEquals("... [[{2:a}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 246 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 247 | q = (SpanQuery) kq |
| Akron | 4f52a63 | 2018-02-09 19:02:40 +0100 | [diff] [blame] | 248 | .nr(3, kq.or(kq.nr(1, kq.seg("i:a"))).or(kq.nr(2, kq.seg("i:c")))) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 249 | .toQuery(); |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 250 | qs = new Krill(q); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 251 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 252 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 253 | kr = ki.search(qs); |
| 254 | assertEquals((long) 10, kr.getTotalResults()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 255 | |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 256 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(0).getSnippetBrackets()); |
| 257 | assertEquals("... [[{2:{3:a}}]] ...", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 258 | kr.getMatch(1).getSnippetBrackets()); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 259 | assertEquals("... [[{1:{3:b}}]]", kr.getMatch(2).getSnippetBrackets()); |
| 260 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(3).getSnippetBrackets()); |
| 261 | assertEquals("... [[{2:{3:a}}]]", kr.getMatch(4).getSnippetBrackets()); |
| 262 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(5).getSnippetBrackets()); |
| 263 | assertEquals("... [[{2:{3:a}}]] ...", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 264 | kr.getMatch(6).getSnippetBrackets()); |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 265 | assertEquals("... [[{1:{3:b}}]]", kr.getMatch(7).getSnippetBrackets()); |
| 266 | assertEquals("[[{1:{3:a}}]] ...", kr.getMatch(8).getSnippetBrackets()); |
| 267 | assertEquals("... [[{2:{3:a}}]]", kr.getMatch(9).getSnippetBrackets()); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 268 | }; |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 269 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 270 | |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 271 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 272 | public void highlightGreaterClassBug () throws IOException, QueryException { |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 273 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 274 | // Construct index |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 275 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 276 | // Indexing test files |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 277 | for (String i : new String[] { "00001", "00002" }) { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 278 | ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 279 | true); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 280 | }; |
| 281 | ki.commit(); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 282 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 283 | // 15 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 284 | String json = getJsonString(getClass() |
| 285 | .getResource("/queries/bugs/greater_highlights_15.jsonld") |
| 286 | .getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 287 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 288 | Krill ks = new Krill(json); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 289 | Result kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 290 | assertEquals(kr.getSerialQuery(), "{15: tokens:s:Alphabet}"); |
| 291 | assertEquals(kr.getTotalResults(), 7); |
| 292 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 293 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 294 | "... 2. Herkunft Die aus dem proto-semitischen [[{15:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 295 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 296 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-15 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 297 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 298 | json = getJsonString(getClass() |
| 299 | .getResource("/queries/bugs/greater_highlights_16.jsonld") |
| 300 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 301 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 302 | // 16 |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 303 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 304 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 305 | assertEquals(kr.getSerialQuery(), "{16: tokens:s:Alphabet}"); |
| 306 | assertEquals(kr.getTotalResults(), 7); |
| 307 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 308 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 309 | "... 2. Herkunft Die aus dem proto-semitischen [[{16:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 310 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 311 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-16 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 312 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 313 | // 127 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 314 | json = getJsonString(getClass() |
| 315 | .getResource("/queries/bugs/greater_highlights_127.jsonld") |
| 316 | .getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 317 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 318 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 319 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 320 | assertEquals(kr.getSerialQuery(), "{127: tokens:s:Alphabet}"); |
| 321 | assertEquals(kr.getTotalResults(), 7); |
| 322 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 323 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 324 | "... 2. Herkunft Die aus dem proto-semitischen [[{127:Alphabet}]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 325 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 326 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark><mark class=\"class-127 level-0\">Alphabet</mark></mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 327 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 328 | // 255 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 329 | json = getJsonString(getClass() |
| 330 | .getResource("/queries/bugs/greater_highlights_255.jsonld") |
| 331 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 332 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 333 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 334 | kr = ks.apply(ki); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 335 | assertEquals(kr.getSerialQuery(), "{255: tokens:s:Alphabet}"); |
| 336 | assertEquals(kr.getTotalResults(), 7); |
| 337 | assertEquals(kr.getStartIndex(), 0); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 338 | assertEquals(kr.getMatch(0).getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 339 | "... 2. Herkunft Die aus dem proto-semitischen [[Alphabet]] stammende Urform des Buchstaben ist wahrscheinlich ..."); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 340 | assertEquals(kr.getMatch(0).getSnippetHTML(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 341 | "<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><mark>Alphabet</mark></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 342 | |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 343 | // 300 |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 344 | json = getJsonString(getClass() |
| 345 | .getResource("/queries/bugs/greater_highlights_300.jsonld") |
| 346 | .getFile()); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 347 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 348 | ks = new Krill(json); |
| Nils Diewald | 3aa9e69 | 2015-02-20 22:20:11 +0000 | [diff] [blame] | 349 | kr = ks.apply(ki); |
| Nils Diewald | c99ed5b | 2015-01-21 22:08:53 +0000 | [diff] [blame] | 350 | assertEquals(709, kr.getError(0).getCode()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 351 | assertEquals("Valid class numbers exceeded", |
| 352 | kr.getError(0).getMessage()); |
| Nils Diewald | c471b18 | 2014-11-19 22:51:15 +0000 | [diff] [blame] | 353 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | assertEquals(kr.getError(0).getMessage(), |
| 355 | "Valid class numbers exceeded"); |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 356 | }; |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 357 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 358 | |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 359 | @Test |
| 360 | public void highlightEscapes () throws IOException, QueryException { |
| 361 | KrillIndex ki = new KrillIndex(); |
| 362 | FieldDocument fd = new FieldDocument(); |
| 363 | fd.addString("ID", "doc-1"); |
| 364 | fd.addString("UID", "1"); |
| 365 | fd.addString("textSigle", "c1/d1/1"); |
| 366 | |
| 367 | // Make this clean for HTML and Brackets! |
| 368 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 369 | fd.addTV("base", "Mit \"Mann\" & {Ma\\us}", |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 370 | "[(0-3)s:Mit|i:mit|_0#0-3|-:t$<i>4|<>:base/t:t$<b>64<i>0<i>20<i>4<b>0]" |
| 371 | + "[(4-10)s:\"Mann\"|i:\"mann\"|base/l:\"Mann\"|_1#4-10]" |
| 372 | + "[(11-12)s:&|i:&|base/l:&|_2#11-12]" |
| 373 | + "[(13-20)s:{Ma\\us}|i:{ma\\us}|_3#13-20]"); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 374 | ki.addDoc(fd); |
| 375 | |
| 376 | // Commit! |
| 377 | ki.commit(); |
| 378 | QueryBuilder kq = new QueryBuilder("base"); |
| 379 | SpanQuery q = (SpanQuery) kq.tag("base/t:t").toQuery(); |
| 380 | |
| 381 | Krill qs = new Krill(q); |
| 382 | qs.getMeta().getContext().left.setToken(true).setLength((short) 0); |
| 383 | qs.getMeta().getContext().right.setToken(true).setLength((short) 0); |
| 384 | |
| 385 | Result kr = ki.search(qs); |
| 386 | assertEquals((long) 1, kr.getTotalResults()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 387 | assertEquals("[[Mit \"Mann\" & \\{Ma\\\\us\\}]]", |
| 388 | kr.getMatch(0).getSnippetBrackets()); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 389 | assertEquals( |
| 390 | "<span class=\"context-left\"></span><span class=\"match\"><mark>Mit "Mann" & {Ma\\us}</mark></span><span class=\"context-right\"></span>", |
| 391 | kr.getMatch(0).getSnippetHTML()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 392 | assertEquals("match-c1/d1/1-p0-4", kr.getMatch(0).getID()); |
| 393 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 394 | Match km = ki.getMatchInfo("match-c1/d1/1-p0-4", "base", true, |
| 395 | (ArrayList) null, (ArrayList) null, true, true, false); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 396 | assertEquals(0, km.getStartPos()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 397 | assertEquals( |
| 398 | "<span class=\"context-left\"></span>" |
| 399 | + "<span class=\"match\"><mark><span title=\"base/t:t\">" |
| 400 | + "Mit " + "<span title=\"base/l:"Mann"\">" |
| 401 | + ""Mann"" + "</span>" + " " |
| 402 | + "<span title=\"base/l:&\">&</span>" + " " |
| 403 | + "{Ma\\us}" + "</span>" + "</mark></span>" |
| 404 | + "<span class=\"context-right\"></span>", |
| 405 | km.getSnippetHTML()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 406 | }; |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 407 | |
| 408 | |
| 409 | @Test |
| 410 | public void highlightEmptySpan () throws IOException, QueryException { |
| 411 | |
| 412 | KrillIndex ki = new KrillIndex(); |
| 413 | |
| 414 | // <>:s$<b>65<i>38<b>0 |
| 415 | // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a> |
| 416 | FieldDocument fd = new FieldDocument(); |
| 417 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 418 | "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" |
| 419 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" |
| 420 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6|<>:a$<b>65<i>6]" |
| 421 | + "[(9-12)s:h<b>0]" + "[(12-15)s:i]" + "[(15-18)s:j]" |
| 422 | + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" |
| 423 | + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| 424 | ki.addDoc(fd); |
| 425 | |
| 426 | // Commit! |
| 427 | ki.commit(); |
| 428 | QueryBuilder kq = new QueryBuilder("base"); |
| 429 | SpanQuery q = (SpanQuery) kq.tag("a").toQuery(); |
| 430 | |
| 431 | Krill qs = new Krill(q); |
| 432 | qs.getMeta().getContext().left.setToken(true).setLength((short) 5); |
| 433 | qs.getMeta().getContext().right.setToken(true).setLength((short) 5); |
| 434 | |
| 435 | Result kr = ki.search(qs); |
| 436 | assertEquals((long) 4, kr.getTotalResults()); |
| 437 | |
| 438 | Match km = kr.getMatch(2); |
| 439 | assertEquals( |
| 440 | "<span class=\"context-left\">"+ |
| 441 | "</span>"+ |
| 442 | "<span class=\"match\">"+ |
| 443 | "<mark>x y z </mark>"+ |
| 444 | "</span><span class=\"context-right\">h i j h i j h i j </span>", |
| 445 | km.getSnippetHTML()); |
| 446 | |
| 447 | km = kr.getMatch(3); |
| 448 | assertEquals( |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 449 | "<span class=\"context-left\"><span class=\"match\"></span></span>", |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 450 | km.getSnippetHTML()); |
| 451 | |
| 452 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 453 | }; |