| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 6cbe371 | 2018-10-23 13:22:49 +0200 | [diff] [blame] | 3 | import static de.ids_mannheim.korap.TestSimple.getJsonString; |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 4 | import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc; |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 5 | import static org.junit.Assert.assertEquals; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 6 | |
| margaretha | 6cbe371 | 2018-10-23 13:22:49 +0200 | [diff] [blame] | 7 | import java.io.IOException; |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 8 | import java.util.Arrays; |
| 9 | import java.util.List; |
| 10 | import java.util.regex.Pattern; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 11 | |
| 12 | import org.apache.lucene.index.Term; |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 13 | import org.apache.lucene.search.RegexpQuery; |
| 14 | import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; |
| Eliza Margaretha | 8e200cd | 2014-11-13 16:00:38 +0000 | [diff] [blame] | 15 | import org.apache.lucene.search.spans.SpanQuery; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 16 | import org.apache.lucene.search.spans.SpanTermQuery; |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 17 | import org.apache.lucene.util.automaton.RegExp; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 18 | import org.junit.Test; |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 19 | import org.junit.Ignore; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 20 | |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 21 | import de.ids_mannheim.korap.Krill; |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 22 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 23 | import de.ids_mannheim.korap.KrillQuery; |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 24 | import de.ids_mannheim.korap.TestSimple; |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 25 | import de.ids_mannheim.korap.query.QueryBuilder; |
| Eliza Margaretha | 656cb31 | 2014-08-14 12:42:26 +0000 | [diff] [blame] | 26 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 27 | import de.ids_mannheim.korap.query.SpanExpansionQuery; |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 28 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 29 | import de.ids_mannheim.korap.query.SpanRepetitionQuery; |
| Eliza Margaretha | 8e200cd | 2014-11-13 16:00:38 +0000 | [diff] [blame] | 30 | import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper; |
| margaretha | f151c96 | 2018-11-27 17:38:59 +0100 | [diff] [blame] | 31 | import de.ids_mannheim.korap.response.Match; |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 32 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | 8e200cd | 2014-11-13 16:00:38 +0000 | [diff] [blame] | 33 | import de.ids_mannheim.korap.util.QueryException; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 34 | |
| 35 | public class TestSpanExpansionIndex { |
| 36 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 37 | Result kr; |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 38 | KrillIndex ki; |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 39 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 40 | public TestSpanExpansionIndex () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 41 | ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 42 | ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 43 | ki.commit(); |
| 44 | } |
| 45 | |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 46 | /** Method for finding bugs. Since java matcher cannot find multiple matches |
| 47 | * from the same offset, the expected results are sometimes lower than the |
| 48 | * actual results. |
| 49 | * |
| 50 | * @throws IOException |
| 51 | * @throws QueryException |
| 52 | */ |
| margaretha | 327f2b7 | 2018-11-27 14:10:24 +0100 | [diff] [blame] | 53 | // @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 54 | public void fuzzyTest () throws IOException, QueryException { |
| 55 | List<String> chars = Arrays.asList("a", "b", "c", "d", "e"); |
| 56 | |
| 57 | // c []{0,2} a |
| 58 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 59 | SpanTermQuery stq2 = new SpanTermQuery(new Term("base", "s:a")); |
| 60 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, 0, true); |
| 61 | SpanNextQuery snq = new SpanNextQuery(seq, stq2); |
| 62 | |
| 63 | Pattern resultPattern = Pattern.compile("c[a-e]{0,2}a"); |
| 64 | TestSimple.fuzzingTest(chars, resultPattern, snq, |
| margaretha | 327f2b7 | 2018-11-27 14:10:24 +0100 | [diff] [blame] | 65 | 6, 20, 8); |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 66 | } |
| 67 | |
| 68 | @Test |
| 69 | public void testNoExpansion () throws IOException { |
| 70 | KrillIndex ki = new KrillIndex(); |
| 71 | ki.addDoc(simpleFieldDoc("cc")); |
| 72 | ki.commit(); |
| 73 | |
| 74 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 75 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 0, 0, true); |
| 76 | Result kr = ki.search(seq, (short) 10); |
| 77 | |
| 78 | assertEquals(2, kr.getTotalResults()); |
| 79 | } |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 80 | |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 81 | /** |
| 82 | * Left and right expansions |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 83 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 84 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 85 | public void testLeftRightExpansions () throws IOException { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 86 | |
| 87 | SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:des")); |
| 88 | // left |
| 89 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true); |
| 90 | kr = ki.search(seq, (short) 10); |
| 91 | |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 92 | // assertEquals(69,kr.getTotalResults()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 93 | assertEquals(5, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 94 | assertEquals(8, kr.getMatch(0).getEndPos()); |
| 95 | assertEquals(6, kr.getMatch(1).getStartPos()); |
| 96 | assertEquals(8, kr.getMatch(1).getEndPos()); |
| 97 | assertEquals(7, kr.getMatch(2).getStartPos()); |
| 98 | assertEquals(8, kr.getMatch(2).getEndPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 99 | |
| Eliza Margaretha | 4423a92 | 2014-09-17 10:44:01 +0000 | [diff] [blame] | 100 | // right |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 101 | seq = new SpanExpansionQuery(stq, 3, 4, 0, true); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 102 | kr = ki.search(seq, (short) 10); |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 103 | |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 104 | assertEquals(7, kr.getMatch(0).getStartPos()); |
| 105 | assertEquals(11, kr.getMatch(0).getEndPos()); |
| 106 | assertEquals(7, kr.getMatch(1).getStartPos()); |
| 107 | assertEquals(12, kr.getMatch(1).getEndPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 108 | assertEquals(156, kr.getMatch(2).getStartPos()); |
| 109 | assertEquals(160, kr.getMatch(2).getEndPos()); |
| 110 | assertEquals(156, kr.getMatch(3).getStartPos()); |
| 111 | assertEquals(161, kr.getMatch(3).getEndPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 112 | } |
| 113 | |
| 114 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 115 | * Classnumber |
| Eliza Margaretha | 2dcde4f | 2015-02-10 12:02:18 +0000 | [diff] [blame] | 116 | * Check the expansion offsets |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 117 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 118 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 119 | public void testExpansionWithClassNumber () { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 120 | byte classNumber = 1; |
| 121 | SpanExpansionQuery sq; |
| 122 | // create new payload for the expansion offsets |
| 123 | SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:des")); |
| 124 | sq = new SpanExpansionQuery(stq, 0, 2, -1, classNumber, true); |
| 125 | kr = ki.search(sq, (short) 10); |
| 126 | |
| 127 | assertEquals(5, kr.getMatch(0).getStartPos()); |
| 128 | assertEquals(8, kr.getMatch(0).getEndPos()); |
| 129 | assertEquals(5, kr.getMatch(0).getStartPos(1)); // expansion 5,7 |
| 130 | assertEquals(7, kr.getMatch(0).getEndPos(1)); |
| 131 | // expansion offsets |
| Eliza Margaretha | ad05335 | 2014-09-17 16:21:23 +0000 | [diff] [blame] | 132 | assertEquals(6, kr.getMatch(1).getStartPos(1)); |
| 133 | assertEquals(7, kr.getMatch(1).getEndPos(1)); |
| 134 | assertEquals(7, kr.getMatch(2).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 135 | assertEquals(7, kr.getMatch(2).getEndPos(1)); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 136 | assertEquals(154, kr.getMatch(3).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 137 | assertEquals(156, kr.getMatch(3).getEndPos(1)); |
| 138 | |
| 139 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 140 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 141 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 142 | * +km.getSnippetBrackets()); } |
| 143 | */ |
| 144 | |
| 145 | // add expansion offsets to the existing payload |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 146 | SpanElementQuery seq = new SpanElementQuery("tokens", "base/s:s"); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 147 | sq = new SpanExpansionQuery(seq, 1, 2, 0, classNumber, true); |
| 148 | kr = ki.search(sq, (short) 10); |
| 149 | |
| 150 | assertEquals(13, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 151 | assertEquals(26, kr.getMatch(0).getEndPos()); |
| 152 | assertEquals(13, kr.getMatch(1).getStartPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 153 | assertEquals(27, kr.getMatch(1).getEndPos()); |
| 154 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 155 | assertEquals(25, kr.getMatch(2).getStartPos()); |
| 156 | assertEquals(35, kr.getMatch(2).getEndPos()); |
| 157 | assertEquals(34, kr.getMatch(2).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 158 | assertEquals(35, kr.getMatch(2).getEndPos(1)); |
| 159 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 160 | assertEquals(25, kr.getMatch(3).getStartPos()); |
| 161 | assertEquals(36, kr.getMatch(3).getEndPos()); |
| 162 | assertEquals(34, kr.getMatch(3).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 163 | assertEquals(36, kr.getMatch(3).getEndPos(1)); |
| 164 | |
| 165 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 166 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 167 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 168 | * +km.getSnippetBrackets()); } |
| 169 | */ |
| 170 | } |
| 171 | |
| 172 | /** |
| 173 | * Right expansion with exclusion |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 174 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 175 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 176 | public void testRightExpansionWithExclusion () throws IOException { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 177 | byte classNumber = 1; |
| 178 | SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN")); |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 179 | SpanTermQuery notQuery = |
| 180 | new SpanTermQuery(new Term("tokens", "s:Buchstabe")); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 181 | |
| 182 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 2, 3, 0, |
| 183 | classNumber, true); |
| 184 | kr = ki.search(seq, (short) 20); |
| 185 | |
| 186 | assertEquals(6, kr.getMatch(0).getStartPos()); |
| 187 | assertEquals(9, kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 188 | assertEquals(7, kr.getMatch(0).getStartPos(1)); |
| 189 | assertEquals(9, kr.getMatch(0).getEndPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 190 | |
| 191 | assertEquals(9, kr.getMatch(2).getStartPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 192 | assertEquals(12, kr.getMatch(2).getEndPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 193 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 194 | assertEquals(9, kr.getMatch(3).getStartPos()); |
| 195 | assertEquals(13, kr.getMatch(3).getEndPos()); |
| 196 | assertEquals(10, kr.getMatch(3).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 197 | assertEquals(13, kr.getMatch(3).getEndPos(1)); |
| Nils Diewald | 5380aa6 | 2014-09-01 13:21:07 +0000 | [diff] [blame] | 198 | |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 199 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 200 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 201 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 202 | * +km.getSnippetBrackets()); } |
| 203 | */ |
| 204 | } |
| 205 | |
| 206 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 207 | * Left expansion with exclusion |
| Eliza Margaretha | 2dcde4f | 2015-02-10 12:02:18 +0000 | [diff] [blame] | 208 | * No expansion |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 209 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 210 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 211 | public void testLeftExpansionWithExclusion () throws IOException { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 212 | byte classNumber = 1; |
| 213 | SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN")); |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 214 | SpanTermQuery notQuery = |
| 215 | new SpanTermQuery(new Term("tokens", "tt/p:ADJA")); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 216 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 217 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 2, -1, |
| 218 | classNumber, true); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 219 | kr = ki.search(seq, (short) 10); |
| 220 | |
| 221 | assertEquals(6, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 222 | assertEquals(7, kr.getMatch(0).getEndPos()); |
| 223 | assertEquals(6, kr.getMatch(0).getStartPos(1)); |
| 224 | assertEquals(6, kr.getMatch(0).getEndPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 225 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 226 | assertEquals(12, kr.getMatch(4).getStartPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 227 | assertEquals(13, kr.getMatch(4).getEndPos()); |
| 228 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 229 | assertEquals(12, kr.getMatch(5).getStartPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 230 | assertEquals(15, kr.getMatch(5).getEndPos()); |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 231 | assertEquals(12, kr.getMatch(5).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 232 | assertEquals(14, kr.getMatch(5).getEndPos(1)); |
| 233 | |
| Eliza Margaretha | 8578784 | 2014-09-30 17:42:09 +0000 | [diff] [blame] | 234 | assertEquals(13, kr.getMatch(6).getStartPos()); |
| 235 | assertEquals(15, kr.getMatch(6).getEndPos()); |
| 236 | assertEquals(13, kr.getMatch(6).getStartPos(1)); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 237 | assertEquals(14, kr.getMatch(6).getEndPos(1)); |
| 238 | |
| 239 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 240 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 241 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 242 | * +km.getSnippetBrackets()); } |
| 243 | */ |
| 244 | |
| 245 | } |
| 246 | |
| 247 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 248 | * Expansion over start and end documents start => cut to 0 |
| Eliza Margaretha | 2dcde4f | 2015-02-10 12:02:18 +0000 | [diff] [blame] | 249 | * TODO: end => to be handled in rendering process |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 250 | * |
| 251 | * @throws IOException |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 252 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 253 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 254 | public void testExpansionOverStart () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 255 | KrillIndex ki = new KrillIndex(); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 256 | ki.addDoc(createFieldDoc0()); |
| 257 | ki.commit(); |
| 258 | |
| 259 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:e")); |
| 260 | // left expansion precedes 0 |
| 261 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 2, 2, -1, true); |
| 262 | kr = ki.search(seq, (short) 10); |
| 263 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 264 | assertEquals((long) 3, kr.getTotalResults()); |
| 265 | assertEquals(2, kr.getMatch(0).getStartPos()); |
| 266 | assertEquals(5, kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 267 | |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 268 | // right expansion exceeds end position |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 269 | seq = new SpanExpansionQuery(stq, 3, 3, 0, true); |
| 270 | kr = ki.search(seq, (short) 10); |
| 271 | |
| 272 | assertEquals((long) 4, kr.getTotalResults()); |
| 273 | assertEquals(7, kr.getMatch(2).getStartPos()); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 274 | assertEquals(11, kr.getMatch(2).getEndPos()); |
| 275 | assertEquals(8, kr.getMatch(3).getStartPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 276 | assertEquals(12, kr.getMatch(3).getEndPos()); |
| 277 | |
| 278 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 279 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 280 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 281 | * //+km.getSnippetBrackets() ); } |
| 282 | */ |
| 283 | } |
| 284 | |
| 285 | /** |
| 286 | * Expansion exclusion : multiple documents |
| 287 | * |
| 288 | * @throws IOException |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 289 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 290 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 291 | public void testExclusionWithMultipleDocs () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 292 | KrillIndex ki = new KrillIndex(); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 293 | ki.addDoc(createFieldDoc0()); // same doc |
| 294 | ki.addDoc(createFieldDoc1()); // only not clause |
| 295 | ki.addDoc(createFieldDoc2()); // only main clause |
| 296 | ki.commit(); |
| 297 | |
| 298 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:e")); |
| 299 | SpanTermQuery notQuery = new SpanTermQuery(new Term("base", "s:d")); |
| 300 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 301 | SpanExpansionQuery seq = |
| 302 | new SpanExpansionQuery(stq, notQuery, 2, 3, 0, true); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 303 | kr = ki.search(seq, (short) 20); |
| 304 | |
| 305 | // notClause.doc() > firstSpans.doc() |
| 306 | assertEquals(7, kr.getMatch(0).getStartPos()); |
| 307 | assertEquals(10, kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 308 | assertEquals(7, kr.getMatch(1).getStartPos()); |
| 309 | assertEquals(11, kr.getMatch(1).getEndPos()); |
| 310 | // !hasMoreNotClause |
| 311 | assertEquals(2, kr.getMatch(4).getLocalDocID()); |
| 312 | assertEquals(1, kr.getMatch(4).getStartPos()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 313 | assertEquals(4, kr.getMatch(4).getEndPos()); |
| 314 | } |
| 315 | |
| 316 | /** |
| 317 | * Skip to |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 318 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 319 | @Test |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 320 | public void testExpansionWithSkipTo () throws IOException, QueryException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 321 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 322 | ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true); |
| 323 | ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 324 | ki.commit(); |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 325 | String jsonPath = |
| 326 | getClass().getResource("/queries/poly3.json").getFile(); |
| Akron | 67d2ff0 | 2018-06-19 10:51:16 +0200 | [diff] [blame] | 327 | String jsonQuery = getJsonString(jsonPath); |
| Akron | 850b46e | 2016-06-08 10:08:55 +0200 | [diff] [blame] | 328 | SpanQueryWrapper sqwi = new KrillQuery("tokens").fromKoral(jsonQuery); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 329 | |
| 330 | SpanQuery sq = sqwi.toQuery(); |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 331 | // System.out.println(sq.toString()); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 332 | kr = ki.search(sq, (short) 20); |
| 333 | |
| 334 | assertEquals(205, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | 8e200cd | 2014-11-13 16:00:38 +0000 | [diff] [blame] | 335 | assertEquals(208, kr.getMatch(0).getEndPos()); |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 336 | |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 337 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 338 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 339 | * System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 340 | * +km.getSnippetBrackets() ); } |
| 341 | */ |
| 342 | } |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 343 | |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 344 | /** |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 345 | * Query rewrite bug |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 346 | * |
| Akron | 30c4606 | 2016-04-22 14:24:37 +0200 | [diff] [blame] | 347 | * Warning: This is not armoured by <base/s=t>! |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 348 | * |
| 349 | * @throws IOException |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 350 | */ |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 351 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 352 | public void testQueryRewriteBug () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 353 | KrillIndex ki = new KrillIndex(); |
| Akron | 30c4606 | 2016-04-22 14:24:37 +0200 | [diff] [blame] | 354 | ki.addDoc(createFieldDoc0()); // ceccecdeec |
| 355 | /* |
| 356 | ki.addDoc(createFieldDoc1()); // bbccdd || only not clause |
| 357 | ki.addDoc(createFieldDoc2()); // beccea | only main clause |
| 358 | */ |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 359 | ki.commit(); |
| 360 | |
| 361 | // See /queries/bugs/repetition_group_rewrite |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 362 | RegexpQuery requery = |
| 363 | new RegexpQuery(new Term("base", "s:[ac]"), RegExp.ALL); |
| 364 | SpanMultiTermQueryWrapper<RegexpQuery> query = |
| 365 | new SpanMultiTermQueryWrapper<RegexpQuery>(requery); |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 366 | SpanExpansionQuery seq = new SpanExpansionQuery(query, 1, 1, 1, true); |
| 367 | SpanRepetitionQuery rep = new SpanRepetitionQuery(seq, 2, 2, true); |
| 368 | |
| Akron | 30c4606 | 2016-04-22 14:24:37 +0200 | [diff] [blame] | 369 | // spanRepetition( |
| 370 | // spanExpansion( |
| 371 | // SpanMultiTermQueryWrapper(base:/s:[ac]/), |
| 372 | // []{1, 1}, |
| 373 | // right |
| 374 | // ){2,2} |
| 375 | // ) |
| 376 | |
| margaretha | 4cfc89e | 2016-04-25 18:01:14 +0200 | [diff] [blame] | 377 | kr = ki.search(query, (short) 20); |
| 378 | assertEquals(5, kr.getTotalResults()); |
| 379 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 380 | assertEquals(1, kr.getMatch(0).getEndPos()); |
| 381 | assertEquals(2, kr.getMatch(1).getStartPos()); |
| 382 | assertEquals(3, kr.getMatch(1).getEndPos()); |
| 383 | assertEquals(3, kr.getMatch(2).getStartPos()); |
| 384 | assertEquals(4, kr.getMatch(2).getEndPos()); |
| 385 | assertEquals(5, kr.getMatch(3).getStartPos()); |
| 386 | assertEquals(6, kr.getMatch(3).getEndPos()); |
| 387 | assertEquals(9, kr.getMatch(4).getStartPos()); |
| 388 | assertEquals(10, kr.getMatch(4).getEndPos()); |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 389 | |
| margaretha | 4cfc89e | 2016-04-25 18:01:14 +0200 | [diff] [blame] | 390 | kr = ki.search(seq, (short) 20); |
| 391 | assertEquals(5, kr.getTotalResults()); |
| 392 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 393 | assertEquals(2, kr.getMatch(0).getEndPos()); |
| 394 | assertEquals(2, kr.getMatch(1).getStartPos()); |
| 395 | assertEquals(4, kr.getMatch(1).getEndPos()); |
| 396 | assertEquals(3, kr.getMatch(2).getStartPos()); |
| 397 | assertEquals(5, kr.getMatch(2).getEndPos()); |
| 398 | assertEquals(5, kr.getMatch(3).getStartPos()); |
| 399 | assertEquals(7, kr.getMatch(3).getEndPos()); |
| 400 | assertEquals(9, kr.getMatch(4).getStartPos()); |
| 401 | assertEquals(11, kr.getMatch(4).getEndPos()); |
| 402 | |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 403 | kr = ki.search(rep, (short) 20); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 404 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 405 | assertEquals("[[cecc]]ecdeec", kr.getMatch(0).getSnippetBrackets()); |
| 406 | assertEquals("cec[[cecd]]eec", kr.getMatch(1).getSnippetBrackets()); |
| Akron | 30c4606 | 2016-04-22 14:24:37 +0200 | [diff] [blame] | 407 | assertEquals((long) 2, kr.getTotalResults()); |
| Eliza Margaretha | 8e200cd | 2014-11-13 16:00:38 +0000 | [diff] [blame] | 408 | } |
| Nils Diewald | 9b11a44 | 2014-11-08 20:47:17 +0000 | [diff] [blame] | 409 | |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 410 | /** |
| 411 | * Query rewrite bug |
| 412 | * |
| 413 | * @throws IOException |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 414 | */ |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 415 | @Test |
| 416 | public void testExpansionQueryBug3 () throws IOException, QueryException { |
| 417 | KrillIndex ki = new KrillIndex(); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 418 | ki.addDoc(createFieldDoc3()); |
| 419 | ki.addDoc(createFieldDoc4()); |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 420 | ki.commit(); |
| Akron | 67d2ff0 | 2018-06-19 10:51:16 +0200 | [diff] [blame] | 421 | String jsonPath = getClass() |
| 422 | .getResource("/queries/bugs/expansion_bug_3.jsonld").getFile(); |
| 423 | String json = getJsonString(jsonPath); |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 424 | KrillQuery kq = new KrillQuery("base"); |
| Akron | 850b46e | 2016-06-08 10:08:55 +0200 | [diff] [blame] | 425 | SpanQuery sq = kq.fromKoral(json).toQuery(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 426 | assertEquals(sq.toString(), |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 427 | "focus(254: spanContain(<base:base/s:t />, {254: spanExpansion(base:s:c, []{0, 4}, right)}))"); |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 428 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 429 | kr = ki.search(sq, (short) 10); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 430 | assertEquals("[[c]]ab", kr.getMatch(0).getSnippetBrackets()); |
| 431 | assertEquals("[[ca]]b", kr.getMatch(1).getSnippetBrackets()); |
| 432 | assertEquals("[[cab]]", kr.getMatch(2).getSnippetBrackets()); |
| 433 | assertEquals("[[c]]e", kr.getMatch(3).getSnippetBrackets()); |
| Akron | 63cd32f | 2016-04-21 17:56:06 +0200 | [diff] [blame] | 434 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 435 | assertEquals("[[ce]]", kr.getMatch(4).getSnippetBrackets()); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 436 | assertEquals(5, kr.getTotalResults()); |
| Akron | 63cd32f | 2016-04-21 17:56:06 +0200 | [diff] [blame] | 437 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 438 | sq = kq.builder().tag("base/s:t").toQuery(); |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 439 | assertEquals(sq.toString(), "<base:base/s:t />"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 440 | kr = ki.search(sq, (short) 5); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 441 | assertEquals("[[cab]]", kr.getMatch(0).getSnippetBrackets()); |
| 442 | assertEquals("[[ce]]", kr.getMatch(1).getSnippetBrackets()); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 443 | assertEquals(2, kr.getTotalResults()); |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 444 | } |
| 445 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 446 | @Test |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 447 | public void indexRegexSequence () throws Exception { |
| 448 | KrillIndex ki = new KrillIndex(); |
| 449 | ki.addDoc(createFieldDoc5()); |
| 450 | ki.commit(); |
| 451 | |
| 452 | QueryBuilder kq = new QueryBuilder("base"); |
| 453 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 454 | SpanQueryWrapper sq = kq.seq(kq.or("s:baumgarten", "s:steingarten")) |
| 455 | .append(kq.seg().without(kq.or("s:franz", "s:hans"))); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 456 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 457 | // Expected to find [baumgarten steingarten] |
| 458 | Krill ks = _newKrill(sq); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 459 | Result kr = ki.search(ks); |
| 460 | |
| 461 | assertEquals((long) 1, kr.getTotalResults()); |
| 462 | |
| 463 | assertEquals("... baum [[baumgarten steingarten]] franz ...", |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 464 | kr.getMatch(0).getSnippetBrackets()); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 465 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 466 | // The same result should be shown for: |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 467 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 468 | sq = kq.seq(kq.re("s:.*garten")) |
| 469 | .append(kq.seg().without(kq.re("s:.*an.*"))); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 470 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 471 | ks = _newKrill(sq); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 472 | kr = ki.search(ks); |
| 473 | |
| 474 | assertEquals((long) 1, kr.getTotalResults()); |
| 475 | |
| 476 | assertEquals("... baum [[baumgarten steingarten]] franz ...", |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 477 | kr.getMatch(0).getSnippetBrackets()); |
| 478 | }; |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 479 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 480 | @Test |
| 481 | public void testBugRegexExpandLeftNoMoreSpan () throws IOException { |
| 482 | KrillIndex ki = new KrillIndex(); |
| 483 | ki.addDoc(createFieldDoc6()); |
| 484 | ki.commit(); |
| 485 | |
| 486 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:a")); |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 487 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 488 | RegexpQuery requery = |
| 489 | new RegexpQuery(new Term("base", "s:[bc]"), RegExp.ALL); |
| 490 | SpanMultiTermQueryWrapper<RegexpQuery> notQuery = |
| 491 | new SpanMultiTermQueryWrapper<RegexpQuery>(requery); |
| 492 | |
| 493 | byte classNumber = 1; |
| 494 | // left expansion |
| 495 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 1, -1, |
| 496 | classNumber, true); |
| 497 | |
| 498 | kr = ki.search(seq, (short) 20); |
| 499 | |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 500 | assertEquals(9, kr.getMatches().size()); |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 501 | |
| margaretha | 7f4fd65 | 2018-11-22 18:00:02 +0100 | [diff] [blame] | 502 | } |
| Akron | a59252d | 2018-10-10 19:18:42 +0200 | [diff] [blame] | 503 | |
| 504 | @Test |
| 505 | public void indexExpansionWithNegationDifferentFragments () throws Exception { |
| 506 | KrillIndex ki = new KrillIndex(); |
| 507 | |
| 508 | // Add to the index in a single fragment |
| 509 | FieldDocument fd = new FieldDocument(); |
| 510 | fd.addTV("base", |
| 511 | "a B c", |
| 512 | "[(0-1)s:a|i:a|_1$<i>0<i>1]" |
| 513 | + "[(1-2)s:B|i:b|_2$<i>1<i>2|]" |
| 514 | + "[(2-3)s:c|i:c|_3$<i>2<i>3]"); |
| 515 | ki.addDoc(fd); |
| 516 | ki.commit(); |
| 517 | fd.addTV("base", |
| 518 | "a b c", |
| 519 | "[(0-1)s:a|i:a|_1$<i>0<i>1]" |
| 520 | + "[(1-2)s:b|i:b|_2$<i>1<i>2|]" |
| 521 | + "[(2-3)s:c|i:c|_3$<i>2<i>3]"); |
| 522 | ki.addDoc(fd); |
| 523 | ki.commit(); |
| 524 | |
| 525 | QueryBuilder kq = new QueryBuilder("base"); |
| 526 | SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery(); |
| 527 | assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString()); |
| 528 | Krill ks = new Krill(sq); |
| 529 | ks.getMeta().getContext().left.setToken(true).setLength(0); |
| 530 | ks.getMeta().getContext().right.setToken(true).setLength(0); |
| 531 | |
| 532 | Result kr = ki.search(ks); |
| 533 | assertEquals((long) 1, kr.getTotalResults()); |
| 534 | }; |
| 535 | |
| 536 | @Test |
| 537 | public void indexExpansionWithNegationSameFragmentBug () throws Exception { |
| 538 | KrillIndex ki = new KrillIndex(); |
| 539 | |
| 540 | // Add to the index in a single fragment |
| 541 | FieldDocument fd = new FieldDocument(); |
| 542 | fd.addTV("base", |
| 543 | "a B c", |
| 544 | "[(0-1)s:a|i:a|_1$<i>0<i>1]" |
| 545 | + "[(1-2)s:B|i:b|_2$<i>1<i>2|]" |
| 546 | + "[(2-3)s:c|i:c|_3$<i>2<i>3]"); |
| 547 | ki.addDoc(fd); |
| 548 | fd.addTV("base", |
| 549 | "a b c", |
| 550 | "[(0-1)s:a|i:a|_1$<i>0<i>1]" |
| 551 | + "[(1-2)s:b|i:b|_2$<i>1<i>2|]" |
| 552 | + "[(2-3)s:c|i:c|_3$<i>2<i>3]"); |
| 553 | ki.addDoc(fd); |
| 554 | ki.commit(); |
| 555 | |
| 556 | QueryBuilder kq = new QueryBuilder("base"); |
| 557 | SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery(); |
| 558 | assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString()); |
| 559 | Krill ks = new Krill(sq); |
| 560 | ks.getMeta().getContext().left.setToken(true).setLength(0); |
| 561 | ks.getMeta().getContext().right.setToken(true).setLength(0); |
| 562 | |
| 563 | Result kr = ki.search(ks); |
| 564 | assertEquals((long) 1, kr.getTotalResults()); |
| 565 | }; |
| 566 | |
| Akron | 4204734 | 2018-11-27 15:15:38 +0100 | [diff] [blame] | 567 | |
| 568 | @Test |
| Akron | 4204734 | 2018-11-27 15:15:38 +0100 | [diff] [blame] | 569 | public void indexExpansionLeftWithWrongSorting () throws IOException { |
| 570 | KrillIndex ki = new KrillIndex(); |
| 571 | ki.addDoc(simpleFieldDoc("abcc")); |
| 572 | ki.commit(); |
| 573 | |
| 574 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 575 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true); |
| 576 | assertEquals("spanExpansion(base:s:c, []{0, 2}, left)", seq.toString()); |
| 577 | Result kr = ki.search(seq, (short) 10); |
| 578 | |
| margaretha | f151c96 | 2018-11-27 17:38:59 +0100 | [diff] [blame] | 579 | assertEquals("a[[bc]]c", kr.getMatch(1).getSnippetBrackets()); |
| 580 | assertEquals(1, kr.getMatch(1).getStartPos()); |
| 581 | assertEquals(3, kr.getMatch(1).getEndPos()); |
| 582 | assertEquals("a[[bcc]]", kr.getMatch(2).getSnippetBrackets()); |
| 583 | assertEquals(1, kr.getMatch(2).getStartPos()); |
| 584 | assertEquals(4, kr.getMatch(2).getEndPos()); |
| Akron | 4204734 | 2018-11-27 15:15:38 +0100 | [diff] [blame] | 585 | assertEquals(6, kr.getTotalResults()); |
| 586 | } |
| 587 | |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 588 | @Test |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 589 | public void indexExpansionMultipleStartsWithCorrectSorting () throws IOException { |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 590 | KrillIndex ki = new KrillIndex(); |
| 591 | ki.addDoc(simpleFieldDoc("abccef")); |
| 592 | ki.commit(); |
| 593 | |
| 594 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 595 | SpanExpansionQuery seqL = new SpanExpansionQuery(stq, 0, 2, -1, true); |
| 596 | SpanExpansionQuery seqR = new SpanExpansionQuery(seqL, 0, 1, 0, true); |
| 597 | assertEquals( |
| 598 | "spanExpansion(spanExpansion(base:s:c, []{0, 2}, left), []{0, 1}, right)", |
| 599 | seqR.toString()); |
| 600 | Result kr = ki.search(seqR, (short) 20); |
| 601 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 602 | // for (Match km : kr.getMatches()) { |
| 603 | // System.out.println(km.getStartPos() + "," + km.getEndPos() + " " + |
| 604 | // km.getSnippetBrackets()); |
| 605 | // }; |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 606 | |
| 607 | // TODO: These are duplicate results that may be restricted with a wrapper |
| 608 | assertEquals("a[[bcc]]ef", kr.getMatch(3).getSnippetBrackets()); |
| 609 | assertEquals("a[[bcc]]ef", kr.getMatch(4).getSnippetBrackets()); |
| 610 | assertEquals(12, kr.getTotalResults()); |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 611 | } |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 612 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 613 | @Test |
| 614 | public void testRightExpansionWithWrongSorting () |
| 615 | throws IOException { |
| 616 | KrillIndex ki = new KrillIndex(); |
| 617 | ki.addDoc(simpleFieldDoc("abccef")); |
| 618 | ki.commit(); |
| 619 | |
| 620 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 621 | SpanExpansionQuery seqL = new SpanExpansionQuery(stq, 0, 2, -1, true); |
| 622 | kr = ki.search(seqL, (short) 20); |
| 623 | // for (Match km : kr.getMatches()) { |
| 624 | // System.out.println(km.getStartPos() + "," + km.getEndPos() + " " + |
| 625 | // km.getSnippetBrackets()); |
| 626 | // }; |
| 627 | |
| 628 | SpanExpansionQuery seqR = new SpanExpansionQuery(seqL, 0, 2, 0, true); |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 629 | assertEquals( |
| 630 | "spanExpansion(spanExpansion(base:s:c, []{0, 2}, left), []{0, 2}, right)", |
| 631 | seqR.toString()); |
| 632 | kr = ki.search(seqR, (short) 20); |
| 633 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 634 | |
| 635 | // for (Match km : kr.getMatches()) { |
| 636 | // System.out.println(km.getStartPos() + "," + km.getEndPos() + " " + |
| 637 | // km.getSnippetBrackets()); |
| 638 | // }; |
| 639 | |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 640 | assertEquals("a[[bcc]]ef", kr.getMatch(5).getSnippetBrackets()); |
| 641 | assertEquals("a[[bcce]]f", kr.getMatch(6).getSnippetBrackets()); |
| 642 | assertEquals(18, kr.getTotalResults()); |
| 643 | } |
| Akron | 7a7319a | 2018-11-28 17:08:56 +0100 | [diff] [blame] | 644 | |
| 645 | |
| 646 | @Test |
| 647 | public void testRightExpansionWithTextBoundary () throws IOException, QueryException { |
| 648 | KrillIndex ki = new KrillIndex(); |
| 649 | ki.addDoc(simpleFieldDoc("aabcd")); |
| 650 | ki.commit(); |
| 651 | |
| 652 | QueryBuilder kq = new QueryBuilder("base"); |
| 653 | |
| 654 | // a[ab]?[]{0,2} |
| 655 | SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.opt(kq.or("s:a","s:b"))).append(kq.repeat(kq.empty(),0,5)).toQuery(); |
| 656 | assertEquals( |
| 657 | "focus(254: spanContain(<base:base/s:t />, {254: "+ |
| 658 | "spanExpansion(spanOr([base:s:a, spanNext(base:s:a, spanOr([base:s:a, base:s:b]))]), []{0, 5}, right)"+ |
| 659 | "}))", sq.toString()); |
| 660 | |
| 661 | Result kr = ki.search(sq, (short) 25); |
| 662 | assertEquals("[[aabcd]]", kr.getMatch(8).getSnippetBrackets()); |
| 663 | assertEquals("a[[a]]bcd", kr.getMatch(9).getSnippetBrackets()); |
| 664 | assertEquals(16, kr.getTotalResults()); |
| 665 | } |
| 666 | |
| Akron | ddbc8f5 | 2018-11-28 11:53:42 +0100 | [diff] [blame] | 667 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 668 | @Test |
| margaretha | 52a0d11 | 2018-11-28 12:58:55 +0100 | [diff] [blame] | 669 | public void testLeftExpansionWrongSorting () throws IOException { |
| 670 | KrillIndex ki = new KrillIndex(); |
| 671 | ki.addDoc(simpleFieldDoc("B u d B R a d m d Z z s B d v", " ")); |
| 672 | ki.commit(); |
| 673 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 674 | // d positions: 2-3, 6-7, 8-9, 13-14 |
| margaretha | 52a0d11 | 2018-11-28 12:58:55 +0100 | [diff] [blame] | 675 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:d")); |
| 676 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 8, -1, true); |
| 677 | |
| 678 | Result kr = ki.search(seq, (short) 25); |
| 679 | // for (Match km : kr.getMatches()){ |
| 680 | // System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| 681 | // +km.getSnippetBrackets()); } |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 682 | |
| margaretha | 52a0d11 | 2018-11-28 12:58:55 +0100 | [diff] [blame] | 683 | assertEquals("BudBR[[admdZzsBd]]v", kr.getMatch(15).getSnippetBrackets()); |
| 684 | assertEquals(28, kr.getTotalResults()); |
| 685 | } |
| Akron | 4204734 | 2018-11-27 15:15:38 +0100 | [diff] [blame] | 686 | |
| margaretha | f151c96 | 2018-11-27 17:38:59 +0100 | [diff] [blame] | 687 | /** Tests left expansion over start doc boundary. Redundant matches should |
| 688 | * be omitted. |
| 689 | * @throws IOException |
| 690 | */ |
| 691 | @Test |
| 692 | public void testLeftExpansionRedundantMatches () throws IOException { |
| 693 | KrillIndex ki = new KrillIndex(); |
| 694 | ki.addDoc(simpleFieldDoc("A d F ü d T F u d m", " ")); |
| 695 | ki.commit(); |
| 696 | |
| 697 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:d")); |
| 698 | SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 6, -1, true); |
| 699 | Result kr = ki.search(seq, (short) 20); |
| 700 | |
| margaretha | 21e4ca2 | 2018-11-28 14:25:46 +0100 | [diff] [blame] | 701 | // for (Match km : kr.getMatches()) { |
| 702 | // System.out.println(km.getStartPos() + "," + km.getEndPos() + " " + |
| 703 | // km.getSnippetBrackets()); |
| 704 | // }; |
| 705 | |
| margaretha | f151c96 | 2018-11-27 17:38:59 +0100 | [diff] [blame] | 706 | Match m = kr.getMatch(5); |
| 707 | assertEquals(2, m.getStartPos()); |
| 708 | assertEquals(9, m.getEndPos()); |
| 709 | assertEquals(14, kr.getTotalResults()); |
| 710 | |
| 711 | } |
| 712 | |
| margaretha | 52a0d11 | 2018-11-28 12:58:55 +0100 | [diff] [blame] | 713 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 714 | private FieldDocument createFieldDoc6 () { |
| 715 | FieldDocument fd = new FieldDocument(); |
| 716 | fd.addString("ID", "doc-6"); |
| 717 | fd.addTV("base", "baaaaaa", |
| 718 | "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>10<i>10<b>0]" |
| 719 | + "[(1-2)s:a|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]" |
| 720 | + "[(3-4)s:a|s:d|_3$<i>3<i>4]" |
| 721 | + "[(4-5)s:a|_4$<i>4<i>5]" + "[(5-6)s:c|_5$<i>5<i>6]" |
| 722 | + "[(6-7)s:a|_6$<i>6<i>7]" + "[(7-8)s:d|_7$<i>7<i>8]" |
| 723 | + "[(8-9)s:a|_8$<i>8<i>9]" |
| 724 | + "[(9-10)s:a|_9$<i>9<i>10]"); |
| 725 | return fd; |
| 726 | } |
| Akron | 747986e | 2016-02-18 17:07:12 +0100 | [diff] [blame] | 727 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 728 | private FieldDocument createFieldDoc0 () { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 729 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 730 | fd.addString("ID", "doc-0"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 731 | fd.addTV("base", "ceccecdeec", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 732 | "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>10<i>10<b>0]" |
| 733 | + "[(1-2)s:e|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]" |
| 734 | + "[(3-4)s:c|s:d|_3$<i>3<i>4]" |
| 735 | + "[(4-5)s:e|_4$<i>4<i>5]" + "[(5-6)s:c|_5$<i>5<i>6]" |
| 736 | + "[(6-7)s:d|_6$<i>6<i>7]" + "[(7-8)s:e|_7$<i>7<i>8]" |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 737 | + "[(8-9)s:e|_8$<i>8<i>9]" |
| 738 | + "[(9-10)s:c|_9$<i>9<i>10]"); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 739 | return fd; |
| 740 | } |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 741 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 742 | private FieldDocument createFieldDoc1 () { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 743 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 744 | fd.addString("ID", "doc-1"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 745 | fd.addTV("base", "bbccdd", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 746 | "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0]]" |
| 747 | + "[(1-2)s:b|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]" |
| 748 | + "[(3-4)s:c|_3$<i>3<i>4]" + "[(4-5)s:d|_4$<i>4<i>5]" |
| 749 | + "[(5-6)s:d|_5$<i>5<i>6]"); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 750 | return fd; |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 751 | } |
| 752 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 753 | private FieldDocument createFieldDoc2 () { |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 754 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 755 | fd.addString("ID", "doc-2"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 756 | fd.addTV("base", "beccea", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 757 | "[(0-1)s:b|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0]]" |
| 758 | + "[(1-2)s:e|_1$<i>1<i>2]" + "[(2-3)s:c|_2$<i>2<i>3]" |
| 759 | + "[(3-4)s:c|_3$<i>3<i>4]" + "[(4-5)s:e|_4$<i>4<i>5]" |
| 760 | + "[(5-6)s:a|_5$<i>5<i>6]"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 761 | return fd; |
| 762 | } |
| 763 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 764 | private FieldDocument createFieldDoc3 () { |
| 765 | FieldDocument fd = new FieldDocument(); |
| 766 | fd.addString("ID", "doc-3"); |
| 767 | fd.addTV("base", "cab", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 768 | "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0]]" |
| 769 | + "[(1-2)s:a|_1$<i>1<i>2]" + "[(2-3)s:b|_2$<i>2<i>3]"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 770 | return fd; |
| 771 | } |
| 772 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 773 | private FieldDocument createFieldDoc4 () { |
| 774 | FieldDocument fd = new FieldDocument(); |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 775 | fd.addString("ID", "doc-4"); |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 776 | fd.addTV("base", "ce", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 777 | "[(0-1)s:c|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>2<i>2<b>0]]" |
| 778 | + "[(1-2)s:e|_1$<i>1<i>2]"); |
| Eliza Margaretha | 39662de | 2014-09-17 14:33:50 +0000 | [diff] [blame] | 779 | return fd; |
| Eliza Margaretha | 942dcf3 | 2015-01-22 15:13:00 +0000 | [diff] [blame] | 780 | } |
| 781 | |
| margaretha | e43c5e5 | 2018-03-20 15:24:53 +0100 | [diff] [blame] | 782 | private FieldDocument createFieldDoc5 () { |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 783 | FieldDocument fd = new FieldDocument(); |
| 784 | fd.addString("ID", "doc-5"); |
| 785 | fd.addTV("base", |
| 786 | "affe afffe baum baumgarten steingarten franz hans haus efeu effe", |
| 787 | "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10|<>:base/s:t$<b>64<i>0<i>9<i>9<b>0]" |
| 788 | + "[(5-10)s:afffe|_1$<i>5<i>10]" |
| 789 | + "[(11-15)s:baum|_2$<i>11<i>15]" |
| 790 | + "[(16-26)s:baumgarten|_3$<i>16<i>26]" |
| 791 | + "[(27-38)s:steingarten|_4$<i>27<i>38]" |
| 792 | + "[(39-44)s:franz|_5$<i>39<i>44]" |
| 793 | + "[(45-49)s:hans|_6$<i>45<i>49]" |
| 794 | + "[(50-54)s:haus|_7$<i>50<i>54]" |
| 795 | + "[(55-59)s:efeu|_8$<i>55<i>59]" |
| 796 | + "[(60-64)s:effe|_9$<i>60<i>64]"); |
| 797 | return fd; |
| 798 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 799 | |
| Akron | d6611cd | 2018-01-05 19:45:35 +0100 | [diff] [blame] | 800 | private Krill _newKrill (SpanQueryWrapper query) { |
| 801 | Krill ks = new Krill(query); |
| 802 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 803 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| 804 | return ks; |
| 805 | }; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 806 | } |