| Eliza Margaretha | 0192918 | 2014-02-19 11:48:59 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 4 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 5 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 6 | import org.junit.Test; |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 7 | import org.junit.runner.RunWith; |
| 8 | import org.junit.runners.JUnit4; |
| 9 | |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 10 | import de.ids_mannheim.korap.Krill; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 11 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.query.QueryBuilder; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 13 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 14 | |
| 15 | @RunWith(JUnit4.class) |
| 16 | public class TestRegexWildcardIndex { |
| 17 | |
| 18 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 19 | public void indexRegex () throws Exception { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 20 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 21 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | // abcabcabac |
| 23 | FieldDocument fd = new FieldDocument(); |
| 24 | fd.addTV( |
| 25 | "base", |
| 26 | "affe afffe baum baumgarten steingarten franz hans haus efeu effe", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 27 | "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]" |
| 28 | + "[(5-10)s:afffe|_1$<i>5<i>10]" |
| 29 | + "[(11-15)s:baum|_2$<i>11<i>15]" |
| 30 | + "[(16-26)s:baumgarten|_3$<i>16<i>26]" |
| 31 | + "[(27-38)s:steingarten|_4$<i>27<i>38]" |
| 32 | + "[(39-44)s:franz|_5$<i>39<i>44]" |
| 33 | + "[(45-49)s:hans|_6$<i>45<i>49]" |
| 34 | + "[(50-54)s:haus|_7$<i>50<i>54]" |
| 35 | + "[(55-59)s:efeu|_8$<i>55<i>59]" |
| 36 | + "[(60-64)s:effe|_9$<i>60<i>64]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 37 | ki.addDoc(fd); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 38 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 39 | ki.commit(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 40 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 41 | QueryBuilder kq = new QueryBuilder("base"); |
| 42 | SpanQuery sq = kq.re("s:af*e").toQuery(); |
| 43 | assertEquals("SpanMultiTermQueryWrapper(base:/s:af*e/)", sq.toString()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 44 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 45 | Krill ks = new Krill(sq); |
| 46 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 47 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 48 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 49 | Result kr = ki.search(ks); |
| 50 | assertEquals((long) 2, kr.getTotalResults()); |
| 51 | assertEquals("[affe] afffe ...", kr.getMatch(0).getSnippetBrackets()); |
| 52 | assertEquals("affe [afffe] baum ...", kr.getMatch(1) |
| 53 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 54 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 55 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:baum.*") |
| 56 | .toQuery())); |
| 57 | assertEquals((long) 2, kr.getTotalResults()); |
| 58 | assertEquals("... afffe [baum] baumgarten ...", kr.getMatch(0) |
| 59 | .getSnippetBrackets()); |
| 60 | assertEquals("... baum [baumgarten] steingarten ...", kr.getMatch(1) |
| 61 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 62 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re( |
| 64 | "s:.....?garten").toQuery())); |
| 65 | assertEquals((long) 2, kr.getTotalResults()); |
| 66 | assertEquals("... baum [baumgarten] steingarten ...", kr.getMatch(0) |
| 67 | .getSnippetBrackets()); |
| 68 | assertEquals("... baumgarten [steingarten] franz ...", kr.getMatch(1) |
| 69 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 70 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 71 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:ha.s") |
| 72 | .toQuery())); |
| 73 | assertEquals((long) 2, kr.getTotalResults()); |
| 74 | assertEquals("... franz [hans] haus ...", kr.getMatch(0) |
| 75 | .getSnippetBrackets()); |
| 76 | assertEquals("... hans [haus] efeu ...", kr.getMatch(1) |
| 77 | .getSnippetBrackets()); |
| 78 | |
| 79 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:.*ff.*") |
| 80 | .toQuery())); |
| 81 | assertEquals((long) 3, kr.getTotalResults()); |
| 82 | assertEquals("[affe] afffe ...", kr.getMatch(0).getSnippetBrackets()); |
| 83 | assertEquals("affe [afffe] baum ...", kr.getMatch(1) |
| 84 | .getSnippetBrackets()); |
| 85 | assertEquals("... efeu [effe]", kr.getMatch(2).getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 86 | }; |
| 87 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 88 | |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 89 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 90 | public void indexWildcard () throws Exception { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 91 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 92 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 93 | // abcabcabac |
| 94 | FieldDocument fd = new FieldDocument(); |
| 95 | fd.addTV( |
| 96 | "base", |
| 97 | "affe afffe baum baumgarten steingarten franz hans haus efeu effe", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 98 | "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]" |
| 99 | + "[(5-10)s:afffe|_1$<i>5<i>10]" |
| 100 | + "[(11-15)s:baum|_2$<i>11<i>15]" |
| 101 | + "[(16-26)s:baumgarten|_3$<i>16<i>26]" |
| 102 | + "[(27-38)s:steingarten|_4$<i>27<i>38]" |
| 103 | + "[(39-44)s:franz|_5$<i>39<i>44]" |
| 104 | + "[(45-49)s:hans|_6$<i>45<i>49]" |
| 105 | + "[(50-54)s:haus|_7$<i>50<i>54]" |
| 106 | + "[(55-59)s:efeu|_8$<i>55<i>59]" |
| 107 | + "[(60-64)s:effe|_9$<i>60<i>64]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | ki.addDoc(fd); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 109 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | ki.commit(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 111 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 112 | QueryBuilder kq = new QueryBuilder("base"); |
| 113 | SpanQuery sq = kq.wc("s:af*e").toQuery(); |
| 114 | assertEquals("SpanMultiTermQueryWrapper(base:s:af*e)", sq.toString()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 115 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 116 | Krill ks = new Krill(sq); |
| 117 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 118 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 119 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 120 | Result kr = ki.search(ks); |
| 121 | assertEquals((long) 2, kr.getTotalResults()); |
| 122 | assertEquals("[affe] afffe ...", kr.getMatch(0).getSnippetBrackets()); |
| 123 | assertEquals("affe [afffe] baum ...", kr.getMatch(1) |
| 124 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 125 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 126 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").wc("s:baum.*") |
| 127 | .toQuery())); |
| 128 | assertEquals((long) 0, kr.getTotalResults()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 129 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").wc("s:baum*") |
| 131 | .toQuery())); |
| 132 | assertEquals((long) 2, kr.getTotalResults()); |
| 133 | assertEquals("... afffe [baum] baumgarten ...", kr.getMatch(0) |
| 134 | .getSnippetBrackets()); |
| 135 | assertEquals("... baum [baumgarten] steingarten ...", kr.getMatch(1) |
| 136 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 137 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 138 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").wc("s:*garten") |
| 139 | .toQuery())); |
| 140 | assertEquals((long) 2, kr.getTotalResults()); |
| 141 | assertEquals("... baum [baumgarten] steingarten ...", kr.getMatch(0) |
| 142 | .getSnippetBrackets()); |
| 143 | assertEquals("... baumgarten [steingarten] franz ...", kr.getMatch(1) |
| 144 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 145 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").wc("s:ha?s") |
| 147 | .toQuery())); |
| 148 | assertEquals((long) 2, kr.getTotalResults()); |
| 149 | assertEquals("... franz [hans] haus ...", kr.getMatch(0) |
| 150 | .getSnippetBrackets()); |
| 151 | assertEquals("... hans [haus] efeu ...", kr.getMatch(1) |
| 152 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 153 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 154 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").wc("s:?ff?") |
| 155 | .toQuery())); |
| 156 | assertEquals((long) 2, kr.getTotalResults()); |
| 157 | assertEquals("[affe] afffe ...", kr.getMatch(0).getSnippetBrackets()); |
| 158 | assertEquals("... efeu [effe]", kr.getMatch(1).getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 159 | }; |
| 160 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 162 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 163 | public void indexRegexCaseInsensitive () throws Exception { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 164 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 165 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 166 | // abcabcabac |
| 167 | FieldDocument fd = new FieldDocument(); |
| 168 | fd.addTV( |
| 169 | "base", |
| 170 | "AfFe aFfFE Baum Baumgarten SteinGarten franZ HaNs Haus Efeu effe", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 171 | "[(0-4)s:AfFe|i:affe|_0$<i>0<i>4|-:t$<i>10]" |
| 172 | + "[(5-10)s:aFfFE|i:afffe|_1$<i>5<i>10]" |
| 173 | + "[(11-15)s:Baum|i:baum|_2$<i>11<i>15]" |
| 174 | + "[(16-26)s:Baumgarten|i:baumgarten|_3$<i>16<i>26]" |
| 175 | + "[(27-38)s:SteinGarten|i:steingarten|_4$<i>27<i>38]" |
| 176 | + "[(39-44)s:franZ|i:franz|_5$<i>39<i>44]" |
| 177 | + "[(45-49)s:HaNs|i:hans|_6$<i>45<i>49]" |
| 178 | + "[(50-54)s:Haus|i:haus|_7$<i>50<i>54]" |
| 179 | + "[(55-59)s:Efeu|i:efeu|_8$<i>55<i>59]" |
| 180 | + "[(60-64)s:effe|i:effe|_9$<i>60<i>64]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 181 | ki.addDoc(fd); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 182 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 183 | ki.commit(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 184 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 185 | QueryBuilder kq = new QueryBuilder("base"); |
| 186 | SpanQuery sq = kq.re("s:Af*e", true).toQuery(); |
| 187 | assertEquals("SpanMultiTermQueryWrapper(base:/i:af*e/)", sq.toString()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 188 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 189 | Krill ks = new Krill(sq); |
| 190 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 191 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 192 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 193 | Result kr = ki.search(ks); |
| 194 | assertEquals((long) 2, kr.getTotalResults()); |
| 195 | assertEquals("[AfFe] aFfFE ...", kr.getMatch(0).getSnippetBrackets()); |
| 196 | assertEquals("AfFe [aFfFE] Baum ...", kr.getMatch(1) |
| 197 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 198 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 199 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:Af.*e") |
| 200 | .toQuery())); |
| 201 | assertEquals((long) 1, kr.getTotalResults()); |
| 202 | assertEquals("[AfFe] aFfFE ...", kr.getMatch(0).getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 203 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 204 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:baum.*", |
| 205 | true).toQuery())); |
| 206 | assertEquals((long) 2, kr.getTotalResults()); |
| 207 | assertEquals("... aFfFE [Baum] Baumgarten ...", kr.getMatch(0) |
| 208 | .getSnippetBrackets()); |
| 209 | assertEquals("... Baum [Baumgarten] SteinGarten ...", kr.getMatch(1) |
| 210 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 211 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 212 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re( |
| 213 | "s:.*garten", true).toQuery())); |
| 214 | assertEquals((long) 2, kr.getTotalResults()); |
| 215 | assertEquals("... Baum [Baumgarten] SteinGarten ...", kr.getMatch(0) |
| 216 | .getSnippetBrackets()); |
| 217 | assertEquals("... Baumgarten [SteinGarten] franZ ...", kr.getMatch(1) |
| 218 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 219 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re( |
| 221 | "s:.*garten", false).toQuery())); |
| 222 | assertEquals((long) 1, kr.getTotalResults()); |
| 223 | assertEquals("... Baum [Baumgarten] SteinGarten ...", kr.getMatch(0) |
| 224 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 225 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 226 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:ha.s", |
| 227 | true).toQuery())); |
| 228 | assertEquals((long) 2, kr.getTotalResults()); |
| 229 | assertEquals("... franZ [HaNs] Haus ...", kr.getMatch(0) |
| 230 | .getSnippetBrackets()); |
| 231 | assertEquals("... HaNs [Haus] Efeu ...", kr.getMatch(1) |
| 232 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 233 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 234 | kr = ki.search(ks.setSpanQuery(new QueryBuilder("base").re("s:.*f*e", |
| 235 | true).toQuery())); |
| 236 | assertEquals((long) 3, kr.getTotalResults()); |
| 237 | assertEquals("[AfFe] aFfFE ...", kr.getMatch(0).getSnippetBrackets()); |
| 238 | assertEquals("AfFe [aFfFE] Baum ...", kr.getMatch(1) |
| 239 | .getSnippetBrackets()); |
| 240 | assertEquals("... Efeu [effe]", kr.getMatch(2).getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 241 | }; |
| 242 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 243 | |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 244 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 245 | public void indexRegexCombined () throws Exception { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 246 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 247 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 248 | // abcabcabac |
| 249 | FieldDocument fd = new FieldDocument(); |
| 250 | fd.addTV( |
| 251 | "base", |
| 252 | "affe afffe baum baumgarten steingarten franz hans haus efeu effe", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 253 | "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]" |
| 254 | + "[(5-10)s:afffe|_1$<i>5<i>10]" |
| 255 | + "[(11-15)s:baum|_2$<i>11<i>15]" |
| 256 | + "[(16-26)s:baumgarten|_3$<i>16<i>26]" |
| 257 | + "[(27-38)s:steingarten|_4$<i>27<i>38]" |
| 258 | + "[(39-44)s:franz|_5$<i>39<i>44]" |
| 259 | + "[(45-49)s:hans|_6$<i>45<i>49]" |
| 260 | + "[(50-54)s:haus|_7$<i>50<i>54]" |
| 261 | + "[(55-59)s:efeu|_8$<i>55<i>59]" |
| 262 | + "[(60-64)s:effe|_9$<i>60<i>64]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 263 | ki.addDoc(fd); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 264 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 265 | ki.commit(); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 266 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 267 | QueryBuilder kq = new QueryBuilder("base"); |
| 268 | SpanQuery sq = kq.seq(kq.seg("s:affe")).append(kq.re("s:af*e")) |
| 269 | .toQuery(); |
| 270 | assertEquals( |
| 271 | "spanNext(base:s:affe, SpanMultiTermQueryWrapper(base:/s:af*e/))", |
| 272 | sq.toString()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 273 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 274 | Krill ks = new Krill(sq); |
| 275 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 276 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 277 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 278 | Result kr = ki.search(ks); |
| 279 | assertEquals((long) 1, kr.getTotalResults()); |
| 280 | assertEquals("[affe afffe] baum ...", kr.getMatch(0) |
| 281 | .getSnippetBrackets()); |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 282 | }; |
| Nils Diewald | b3a09db | 2013-12-21 00:22:02 +0000 | [diff] [blame] | 283 | |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 284 | |
| 285 | @Test |
| Nils Diewald | be5943e | 2014-10-21 19:35:34 +0000 | [diff] [blame] | 286 | public void indexRegexWithinRewrite () throws Exception { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 287 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 288 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 289 | // abcabcabac |
| 290 | FieldDocument fd = new FieldDocument(); |
| 291 | fd.addTV( |
| 292 | "base", |
| 293 | "affe afffe baum baumgarten steingarten franz hans haus efeu effe", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 294 | "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]" |
| 295 | + "[(5-10)s:afffe|_1$<i>5<i>10]" |
| 296 | + "[(11-15)s:baum|_2$<i>11<i>15]" |
| 297 | + "[(16-26)s:baumgarten|_3$<i>16<i>26]" |
| 298 | + "[(27-38)s:steingarten|_4$<i>27<i>38]" |
| 299 | + "[(39-44)s:franz|_5$<i>39<i>44]" |
| 300 | + "[(45-49)s:hans|_6$<i>45<i>49]" |
| 301 | + "[(50-54)s:haus|_7$<i>50<i>54]" |
| 302 | + "[(55-59)s:efeu|_8$<i>55<i>59]" |
| 303 | + "[(60-64)s:effe|_9$<i>60<i>64]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 304 | ki.addDoc(fd); |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 305 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 306 | ki.commit(); |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 307 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 308 | QueryBuilder kq = new QueryBuilder("base"); |
| 309 | SpanQuery sq = kq.contains( |
| 310 | kq.seq(kq.re("s:a.*e")).append(kq.re("s:af*e")), |
| 311 | kq.seg("s:affe")).toQuery(); |
| 312 | assertEquals( |
| 313 | "spanContain(spanNext(SpanMultiTermQueryWrapper(base:/s:a.*e/), SpanMultiTermQueryWrapper(base:/s:af*e/)), base:s:affe)", |
| 314 | sq.toString()); |
| 315 | Krill ks = new Krill(sq); |
| 316 | ks.getMeta().getContext().left.setToken(true).setLength(1); |
| 317 | ks.getMeta().getContext().right.setToken(true).setLength(1); |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 318 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 319 | Result kr = ki.search(ks); |
| 320 | assertEquals((long) 1, kr.getTotalResults()); |
| 321 | assertEquals("[affe afffe] baum ...", kr.getMatch(0) |
| 322 | .getSnippetBrackets()); |
| Nils Diewald | ea12520 | 2014-09-19 15:12:06 +0000 | [diff] [blame] | 323 | }; |
| Nils Diewald | b0dd955 | 2013-12-20 02:28:34 +0000 | [diff] [blame] | 324 | }; |