| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 3 | import java.util.*; |
| 4 | import java.io.*; |
| 5 | |
| 6 | import org.apache.lucene.util.Version; |
| 7 | import org.apache.lucene.util.BytesRef; |
| 8 | import org.apache.lucene.util.Bits; |
| 9 | |
| 10 | import static org.junit.Assert.*; |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 11 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 12 | import org.junit.Test; |
| 13 | import org.junit.Ignore; |
| 14 | import org.junit.runner.RunWith; |
| 15 | import org.junit.runners.JUnit4; |
| 16 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 18 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 19 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 20 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 22 | import de.ids_mannheim.korap.index.FieldDocument; |
| Nils Diewald | e4986d7 | 2015-02-27 17:35:00 +0000 | [diff] [blame] | 23 | import de.ids_mannheim.korap.index.MultiTermTokenStream; |
| Eliza Margaretha | a849171 | 2014-07-25 13:27:54 +0000 | [diff] [blame] | 24 | |
| 25 | import org.apache.lucene.search.spans.SpanOrQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 26 | import org.apache.lucene.search.spans.SpanQuery; |
| 27 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 28 | |
| 29 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 30 | import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 31 | import de.ids_mannheim.korap.query.SpanWithinQuery; |
| 32 | |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 33 | import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper; |
| 34 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 35 | import org.apache.lucene.index.Term; |
| 36 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 37 | @RunWith(JUnit4.class) |
| 38 | public class TestNextIndex { |
| 39 | |
| 40 | // Todo: primary data as a non-indexed field separated. |
| 41 | |
| 42 | @Test |
| 43 | public void indexExample1 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 44 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 45 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 46 | // abcabcabac |
| 47 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 48 | fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" |
| 49 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]" |
| 50 | + "[(3-4)s:a|i:a|_3#3-4]" + "[(4-5)s:b|i:b|_4#4-5]" |
| 51 | + "[(5-6)s:c|i:c|_5#5-6]" + "[(6-7)s:a|i:a|_6#6-7]" |
| 52 | + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]" |
| 53 | + "[(9-10)s:c|i:c|_9#9-10]"); |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 54 | ki.addDoc(fd); |
| 55 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 56 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 57 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 58 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 59 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 60 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 61 | new SpanTermQuery(new Term("base", "s:b"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 62 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 64 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 65 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 66 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 67 | assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos); |
| 68 | assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos); |
| 69 | assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos); |
| 70 | assertEquals("StartPos (2)", 6, kr.getMatch(2).startPos); |
| 71 | assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 72 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 73 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 74 | new SpanTermQuery(new Term("base", "s:c"))); |
| 75 | |
| 76 | kr = ki.search(sq, (short) 10); |
| 77 | |
| 78 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 79 | assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos); |
| 80 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| 81 | assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos); |
| 82 | assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos); |
| 83 | |
| 84 | assertEquals(1, ki.numberOf("base", "documents")); |
| 85 | assertEquals(10, ki.numberOf("base", "t")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 86 | |
| 87 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 88 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 89 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 90 | new SpanTermQuery(new Term("base", "s:c")))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 91 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 92 | kr = ki.search(sq, (short) 2); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 93 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 94 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 95 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 96 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| 97 | assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos); |
| 98 | assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos); |
| 99 | |
| 100 | assertEquals(1, ki.numberOf("base", "documents")); |
| 101 | assertEquals(10, ki.numberOf("base", "t")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 102 | |
| 103 | }; |
| 104 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 105 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 106 | @Test |
| 107 | public void indexExample2 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 109 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | // abcabcabac |
| 111 | FieldDocument fd = new FieldDocument(); |
| 112 | fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" |
| 113 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]" |
| 114 | + "[(3-4)s:a|i:a|_3#3-4|<>:x#3-4$<i>4|<>:x#3-7$<i>7]" |
| 115 | + "[(4-5)s:b|i:b|_4#4-5]" + "[(5-6)s:c|i:c|_5#5-6]" |
| 116 | + "[(6-7)s:a|i:a|_6#6-7]" + "[(7-8)s:b|i:b|_7#7-8]" |
| 117 | + "[(8-9)s:a|i:a|_8#8-9]" + "[(9-10)s:c|i:c|_9#9-10]"); |
| 118 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 119 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 120 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 121 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 122 | SpanQuery sq; |
| 123 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 124 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 125 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:c")), |
| 126 | new SpanElementQuery("base", "x")); |
| 127 | |
| 128 | kr = ki.search(sq, (short) 10); |
| 129 | assertEquals("ab[cabca]bac", kr.getMatch(1).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 130 | |
| 131 | }; |
| 132 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 133 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 134 | @Test |
| 135 | public void indexExample3 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 136 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 137 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 138 | // abcabcabac |
| 139 | FieldDocument fd = new FieldDocument(); |
| 140 | fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" |
| 141 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]" |
| 142 | + "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" |
| 143 | + "[(4-5)s:b|i:b|_4#4-5]" + "[(5-6)s:c|i:c|_5#5-6]" |
| 144 | + "[(6-7)s:a|i:a|_6#6-7]" + "[(7-8)s:b|i:b|_7#7-8]" |
| 145 | + "[(8-9)s:a|i:a|_8#8-9]" + "[(9-10)s:c|i:c|_9#9-10]"); |
| 146 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 147 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 148 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 150 | SpanQuery sq; |
| 151 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 152 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 153 | sq = new SpanNextQuery(new SpanElementQuery("base", "x"), |
| 154 | new SpanTermQuery(new Term("base", "s:b"))); |
| 155 | |
| 156 | kr = ki.search(sq, (short) 10); |
| 157 | assertEquals("abc[abcab]ac", kr.getMatch(0).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 158 | }; |
| 159 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 160 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 161 | @Test |
| 162 | public void indexExample4 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 163 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 164 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 165 | // abcabcabac |
| 166 | // abc<x>abc<x>a</x>b</x>ac |
| 167 | FieldDocument fd = new FieldDocument(); |
| 168 | fd.addString("ID", "doc-1"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 169 | fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" |
| 170 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]" |
| 171 | + "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" |
| 172 | + "[(4-5)s:b|i:b|_4#4-5]" + "[(5-6)s:c|i:c|_5#5-6]" |
| 173 | + "[(6-7)s:a|i:a|_6#6-7]<>:x#6-8$<i>8]" |
| 174 | + "[(7-8)s:b|i:b|_7#7-8]" + "[(8-9)s:a|i:a|_8#8-9]" |
| 175 | + "[(9-10)s:c|i:c|_9#9-10]"); |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 176 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 178 | // xbz<x>xbzx</x>bxz |
| 179 | fd = new FieldDocument(); |
| 180 | fd.addString("ID", "doc-2"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 181 | fd.addTV("base", "xbzxbzxbxz", "[(0-1)s:x|i:x|_0#0-1|-:t$<i>10]" |
| 182 | + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:z|i:z|_2#2-3]" |
| 183 | + "[(3-4)s:x|i:x|_3#3-4|<>:x#3-7$<i>7]" |
| 184 | + "[(4-5)s:b|i:b|_4#4-5]" + "[(5-6)s:z|i:z|_5#5-6]" |
| 185 | + "[(6-7)s:x|i:x|_6#6-7]" + "[(7-8)s:b|i:b|_7#7-8]" |
| 186 | + "[(8-9)s:x|i:x|_8#8-9]" + "[(9-10)s:z|i:z|_9#9-10]"); |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 187 | ki.addDoc(fd); |
| 188 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 189 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 190 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 191 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 192 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 193 | sq = new SpanNextQuery(new SpanElementQuery("base", "x"), |
| 194 | new SpanTermQuery(new Term("base", "s:b"))); |
| 195 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 196 | kr = ki.search(sq, (short) 10); |
| 197 | assertEquals("TotalResults", kr.getTotalResults(), 2); |
| 198 | assertEquals("abc[abcab]ac", kr.getMatch(0).getSnippetBrackets()); |
| 199 | assertEquals("xbz[xbzxb]xz", kr.getMatch(1).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 200 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 201 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:c")), |
| 202 | new SpanElementQuery("base", "x")); |
| 203 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 204 | kr = ki.search(sq, (short) 10); |
| 205 | assertEquals(kr.getTotalResults(), 1); |
| 206 | assertEquals("ab[cabca]bac", kr.getMatch(0).getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 207 | |
| 208 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:z")), |
| 209 | new SpanElementQuery("base", "x")); |
| 210 | |
| Nils Diewald | e4db128 | 2015-02-05 22:42:33 +0000 | [diff] [blame] | 211 | kr = ki.search(sq, (short) 10); |
| 212 | assertEquals(1, kr.getTotalResults()); |
| 213 | assertEquals("xb[zxbzx]bxz", kr.getMatch(0).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 214 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 215 | |
| 216 | |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 217 | /** |
| 218 | * Multiple atomic indices |
| 219 | * Skip to a greater doc# |
| 220 | * */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 221 | @Test |
| 222 | public void indexExample5 () throws IOException { |
| 223 | KrillIndex ki = new KrillIndex(); |
| 224 | ki.addDoc(createFieldDoc1()); |
| 225 | ki.addDoc(createFieldDoc2()); |
| 226 | ki.commit(); |
| 227 | ki.addDoc(createFieldDoc3()); |
| 228 | ki.commit(); |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 229 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 230 | SpanQuery sq = new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 231 | "s:d")), new SpanTermQuery(new Term("base", "s:b"))); |
| 232 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 233 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 234 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 235 | // Match #0 |
| 236 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 237 | assertEquals("StartPos", 4, kr.getMatch(0).startPos); |
| 238 | assertEquals("EndPos", 6, kr.getMatch(0).endPos); |
| 239 | // Match #1 |
| 240 | assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID()); |
| 241 | assertEquals("StartPos", 1, kr.getMatch(1).startPos); |
| 242 | assertEquals("EndPos", 3, kr.getMatch(1).endPos); |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 243 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 244 | sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 245 | new SpanTermQuery(new Term("base", "s:d"))); |
| 246 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 247 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 248 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 249 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 250 | assertEquals("StartPos", 2, kr.getMatch(0).startPos); |
| 251 | assertEquals("EndPos", 4, kr.getMatch(0).endPos); |
| 252 | } |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 253 | |
| Nils Diewald | f5f29ff | 2014-02-14 12:24:34 +0000 | [diff] [blame] | 254 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 255 | /** Skip to NextSpan */ |
| 256 | @Test |
| 257 | public void indexExample6 () throws IOException { |
| 258 | KrillIndex ki = new KrillIndex(); |
| 259 | ki.addDoc(createFieldDoc1()); |
| 260 | ki.addDoc(createFieldDoc2()); |
| 261 | ki.addDoc(createFieldDoc3()); |
| 262 | ki.commit(); |
| 263 | |
| 264 | SpanQuery sq = new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 265 | "s:c")), new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 266 | "s:d")), new SpanTermQuery(new Term("base", "s:b")))); |
| 267 | |
| 268 | Result kr = ki.search(sq, (short) 10); |
| 269 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 270 | assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID()); |
| 271 | assertEquals("StartPos", 0, kr.getMatch(0).startPos); |
| 272 | assertEquals("EndPos", 3, kr.getMatch(0).endPos); |
| 273 | |
| 274 | } |
| 275 | |
| 276 | |
| 277 | @Test |
| 278 | public void indexExample7Distances () throws Exception { |
| 279 | KrillIndex ki = new KrillIndex(); |
| 280 | ki.addDoc(createFieldDoc1()); |
| 281 | ki.addDoc(createFieldDoc2()); |
| 282 | ki.addDoc(createFieldDoc3()); |
| 283 | ki.addDoc(createFieldDoc4()); |
| 284 | ki.commit(); |
| 285 | |
| 286 | SpanSequenceQueryWrapper sq = new SpanSequenceQueryWrapper("base"); |
| 287 | sq.append("i:b").append("i:d").withConstraint(1, 3); |
| 288 | |
| 289 | Result kr = ki.search(sq.toQuery(), (short) 10); |
| 290 | |
| 291 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 292 | assertEquals("doc-number", "match-doc-0-p2-5", kr.getMatch(0).getID()); |
| 293 | assertEquals("doc-number", "match-doc-2-p2-4", kr.getMatch(1).getID()); |
| 294 | assertEquals("doc-number", "match-doc-3-p2-5", kr.getMatch(2).getID()); |
| 295 | }; |
| 296 | |
| 297 | |
| 298 | @Test |
| 299 | public void indexExample8Distances () throws Exception { |
| 300 | KrillIndex ki = new KrillIndex(); |
| 301 | ki.addDoc(createFieldDoc1()); |
| 302 | ki.addDoc(createFieldDoc2()); |
| 303 | ki.addDoc(createFieldDoc3()); |
| 304 | ki.addDoc(createFieldDoc4()); |
| 305 | ki.commit(); |
| 306 | |
| 307 | SpanSequenceQueryWrapper sq = new SpanSequenceQueryWrapper("base"); |
| 308 | sq.append("i:a").append("i:b").withConstraint(0, 3, "e"); |
| 309 | |
| 310 | Result kr = ki.search(sq.toQuery(), (short) 10); |
| 311 | |
| 312 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 313 | assertEquals("doc-number", "match-doc-0-p3-6", kr.getMatch(0).getID()); |
| 314 | assertEquals("doc-number", "match-doc-1-p1-3", kr.getMatch(1).getID()); |
| 315 | assertEquals("doc-number", "match-doc-3-p3-6", kr.getMatch(2).getID()); |
| 316 | }; |
| 317 | |
| 318 | |
| 319 | @Test |
| 320 | public void indexExample9 () throws IOException { |
| 321 | KrillIndex ki = new KrillIndex(); |
| 322 | ki.addDoc(createFieldDoc1()); |
| 323 | ki.commit(); |
| 324 | |
| 325 | SpanQuery sq = new SpanNextQuery(new SpanOrQuery(new SpanTermQuery( |
| 326 | new Term("base", "s:a")), new SpanTermQuery(new Term("base", |
| 327 | "s:b"))), new SpanTermQuery(new Term("base", "s:c"))); |
| 328 | |
| 329 | Result kr = ki.search(sq, (short) 10); |
| 330 | |
| 331 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 332 | assertEquals(2, kr.getMatch(0).getEndPos()); |
| 333 | assertEquals(3, kr.getMatch(1).getStartPos()); |
| 334 | assertEquals(5, kr.getMatch(1).getEndPos()); |
| 335 | } |
| 336 | |
| 337 | |
| 338 | private FieldDocument createFieldDoc1 () { |
| 339 | FieldDocument fd = new FieldDocument(); |
| 340 | fd.addString("ID", "doc-0"); |
| 341 | fd.addTV("base", "bcbadb", "[(0-1)s:b|i:b|_0#0-1]" |
| 342 | + "[(1-2)s:c|i:c|s:b|_1#1-2]" + "[(2-3)s:b|i:b|_2#2-3]" |
| 343 | + "[(3-4)s:a|i:a|_3#3-4|<>:e#3-6$<i>6]" |
| 344 | + "[(4-5)s:d|i:d|s:c|_4#4-5]" + "[(5-6)s:b|i:b|_5#5-6]"); |
| 345 | return fd; |
| 346 | } |
| 347 | |
| 348 | |
| 349 | private FieldDocument createFieldDoc2 () { |
| 350 | FieldDocument fd = new FieldDocument(); |
| 351 | fd.addString("ID", "doc-1"); |
| 352 | fd.addTV("base", "caba", "[(0-1)s:c|i:c|_0#0-1]" |
| 353 | + "[(1-2)s:a|i:a|s:c|_1#1-2|<>:e#1-3$<i>3]" |
| 354 | + "[(2-3)s:b|i:b|s:a|_2#2-3]" + "[(3-4)s:a|i:a|_3#3-4]"); |
| 355 | return fd; |
| 356 | } |
| 357 | |
| 358 | |
| 359 | private FieldDocument createFieldDoc3 () { |
| 360 | FieldDocument fd = new FieldDocument(); |
| 361 | fd.addString("ID", "doc-2"); |
| 362 | fd.addTV("base", "cdbd", "[(0-1)s:c|i:c|_0#0-1]" |
| 363 | + "[(1-2)s:d|i:d|_1#1-2]" + "[(2-3)s:b|i:b|s:a|_2#2-3]" |
| 364 | + "[(3-4)s:d|i:d|_3#3-4]"); |
| 365 | |
| 366 | return fd; |
| 367 | } |
| 368 | |
| 369 | |
| 370 | private FieldDocument createFieldDoc4 () { |
| 371 | FieldDocument fd = new FieldDocument(); |
| 372 | fd.addString("ID", "doc-3"); |
| 373 | fd.addTV("base", "bcbadb", "[(0-1)s:b|i:b|_0#0-1]" |
| 374 | + "[(1-2)s:c|i:c|s:b|<>:s#1-3$<i>3|_1#1-2]" |
| 375 | + "[(2-3)s:b|i:b|_2#2-3]" |
| 376 | + "[(3-4)s:a|i:a|_3#3-4|<>:e#3-6$<i>6]" |
| 377 | + "[(4-5)s:d|i:d|s:c|_4#4-5]" + "[(5-6)s:b|i:b|_5#5-6]"); |
| 378 | return fd; |
| 379 | } |
| 380 | |
| 381 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 382 | }; |