| Eliza Margaretha | 0192918 | 2014-02-19 11:48:59 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 3 | import java.util.*; |
| 4 | import java.io.*; |
| 5 | |
| 6 | import org.apache.lucene.util.Version; |
| 7 | import org.apache.lucene.util.BytesRef; |
| 8 | import org.apache.lucene.util.Bits; |
| 9 | |
| 10 | import static org.junit.Assert.*; |
| Eliza Margaretha | 2289898 | 2014-11-04 17:10:21 +0000 | [diff] [blame] | 11 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 12 | import org.junit.Test; |
| 13 | import org.junit.Ignore; |
| 14 | import org.junit.runner.RunWith; |
| 15 | import org.junit.runners.JUnit4; |
| 16 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 18 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 19 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 20 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 22 | import de.ids_mannheim.korap.query.SpanWithinQuery; |
| 23 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 24 | import de.ids_mannheim.korap.query.SpanClassQuery; |
| Eliza Margaretha | 2289898 | 2014-11-04 17:10:21 +0000 | [diff] [blame] | 25 | import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper; |
| 26 | import de.ids_mannheim.korap.util.QueryException; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 27 | import de.ids_mannheim.korap.index.FieldDocument; |
| Nils Diewald | e4986d7 | 2015-02-27 17:35:00 +0000 | [diff] [blame] | 28 | import de.ids_mannheim.korap.index.MultiTermTokenStream; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 29 | import org.apache.lucene.search.spans.SpanQuery; |
| 30 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 31 | import org.apache.lucene.index.Term; |
| 32 | |
| 33 | import java.nio.ByteBuffer; |
| 34 | |
| 35 | // mvn -Dtest=TestWithinIndex#indexExample1 test |
| 36 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 37 | |
| 38 | /** |
| 39 | * @author diewald |
| 40 | * @author margaretha |
| 41 | */ |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 42 | @RunWith(JUnit4.class) |
| 43 | public class TestWithinIndex { |
| 44 | |
| 45 | // Todo: primary data as a non-indexed field separated. |
| 46 | |
| 47 | @Test |
| 48 | public void indexExample1a () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 49 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 50 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 51 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 52 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 54 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 55 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 56 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 57 | "[(9-12)s:h]" + // 4 |
| 58 | "[(12-15)s:i]" + // 5 |
| 59 | "[(15-18)s:j]" + // 6 |
| 60 | "[(18-21)s:h]" + // 7 |
| 61 | "[(21-24)s:i]" + // 8 |
| 62 | "[(24-27)s:j]" + // 9 |
| 63 | "[(27-30)s:h]" + // 10 |
| 64 | "[(30-33)s:i]" + // 11 |
| 65 | "[(33-36)s:j]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 66 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 67 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 68 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 70 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 71 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 72 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 73 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 74 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 75 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 76 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 77 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 78 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 79 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 80 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 81 | assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos); |
| 82 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 83 | assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos); |
| 84 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 85 | assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos); |
| 86 | assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos); |
| 87 | assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos); |
| 88 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 89 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 90 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 91 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 92 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 93 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 94 | }; |
| 95 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 96 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 97 | @Test |
| 98 | public void indexExample1b () throws IOException { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 99 | // Cases 9, 12, 13 |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 100 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 101 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 102 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 103 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 104 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 105 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 106 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 107 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 108 | "[(9-12)s:h]" + // 4 |
| 109 | "[(12-15)s:i]" + // 5 |
| 110 | "[(15-18)s:j]" + // 6 |
| 111 | "[(18-21)s:h]" + // 7 |
| 112 | "[(21-24)s:i]" + // 8 |
| 113 | "[(24-27)s:j]" + // 9 |
| 114 | "[(27-30)s:h]" + // 10 |
| 115 | "[(30-33)s:i]" + // 11 |
| 116 | "[(33-36)s:j]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 117 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 118 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 119 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 120 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 121 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 122 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 123 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 124 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 125 | "[(9-12)s:h]" + // 4 |
| 126 | "[(12-15)s:i]" + // 5 |
| 127 | "[(15-18)s:j]" + // 6 |
| 128 | "[(18-21)s:h]" + // 7 |
| 129 | "[(21-24)s:i]" + // 8 |
| 130 | "[(24-27)s:j]" + // 9 |
| 131 | "[(27-30)s:h]" + // 10 |
| 132 | "[(30-33)s:i]" + // 11 |
| 133 | "[(33-36)s:j]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 134 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 135 | |
| 136 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 137 | // Save documents |
| 138 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 139 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 140 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 141 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 142 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 143 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 144 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 146 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 147 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 148 | assertEquals("totalResults", kr.getTotalResults(), 12); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 150 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 151 | assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos); |
| 152 | assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID); |
| 153 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 154 | assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos); |
| 155 | assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID); |
| 156 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 157 | assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos); |
| 158 | assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID); |
| 159 | assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos); |
| 160 | assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos); |
| 161 | assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID); |
| 162 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 163 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 164 | assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID); |
| 165 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 166 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| 167 | assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 168 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 169 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| 170 | assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos); |
| 171 | assertEquals("Doc (6)", 1, kr.getMatch(6).internalDocID); |
| 172 | assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos); |
| 173 | assertEquals("EndPos (7)", 12, kr.getMatch(7).endPos); |
| 174 | assertEquals("Doc (7)", 1, kr.getMatch(7).internalDocID); |
| 175 | assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos); |
| 176 | assertEquals("EndPos (8)", 12, kr.getMatch(8).endPos); |
| 177 | assertEquals("Doc (8)", 1, kr.getMatch(8).internalDocID); |
| 178 | assertEquals("StartPos (9)", 1, kr.getMatch(9).startPos); |
| 179 | assertEquals("EndPos (9)", 9, kr.getMatch(9).endPos); |
| 180 | assertEquals("Doc (9)", 1, kr.getMatch(9).internalDocID); |
| 181 | assertEquals("StartPos (10)", 1, kr.getMatch(10).startPos); |
| 182 | assertEquals("EndPos (10)", 9, kr.getMatch(10).endPos); |
| 183 | assertEquals("Doc (10)", 1, kr.getMatch(10).internalDocID); |
| 184 | assertEquals("StartPos (11)", 2, kr.getMatch(11).startPos); |
| 185 | assertEquals("EndPos (11)", 6, kr.getMatch(11).endPos); |
| 186 | assertEquals("Doc (11)", 1, kr.getMatch(11).internalDocID); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 187 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 188 | /* |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 189 | for (Match km : kr.getMatches()){ |
| 190 | System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 191 | +km.getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 192 | }; |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 193 | */ |
| 194 | |
| 195 | assertEquals(2, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 196 | }; |
| 197 | |
| 198 | |
| 199 | @Test |
| 200 | public void indexExample1c () throws IOException { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 201 | // Cases 9, 12, 13 |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 202 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 203 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 204 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 205 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 206 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 207 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 208 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 209 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 210 | "[(9-12)s:h]" + // 4 |
| 211 | "[(12-15)s:i]" + // 5 |
| 212 | "[(15-18)s:j]" + // 6 |
| 213 | "[(18-21)s:h]" + // 7 |
| 214 | "[(21-24)s:i]" + // 8 |
| 215 | "[(24-27)s:j]" + // 9 |
| 216 | "[(27-30)s:h]" + // 10 |
| 217 | "[(30-33)s:i]" + // 11 |
| 218 | "[(33-36)s:j]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 219 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 221 | // <a>x<a>y<a>zabc</a>abc</a>abc</a> |
| 222 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 223 | fd.addTV("base", "x y z a b c a b c a b c ", |
| 224 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 225 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 226 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 227 | "[(9-12)s:a]" + // 4 |
| 228 | "[(12-15)s:b]" + // 5 |
| 229 | "[(15-18)s:c]" + // 6 |
| 230 | "[(18-21)s:a]" + // 7 |
| 231 | "[(21-24)s:b]" + // 8 |
| 232 | "[(24-27)s:c]" + // 9 |
| 233 | "[(27-30)s:a]" + // 10 |
| 234 | "[(30-33)s:b]" + // 11 |
| 235 | "[(33-36)s:c]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 236 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 237 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 238 | // Save documents |
| 239 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 240 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 241 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 242 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 243 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 244 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 245 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 246 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 247 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 248 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 249 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 250 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 251 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 252 | assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos); |
| 253 | assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID); |
| 254 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 255 | assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos); |
| 256 | assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID); |
| 257 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 258 | assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos); |
| 259 | assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID); |
| 260 | assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos); |
| 261 | assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos); |
| 262 | assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID); |
| 263 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 264 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 265 | assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID); |
| 266 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 267 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| 268 | assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 269 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 270 | assertEquals(2, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 271 | }; |
| 272 | |
| 273 | |
| 274 | @Test |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 275 | public void indexExample1d () throws IOException { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 276 | // Cases 9, 12, 13 |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 277 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 278 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 279 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 280 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 281 | fd.addTV("base", "x y z h i j h i j h i j ", |
| 282 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 283 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 284 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 285 | "[(9-12)s:h]" + // 4 |
| 286 | "[(12-15)s:i]" + // 5 |
| 287 | "[(15-18)s:j]" + // 6 |
| 288 | "[(18-21)s:h]" + // 7 |
| 289 | "[(21-24)s:i]" + // 8 |
| 290 | "[(24-27)s:j]" + // 9 |
| 291 | "[(27-30)s:h]" + // 10 |
| 292 | "[(30-33)s:i]" + // 11 |
| 293 | "[(33-36)s:j]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 294 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 295 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 296 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 297 | fd.addTV("base", "x y z h ", "[(0-3)s:x]" + // 1 |
| 298 | "[(3-6)s:y]" + // 2 |
| 299 | "[(6-9)s:z]" + // 3 |
| 300 | "[(9-12)s:h]"); // 4 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 301 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 302 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 303 | // <a>x<a>y<a>zabc</a>abc</a>abc</a> |
| 304 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 305 | fd.addTV("base", "x y z a b c a b c a b c ", |
| 306 | "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1 |
| 307 | "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2 |
| 308 | "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3 |
| 309 | "[(9-12)s:a]" + // 4 |
| 310 | "[(12-15)s:b]" + // 5 |
| 311 | "[(15-18)s:c]" + // 6 |
| 312 | "[(18-21)s:a]" + // 7 |
| 313 | "[(21-24)s:b]" + // 8 |
| 314 | "[(24-27)s:c]" + // 9 |
| 315 | "[(27-30)s:a]" + // 10 |
| 316 | "[(30-33)s:b]" + // 11 |
| 317 | "[(33-36)s:c]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 318 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 319 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 320 | // Save documents |
| 321 | ki.commit(); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 322 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 323 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 324 | Result kr; |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 325 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 326 | sq = new SpanElementQuery("base", "a"); |
| 327 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 328 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 329 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 330 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 331 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 332 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 333 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 334 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 335 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 336 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 337 | assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos); |
| 338 | assertEquals("Doc (0)", 0, kr.getMatch(0).internalDocID); |
| 339 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 340 | assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos); |
| 341 | assertEquals("Doc (1)", 0, kr.getMatch(1).internalDocID); |
| 342 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 343 | assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos); |
| 344 | assertEquals("Doc (2)", 0, kr.getMatch(2).internalDocID); |
| 345 | assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos); |
| 346 | assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos); |
| 347 | assertEquals("Doc (3)", 0, kr.getMatch(3).internalDocID); |
| 348 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 349 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 350 | assertEquals("Doc (4)", 0, kr.getMatch(4).internalDocID); |
| 351 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 352 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| 353 | assertEquals("Doc (5)", 0, kr.getMatch(5).internalDocID); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 355 | assertEquals(3, ki.numberOf("documents")); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 356 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 357 | |
| 358 | |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 359 | @Test |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 360 | public void indexExample2a () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 361 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 362 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 363 | // <a><a><a>h</a>hij</a>hij</a> |
| 364 | FieldDocument fd = new FieldDocument(); |
| 365 | fd.addTV("base", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 366 | // <a><a>hhij</a>hijh</a>ij</a> |
| 367 | "h h i j h i j h i j ", |
| 368 | "[s:h|_0#0-3|<>:a#0-24$<i>7|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1 |
| 369 | "[s:h|_1#3-6]" + // 2 |
| 370 | "[s:i|_2#6-9]" + // 3 |
| 371 | "[s:j|_3#9-12]" + // 4 |
| 372 | "[s:h|_4#12-15]" + // 5 |
| 373 | "[s:i|_5#15-18]" + // 6 |
| 374 | "[s:j|_6#18-21]" + // 7 |
| 375 | "[s:h|_7#21-24]" + // 8 |
| 376 | "[s:i|_8#24-27]" + // 9 |
| 377 | "[s:j|_9#27-30]"); // 10 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 378 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 379 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 380 | // Save documents |
| 381 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 382 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 383 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 384 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 385 | SpanQuery sq; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 386 | Result kr; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 387 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 388 | sq = new SpanElementQuery("base", "a"); |
| 389 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 390 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 391 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 392 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 393 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 394 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 395 | assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos); |
| 396 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 397 | assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 398 | |
| 399 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 400 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 401 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 402 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 403 | |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 404 | assertEquals("totalResults", kr.getTotalResults(), 10); |
| 405 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 406 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 407 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 408 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 409 | assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos); |
| 410 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 411 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 412 | assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos); |
| 413 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| 414 | assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos); |
| 415 | assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos); |
| 416 | assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos); |
| 417 | assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 418 | assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos); |
| 419 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 420 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 421 | assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 422 | assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 423 | assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 424 | assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 425 | assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos); |
| 426 | assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos); |
| 427 | assertEquals("EndPos (9)", 9, kr.getMatch(9).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 428 | }; |
| 429 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 430 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 431 | @Test |
| 432 | public void indexExample2b () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 433 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 434 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 435 | // 6,9,12 |
| 436 | // <a><a><a>h</a>hij</a>hij</a>h |
| 437 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 438 | fd.addTV("base", "h h i j h i j h i j h ", |
| 439 | "[(0-3)s:h|<>:a#0-21$<i>6|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1 |
| 440 | "[(3-6)s:h]" + // 2 |
| 441 | "[(6-9)s:i]" + // 3 |
| 442 | "[(9-12)s:j]" + // 4 |
| 443 | "[(12-15)s:h]" + // 5 |
| 444 | "[(15-18)s:i]" + // 6 |
| 445 | "[(18-21)s:j]" + // 7 |
| 446 | "[(21-24)s:h]" + // 8 |
| 447 | "[(24-27)s:i]" + // 9 |
| 448 | "[(27-30)s:j]" + // 10 |
| 449 | "[(30-33)s:h]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 450 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 451 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 452 | // Save documents |
| 453 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 454 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 455 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 456 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 457 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 458 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 459 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 460 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 461 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 462 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 463 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 464 | assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 465 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 466 | assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 467 | |
| 468 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 469 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 470 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 471 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 472 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 473 | assertEquals("totalResults", kr.getTotalResults(), 9); |
| 474 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 475 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 476 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 477 | assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 478 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 479 | assertEquals("EndPos (2)", 6, kr.getMatch(2).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 480 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 481 | assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 482 | assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 483 | assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 484 | assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 485 | assertEquals("EndPos (5)", 9, kr.getMatch(5).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 486 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 487 | assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 488 | assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 489 | assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 490 | assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 491 | assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 492 | }; |
| 493 | |
| 494 | |
| 495 | @Test |
| 496 | public void indexExample2c () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 497 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 498 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 499 | // 2, 6, 9, 12 |
| 500 | // <a><a><a>h</a>hij</a>hij</a>h<a>i</i> |
| 501 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 502 | fd.addTV("base", "h h i j h i j h i j h i ", |
| 503 | "[(0-3)s:h|<>:a#0-21$<i>7|<>:a#0-15$<i>4|<>:a#0-30$<i>10]" + // 1 |
| 504 | "[(3-6)s:h]" + // 2 |
| 505 | "[(6-9)s:i]" + // 3 |
| 506 | "[(9-12)s:j]" + // 4 |
| 507 | "[(12-15)s:h]" + // 5 |
| 508 | "[(15-18)s:i]" + // 6 |
| 509 | "[(18-21)s:j]" + // 7 |
| 510 | "[(21-24)s:h]" + // 8 |
| 511 | "[(24-27)s:i]" + // 9 |
| 512 | "[(27-30)s:j]" + // 10 |
| 513 | "[(30-33)s:h]" + // 11 |
| 514 | "[(33-36)s:i|<>:a#33-36$<i>12]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 515 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 516 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 517 | // Save documents |
| 518 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 519 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 520 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 521 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 522 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 523 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 524 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 525 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 526 | assertEquals("totalResults", kr.getTotalResults(), 4); |
| 527 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 528 | assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos); |
| 529 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 530 | assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos); |
| 531 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 532 | assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos); |
| 533 | assertEquals("StartPos (3)", 11, kr.getMatch(3).startPos); |
| 534 | assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 535 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 536 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 537 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 538 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 539 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 540 | |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 541 | assertEquals("totalResults", kr.getTotalResults(), 11); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 542 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 543 | assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos); |
| 544 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 545 | assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos); |
| 546 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 547 | assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 548 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 549 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| 550 | assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos); |
| 551 | assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos); |
| 552 | assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos); |
| 553 | assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 554 | assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos); |
| 555 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 556 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| 557 | assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos); |
| 558 | assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos); |
| 559 | assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos); |
| 560 | assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos); |
| 561 | assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 562 | assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos); |
| 563 | assertEquals("EndPos (9)", 10, kr.getMatch(9).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 564 | }; |
| 565 | |
| 566 | |
| 567 | @Test |
| 568 | public void indexExample2d () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 569 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 570 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 571 | // 2, 6, 9, 12, 7 |
| 572 | // <a><a><a>h</a>hij</a>hij</a>h<a>h</h> |
| 573 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 574 | fd.addTV("base", "h h i j h i j h i j h h ", |
| 575 | "[(0-3)s:h|_0#0-3|<>:a#0-18$<i>6|<>:a#0-15$<i>4|<>:a#0-27$<i>8]" |
| 576 | + // 1 |
| 577 | "[(3-6)s:h|_1#3-6]" + // 2 |
| 578 | "[(6-9)s:i|_2#6-9]" + // 3 |
| 579 | "[(9-12)s:j|_3#9-12]" + // 4 |
| 580 | "[(12-15)s:h|_4#12-15]" + // 5 |
| 581 | "[(15-18)s:i|_5#15-18]" + // 6 |
| 582 | "[(18-21)s:j|_6#18-21]" + // 7 |
| 583 | "[(21-24)s:h|_7#21-24]" + // 8 |
| 584 | "[(24-27)s:i|_8#24-27]" + // 9 |
| 585 | "[(27-30)s:j|_9#27-30]" + // 10 |
| 586 | "[(30-33)s:h|_10#30-33|<>:a#30-36$<i>12]" + // 11 |
| 587 | "[(33-36)s:h|_11#33-36|<>:a#33-36$<i>12]"); // 12 |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 588 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 589 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 590 | // Save documents |
| 591 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 592 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 593 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 594 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 595 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 596 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 597 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 598 | |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 599 | assertEquals("totalResults", kr.getTotalResults(), 5); |
| 600 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 601 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 602 | assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos); |
| 603 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 604 | assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 605 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 606 | assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos); |
| 607 | assertEquals("StartPos (3)", 10, kr.getMatch(3).startPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 608 | assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 609 | assertEquals("StartPos (4)", 11, kr.getMatch(4).startPos); |
| 610 | assertEquals("EndPos (4)", 12, kr.getMatch(4).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 611 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 612 | sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 613 | new SpanTermQuery(new Term("base", "s:h"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 614 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 615 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 616 | |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 617 | assertEquals("totalResults", kr.getTotalResults(), 13); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 618 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 619 | assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos); |
| 620 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 621 | assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos); |
| 622 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 623 | assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos); |
| 624 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 625 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 626 | assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 627 | assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 628 | assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 629 | assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 630 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| 631 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 632 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 633 | assertEquals("EndPos (6)", 8, kr.getMatch(6).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 634 | assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 635 | assertEquals("EndPos (7)", 8, kr.getMatch(7).endPos); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 636 | assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 637 | assertEquals("EndPos (8)", 8, kr.getMatch(8).endPos); |
| 638 | assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos); |
| 639 | assertEquals("EndPos (9)", 8, kr.getMatch(9).endPos); |
| 640 | |
| 641 | assertEquals("StartPos (10)", 10, kr.getMatch(10).startPos); |
| 642 | assertEquals("EndPos (10)", 12, kr.getMatch(10).endPos); |
| 643 | assertEquals("StartPos (11)", 10, kr.getMatch(11).startPos); |
| 644 | assertEquals("EndPos (11)", 12, kr.getMatch(11).endPos); |
| 645 | |
| 646 | assertEquals("StartPos (12)", 11, kr.getMatch(12).startPos); |
| 647 | assertEquals("EndPos (12)", 12, kr.getMatch(12).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 648 | }; |
| 649 | |
| 650 | |
| 651 | @Test |
| 652 | public void indexExample3 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 653 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 654 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 655 | // <a><a><a>u</a></a></a> |
| 656 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 657 | fd.addTV("base", "xyz", |
| 658 | "[(0-3)s:xyz|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:b#0-3$<i>0]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 659 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 660 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 661 | // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a> |
| 662 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 663 | fd.addTV("base", "x y z c d e c d e c d e ", |
| 664 | "[(0-3)s:x|<>:a#0-36$<i>12|<>:b#0-36$<i>12]" |
| 665 | + "[(3-6)s:y|<>:a#3-27$<i>9]" |
| 666 | + "[(6-9)s:z|<>:a#6-18$<i>6]" + "[(9-12)s:c]" |
| 667 | + "[(12-15)s:d]" + "[(15-18)s:e]" + "[(18-21)s:c]" |
| 668 | + "[(21-24)s:d]" + "[(24-27)s:e]" + "[(27-30)s:c]" |
| 669 | + "[(30-33)s:d]" + "[(33-36)s:e]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 670 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 671 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 672 | // xyz |
| 673 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 674 | fd.addTV("base", "x y z ", "[(0-3)s:x]" + "[(3-6)s:y]" |
| 675 | + "[(6-9)s:z]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 676 | ki.addDoc(fd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 677 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 678 | // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a> |
| 679 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 680 | fd.addTV("base", "x y z k l m k l m k l m ", |
| 681 | "[(0-3)s:x|<>:a#0-3$<i>12]" |
| 682 | + "[(3-6)s:y|<>:a#3-6$<i>9|<>:b#3-6$<i>9]" |
| 683 | + "[(6-9)s:z|<>:a#6-9$<i>6]" + "[(9-12)s:k]" |
| 684 | + "[(12-15)s:l]" + "[(15-18)s:m]" + "[(18-21)s:k]" |
| 685 | + "[(21-24)s:l]" + "[(24-27)s:m]" + "[(27-30)s:k]" |
| 686 | + "[(30-33)s:l]" + "[(33-36)s:m]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 687 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 688 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 689 | // <a><a><a>h</a>hhij</a>hij</a>hij</a> |
| 690 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 691 | fd.addTV("base", "h i j h i j h i j ", |
| 692 | "[(0-3)s:h|<>:a#0-27$<i>6|<>:a#0-18$<i>3|<>:a#0-36$<i>9]" |
| 693 | + "[(3-6)s:h]" + "[(12-15)s:i]" + "[(15-18)s:j]" |
| 694 | + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" |
| 695 | + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 696 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 697 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 698 | // xyz |
| 699 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 700 | fd.addTV("base", "a b c ", "[(0-3)s:a]" + "[(3-6)s:b]" |
| 701 | + "[(6-9)s:c]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 702 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 703 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 704 | // Save documents |
| 705 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 706 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 707 | assertEquals(6, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 708 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 709 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 710 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 711 | Result kr = ki.search(sq, (short) 15); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 712 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 713 | assertEquals("totalResults", kr.getTotalResults(), 12); |
| 714 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 715 | assertEquals("EndPos (0)", 0, kr.getMatch(0).endPos); |
| 716 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 717 | assertEquals("EndPos (1)", 0, kr.getMatch(1).endPos); |
| 718 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 719 | assertEquals("EndPos (2)", 0, kr.getMatch(2).endPos); |
| 720 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| 721 | assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos); |
| 722 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 723 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 724 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 725 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 726 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 727 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| 728 | assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos); |
| 729 | assertEquals("StartPos (7)", 1, kr.getMatch(7).startPos); |
| 730 | assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos); |
| 731 | assertEquals("StartPos (8)", 2, kr.getMatch(8).startPos); |
| 732 | assertEquals("EndPos (8)", 6, kr.getMatch(8).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 733 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 734 | assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos); |
| 735 | assertEquals("EndPos (9)", 3, kr.getMatch(9).endPos); |
| 736 | assertEquals("StartPos (10)", 0, kr.getMatch(10).startPos); |
| 737 | assertEquals("EndPos (10)", 6, kr.getMatch(10).endPos); |
| 738 | assertEquals("StartPos (11)", 0, kr.getMatch(11).startPos); |
| 739 | assertEquals("EndPos (11)", 9, kr.getMatch(11).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 740 | }; |
| 741 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 742 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 743 | @Test |
| 744 | public void indexExample3Offsets () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 745 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 746 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 747 | // Er schrie: <s>"Das war ich!"</s> |
| 748 | FieldDocument fd = new FieldDocument(); |
| 749 | fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 750 | fd.addTV("base", "Er schrie: \"Das war ich!\" und ging.", |
| 751 | "[(0-2)s:Er|_0#0-3]" + "[(3-9)s:schrie|_1#3-9]" |
| 752 | + "[(12-15)s:Das|_2#12-15|<>:sentence#11-25$<i>5]" |
| 753 | + "[(16-19)s:war|_3#16-19]" + "[(20-23)s:ich|_4#20-23]" |
| 754 | + "[(26-29)s:und|_5#26-29]" |
| 755 | + "[(30-34)s:ging|_6#30-34]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 756 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 757 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 758 | // Save documents |
| 759 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 760 | |
| 761 | SpanQuery sq = new SpanClassQuery(new SpanElementQuery("base", |
| 762 | "sentence"), (byte) 3); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 763 | Result kr; |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 764 | kr = ki.search(sq, 0, (short) 15, true, (short) 1, true, (short) 1); |
| 765 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 766 | |
| 767 | assertEquals("... schrie: [\"{3:Das war ich}!\"] und ...", |
| 768 | kr.getMatch(0).getSnippetBrackets()); |
| 769 | assertEquals( |
| 770 | "<span class=\"context-left\"><span class=\"more\"></span>schrie: </span><mark>"<mark class=\"class-3 level-0\">Das war ich</mark>!"</mark><span class=\"context-right\"> und<span class=\"more\"></span></span>", |
| 771 | kr.getMatch(0).getSnippetHTML()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 772 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 773 | kr = ki.search(sq, 0, (short) 15, true, (short) 0, true, (short) 0); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 774 | assertEquals("... [\"{3:Das war ich}!\"] ...", kr.getMatch(0) |
| 775 | .getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 776 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 777 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 778 | kr = ki.search(sq, 0, (short) 15, true, (short) 6, true, (short) 6); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 779 | assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging.", kr |
| 780 | .getMatch(0).getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 781 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 782 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 783 | kr = ki.search(sq, 0, (short) 15, true, (short) 2, true, (short) 2); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 784 | assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging ...", kr |
| 785 | .getMatch(0).getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 786 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 787 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 788 | sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery( |
| 789 | "base", "sentence"), new SpanClassQuery(new SpanTermQuery( |
| 790 | new Term("base", "s:Das")), (byte) 2)), (byte) 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 791 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 792 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 793 | assertEquals("Er schrie: [\"{1:{2:Das} war ich}!\"] und ging.", kr |
| 794 | .getMatch(0).getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 795 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 796 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 797 | sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery( |
| 798 | "base", "sentence"), new SpanClassQuery(new SpanTermQuery( |
| 799 | new Term("base", "s:war")), (byte) 2)), (byte) 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 800 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 801 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 802 | assertEquals("Er schrie: [\"{1:Das {2:war} ich}!\"] und ging.", kr |
| 803 | .getMatch(0).getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 804 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 805 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 806 | sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery( |
| 807 | "base", "sentence"), new SpanClassQuery(new SpanTermQuery( |
| 808 | new Term("base", "s:ich")), (byte) 2)), (byte) 1); |
| 809 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 810 | kr = ki.search(sq, (short) 15); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 811 | assertEquals("Er schrie: [\"{1:Das war {2:ich}}!\"] und ging.", kr |
| 812 | .getMatch(0).getSnippetBrackets()); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 813 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 814 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 815 | sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery( |
| 816 | "base", "sentence"), new SpanClassQuery(new SpanTermQuery( |
| 817 | new Term("base", "s:und")), (byte) 2)), (byte) 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 818 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 819 | kr = ki.search(sq, (short) 15); |
| 820 | assertEquals("totalResults", kr.getTotalResults(), 0); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 821 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 822 | sq = new SpanClassQuery(new SpanWithinQuery(new SpanElementQuery( |
| 823 | "base", "sentence"), new SpanClassQuery(new SpanTermQuery( |
| 824 | new Term("base", "s:schrie")), (byte) 2)), (byte) 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 825 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 826 | kr = ki.search(sq, (short) 15); |
| 827 | assertEquals("totalResults", kr.getTotalResults(), 0); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 828 | }; |
| 829 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 830 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 831 | @Test |
| 832 | public void indexExample4 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 833 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 834 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 835 | // Case 1, 6, 7, 13 |
| 836 | // xy<a><a>x</a>b<a>c</a></a>x |
| 837 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 838 | fd.addTV("base", "x y x b c x ", "[(0-3)s:x|_0#0-3]" |
| 839 | + "[(3-6)s:y|_1#3-6]" |
| 840 | + "[(6-9)s:x|_2#6-9|<>:a#6-15$<i>5|<>:a#6-9$<i>3]" |
| 841 | + "[(9-12)s:b|_3#9-12]" |
| 842 | + "[(12-15)s:c|_4#12-15|<>:a#12-15$<i>5]" |
| 843 | + "[(15-18)s:x|_5#15-18]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 844 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 845 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 846 | // Save documents |
| 847 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 848 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 849 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 850 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 851 | SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 852 | new SpanTermQuery(new Term("base", "s:x"))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 853 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 854 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 855 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 856 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 857 | assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos); |
| 858 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| 859 | assertEquals("StartPos (1)", 2, kr.getMatch(1).startPos); |
| 860 | assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 861 | }; |
| 862 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 863 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 864 | @Test |
| 865 | public void indexExample5 () throws IOException { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 866 | // 1,2,3,6,9,10,12 |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 867 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 868 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 869 | // hij<a>hi<a>h<a>ij</a></a>hi</a> |
| 870 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 871 | fd.addTV("base", "hijhihijhi", |
| 872 | "[(0-1)s:h|i:h|_0#0-1|-:a$<i>3|-:t$<i>10]" |
| 873 | + "[(1-2)s:i|i:i|_1#1-2]" + "[(2-3)s:j|i:j|_2#2-3]" |
| 874 | + "[(3-4)s:h|i:h|_3#3-4|<>:a#3-10$<i>10]" |
| 875 | + "[(4-5)s:i|i:i|_4#4-5]" |
| 876 | + "[(5-6)s:h|i:h|_5#5-6|<>:a#5-8$<i>8]" |
| 877 | + "[(6-7)s:i|i:i|_6#6-7|<>:a#6-8$<i>8]" |
| 878 | + "[(7-8)s:j|i:j|_7#7-8]" + "[(8-9)s:h|i:h|_8#8-9]" |
| 879 | + "[(9-10)s:i|i:i|_9#9-10]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 880 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 881 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 882 | // Save documents |
| 883 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 884 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 885 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 886 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 887 | SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 888 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")), |
| 889 | new SpanTermQuery(new Term("base", "s:i")))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 890 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 891 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 892 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 893 | assertEquals("totalResults", kr.getTotalResults(), 4); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 894 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 895 | assertEquals("StartPos (0)", 3, kr.getMatch(0).startPos); |
| 896 | assertEquals("EndPos (0)", 10, kr.getMatch(0).endPos); |
| 897 | assertEquals("StartPos (1)", 3, kr.getMatch(1).startPos); |
| 898 | assertEquals("EndPos (1)", 10, kr.getMatch(1).endPos); |
| 899 | assertEquals("StartPos (2)", 3, kr.getMatch(2).startPos); |
| 900 | assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos); |
| 901 | assertEquals("StartPos (3)", 5, kr.getMatch(3).startPos); |
| 902 | assertEquals("EndPos (3)", 8, kr.getMatch(3).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 903 | }; |
| 904 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 905 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 906 | @Test |
| 907 | public void indexExample6 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 908 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 909 | // 2,5,8,12,13 |
| 910 | // h<a><a>i</a>j</a><a>h</a>i j<a>h i</a>j |
| 911 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 912 | fd.addTV("base", "hijhi jh ij", |
| 913 | "[(0-1)s:h|i:h|_0#0-1|-:a$<i>4|-:t$<i>9]" |
| 914 | + "[(1-2)s:i|i:i|_1#1-2|<>:a#1-2$<i>2|<>:a#1-3$<i>3]" |
| 915 | + "[(2-3)s:j|i:j|_2#2-3]" |
| 916 | + "[(3-4)s:h|i:h|_3#3-4|<>:a#3-4$<i>4]" |
| 917 | + "[(4-5)s:i|i:i|_4#4-5]" + "[(6-7)s:j|i:j|_5#6-7]" |
| 918 | + "[(7-8)s:h|i:h|_6#7-8|<>:a#7-10$<i>8]" |
| 919 | + "[(9-10)s:i|i:i|_7#9-10]" |
| 920 | + "[(10-11)s:j|i:j|_8#10-11]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 921 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 922 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 923 | // Save documents |
| 924 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 925 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 926 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 927 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 928 | SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 929 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")), |
| 930 | new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 931 | "s:i")), new SpanTermQuery(new Term("base", |
| 932 | "s:j"))))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 933 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 934 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 935 | assertEquals("totalResults", kr.getTotalResults(), 0); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 936 | }; |
| 937 | |
| 938 | |
| 939 | @Test |
| 940 | public void indexExample7 () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 941 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 942 | // 4,5,11,13 |
| 943 | // x<a>x h</a>i j h<a>i j</a> |
| 944 | FieldDocument fd = new FieldDocument(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 945 | fd.addTV("base", "xx hi j hi j", |
| 946 | "[(0-1)s:x|i:x|_0#0-1|-:a$<i>2|-:t$<i>8]" |
| 947 | + "[(1-2)s:x|i:x|_1#1-2|<>:a#1-4$<i>3]" |
| 948 | + "[(3-4)s:h|i:h|_2#3-4]" + "[(4-5)s:i|i:i|_3#4-5]" |
| 949 | + "[(6-7)s:j|i:j|_4#6-7]" + "[(8-9)s:h|i:h|_5#8-9]" |
| 950 | + "[(9-10)s:i|i:i|_6#9-10|<>:a#9-12$<i>8]" |
| 951 | + "[(11-12)s:j|i:j|_7#11-12]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 952 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 953 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 954 | // Save documents |
| 955 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 956 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 957 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 958 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 959 | SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), |
| 960 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:h")), |
| 961 | new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 962 | "s:i")), new SpanTermQuery(new Term("base", |
| 963 | "s:j"))))); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 964 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 965 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 966 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 967 | assertEquals("totalResults", kr.getTotalResults(), 0); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 968 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 969 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 970 | |
| 971 | /** SpanElementQueries */ |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 972 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 973 | public void indexExample8 () throws IOException { |
| 974 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 975 | FieldDocument fd = new FieldDocument(); |
| 976 | // <a>xx <e>hi j <e>hi j</e></e></a> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 977 | fd.addTV("base", "xx hi j hi j", "[(0-1)s:x|i:x|_0#0-1|<>:a#1-12$<i>8]" |
| 978 | + "[(1-2)s:x|i:x|_1#1-2]" |
| 979 | + "[(3-4)s:h|i:h|_2#3-4|<>:e#3-12$<i>8]" |
| 980 | + "[(4-5)s:i|i:i|_3#4-5]" + "[(6-7)s:j|i:j|_4#6-7]" |
| 981 | + "[(8-9)s:h|i:h|_5#8-9|<>:e#8-9$<i>8]" |
| 982 | + "[(9-10)s:i|i:i|_6#9-10]" + "[(11-12)s:j|i:j|_7#11-12]"); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 983 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 984 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 985 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 986 | |
| 987 | // contains(<s>, (es wird | wird es)) |
| Nils Diewald | 7d32064 | 2014-11-12 17:39:42 +0000 | [diff] [blame] | 988 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 989 | public void queryJSONpoly2 () throws QueryException, IOException { |
| 990 | String jsonPath = getClass().getResource("/queries/poly2.json") |
| 991 | .getFile(); |
| 992 | String jsonPQuery = readFile(jsonPath); |
| Nils Diewald | 0339d46 | 2015-02-26 14:53:56 +0000 | [diff] [blame] | 993 | SpanQueryWrapper sqwi = new KrillQuery("tokens").fromJson(jsonPQuery); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 994 | |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 995 | SpanWithinQuery sq = (SpanWithinQuery) sqwi.toQuery(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 996 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 997 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 886d321 | 2014-11-14 01:27:23 +0000 | [diff] [blame] | 998 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 999 | ki.addDoc(getClass().getResourceAsStream("/wiki/DDD-08370.json.gz"), |
| 1000 | true); |
| 1001 | ki.addDoc(getClass().getResourceAsStream("/wiki/PPP-02924.json.gz"), |
| 1002 | true); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1003 | |
| 1004 | ki.commit(); |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 1005 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1006 | assertEquals(2, kr.getTotalResults()); |
| 1007 | assertEquals(0, kr.getMatch(0).getLocalDocID()); |
| 1008 | assertEquals(76, kr.getMatch(0).getStartPos()); |
| 1009 | assertEquals(93, kr.getMatch(0).getEndPos()); |
| 1010 | assertEquals(1, kr.getMatch(1).getLocalDocID()); |
| 1011 | assertEquals(237, kr.getMatch(1).getStartPos()); |
| 1012 | assertEquals(252, kr.getMatch(1).getEndPos()); |
| 1013 | |
| 1014 | /* |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1015 | for (Match km : kr.getMatches()){ |
| 1016 | System.out.println(km.getStartPos() +","+km.getEndPos()+" " |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1017 | +km.getSnippetBrackets()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1018 | }; |
| Nils Diewald | e7a820b | 2015-02-12 21:34:50 +0000 | [diff] [blame] | 1019 | */ |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1020 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1021 | |
| 1022 | |
| 1023 | private String readFile (String path) { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1024 | StringBuilder sb = new StringBuilder(); |
| 1025 | try { |
| 1026 | BufferedReader in = new BufferedReader(new FileReader(path)); |
| 1027 | String str; |
| 1028 | while ((str = in.readLine()) != null) { |
| 1029 | sb.append(str); |
| 1030 | }; |
| 1031 | in.close(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1032 | } |
| 1033 | catch (IOException e) { |
| Nils Diewald | 83c9b16 | 2015-02-03 21:05:07 +0000 | [diff] [blame] | 1034 | fail(e.getMessage()); |
| 1035 | } |
| 1036 | return sb.toString(); |
| Nils Diewald | 11e9186 | 2014-11-12 16:29:18 +0000 | [diff] [blame] | 1037 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1038 | }; |