| Eliza Margaretha | 45b5be1 | 2014-02-04 11:22:46 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 4 | |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 5 | import java.io.IOException; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 6 | |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 7 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 8 | import org.junit.Test; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 9 | import org.junit.runner.RunWith; |
| 10 | import org.junit.runners.JUnit4; |
| 11 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 14 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 15 | |
| 16 | |
| 17 | @RunWith(JUnit4.class) |
| 18 | public class TestElementIndex { |
| 19 | |
| 20 | // Todo: primary data as a non-indexed field separated. |
| 21 | |
| 22 | @Test |
| 23 | public void indexExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 24 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 25 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 26 | // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a> |
| 27 | FieldDocument fd = new FieldDocument(); |
| 28 | fd.addTV("base", "x y z h i j h i j h i j ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 29 | "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" |
| 30 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" |
| 31 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]" |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 32 | + "[(9-12)s:h<b>0]" + "[(12-15)s:i]" + "[(15-18)s:j]" |
| 33 | + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" |
| 34 | + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 35 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 36 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 37 | // <a>x<a>y<a>zcde</a>cde</a>cde</a>cde</a> |
| 38 | fd = new FieldDocument(); |
| 39 | fd.addTV("base", "x y z c d e c d e c d e ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 40 | "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" |
| 41 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" |
| 42 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]" |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 43 | + "[(9-12)s:c<b>0]" + "[(12-15)s:d]" + "[(15-18)s:e]" |
| 44 | + "[(18-21)s:c]" + "[(21-24)s:d]" + "[(24-27)s:e]" |
| 45 | + "[(27-30)s:c]" + "[(30-33)s:d]" + "[(33-36)s:e]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 46 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 47 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 48 | // Save documents |
| 49 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 50 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 51 | assertEquals(2, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 52 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 54 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 55 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 56 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 57 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 58 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 59 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 60 | assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos); |
| 61 | assertEquals("StartPos (1)", 1, kr.getMatch(1).startPos); |
| 62 | assertEquals("EndPos (1)", 9, kr.getMatch(1).endPos); |
| 63 | assertEquals("StartPos (2)", 2, kr.getMatch(2).startPos); |
| 64 | assertEquals("EndPos (2)", 6, kr.getMatch(2).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 65 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 66 | assertEquals("StartPos (0)", 0, kr.getMatch(3).startPos); |
| 67 | assertEquals("EndPos (0)", 12, kr.getMatch(3).endPos); |
| 68 | assertEquals("StartPos (1)", 1, kr.getMatch(4).startPos); |
| 69 | assertEquals("EndPos (1)", 9, kr.getMatch(4).endPos); |
| 70 | assertEquals("StartPos (2)", 2, kr.getMatch(5).startPos); |
| 71 | assertEquals("EndPos (2)", 6, kr.getMatch(5).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 72 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 73 | // System.err.println(kr.toJSON()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 74 | }; |
| 75 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 77 | @Test |
| 78 | public void indexExample2 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 79 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 80 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 81 | // <a><a><a>h</a>hhij</a>hij</a>hij</a> |
| 82 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 83 | fd.addTV("base", "h h i j h i j h i j ", |
| 84 | "[(0-3)s:h|" + "<>:a$<b>64<i>0<i>18<i>3<b>0|" |
| 85 | + "<>:a$<b>64<i>0<i>27<i>6<b>0|" |
| 86 | + "<>:a$<b>64<i>0<i>36<i>9]" + "[(3-6)s:h]" |
| 87 | + "[(12-15)s:i<b>0]" + "[(15-18)s:j]" + "[(18-21)s:h]" |
| 88 | + "[(21-24)s:i]" + "[(24-27)s:j]" + "[(27-30)s:h]" |
| 89 | + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 90 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 91 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 92 | // Save documents |
| 93 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 94 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 95 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 96 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 97 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 98 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 99 | Result kr = ki.search(sq, (short) 10); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 100 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 101 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 102 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 103 | assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos); |
| 104 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 105 | assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos); |
| 106 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 107 | assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 108 | }; |
| 109 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 111 | @Test |
| 112 | public void indexExample3 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 114 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 115 | // <a><a><a>u</a></a></a> |
| 116 | FieldDocument fd = new FieldDocument(); |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 117 | fd.addTV("base", "xyz", "[(0-3)s:xyz|<>:a$<b>64<i>0<i>3<i>0<b>0|" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 118 | + "<>:a$<b>64<i>0<i>3<i>0<b>0|" |
| 119 | + "<>:a$<b>64<i>0<i>3<i>0<b>0|<>:b$<b>64<i>0<i>3<i>0<b>0]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 120 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 121 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 122 | // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a> |
| 123 | fd = new FieldDocument(); |
| 124 | fd.addTV("base", "x y z c d e c d e c d e ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 125 | "[(0-3)s:x|<>:a$<b>64<i>0<i>36<i>12<b>0|<>:b$<b>64<i>0<i>36<i>12<b>0]" |
| 126 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>27<i>9<b>0]" |
| 127 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>18<i>6]" |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 128 | + "[(9-12)s:c<b>0]" + "[(12-15)s:d]" + "[(15-18)s:e]" |
| 129 | + "[(18-21)s:c]" + "[(21-24)s:d]" + "[(24-27)s:e]" |
| 130 | + "[(27-30)s:c]" + "[(30-33)s:d]" + "[(33-36)s:e]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 131 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 132 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 133 | // xyz |
| 134 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 135 | fd.addTV("base", "x y z ", |
| 136 | "[(0-3)s:x]" + "[(3-6)s:y]" + "[(6-9)s:z]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 138 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 139 | // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a> |
| 140 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 141 | fd.addTV("base", "x y z k l m k l m k l m ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 142 | "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" |
| 143 | + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0|<>:b$<b>64<i>3<i>6<i>9<b>0]" |
| 144 | + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6<b>0]" |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 145 | + "[(9-12)s:k<b>0]" + "[(12-15)s:l]" + "[(15-18)s:m]" |
| 146 | + "[(18-21)s:k]" + "[(21-24)s:l]" + "[(24-27)s:m]" |
| 147 | + "[(27-30)s:k]" + "[(30-33)s:l]" + "[(33-36)s:m]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 148 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 150 | // <a><a><a>h</a>hhij</a>hij</a>hij</a> |
| 151 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 152 | fd.addTV("base", "h h i j h i j h i j ", |
| 153 | "[(0-3)s:h|" + "<>:a$<b>64<i>0<i>18<i>3<b>0|" |
| 154 | + "<>:a$<b>64<i>0<i>27<i>6<b>0|" |
| 155 | + "<>:a$<b>64<i>0<i>36<i>9<b>0]" + "[(3-6)s:h]" |
| 156 | + "[(12-15)s:i]" + "[(15-18)s:j]" + "[(18-21)s:h]" |
| 157 | + "[(21-24)s:i]" + "[(24-27)s:j]" + "[(27-30)s:h]" |
| 158 | + "[(30-33)s:i]" + "[(33-36)s:j]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 159 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 160 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | // xyz |
| 162 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 163 | fd.addTV("base", "a b c ", |
| 164 | "[(0-3)s:a]" + "[(3-6)s:b]" + "[(6-9)s:c]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 165 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 166 | |
| 167 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 168 | // Save documents |
| 169 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 170 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 171 | assertEquals(6, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 172 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 173 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 174 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 175 | Result kr = ki.search(sq, (short) 15); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 176 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | // System.err.println(kr.toJSON()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 178 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 179 | assertEquals("totalResults", kr.getTotalResults(), 12); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 180 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 181 | assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos); |
| 182 | assertEquals("EndPos (0)", 0, kr.getMatch(0).endPos); |
| 183 | assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos); |
| 184 | assertEquals("EndPos (1)", 0, kr.getMatch(1).endPos); |
| 185 | assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos); |
| 186 | assertEquals("EndPos (2)", 0, kr.getMatch(2).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 187 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 188 | assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos); |
| 189 | assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos); |
| 190 | assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos); |
| 191 | assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos); |
| 192 | assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos); |
| 193 | assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 194 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 195 | assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos); |
| 196 | assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos); |
| 197 | assertEquals("StartPos (7)", 1, kr.getMatch(7).startPos); |
| 198 | assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos); |
| 199 | assertEquals("StartPos (8)", 2, kr.getMatch(8).startPos); |
| 200 | assertEquals("EndPos (8)", 6, kr.getMatch(8).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 201 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 202 | assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos); |
| 203 | assertEquals("EndPos (9)", 3, kr.getMatch(9).endPos); |
| 204 | assertEquals("StartPos (10)", 0, kr.getMatch(10).startPos); |
| 205 | assertEquals("EndPos (10)", 6, kr.getMatch(10).endPos); |
| 206 | assertEquals("StartPos (11)", 0, kr.getMatch(11).startPos); |
| 207 | assertEquals("EndPos (11)", 9, kr.getMatch(11).endPos); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 208 | }; |
| 209 | |
| 210 | |
| 211 | @Test |
| 212 | public void indexExample4 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 213 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 214 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 215 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 216 | fd.addTV("base", "111111ccc222222fff333333iiijjj", |
| 217 | "[(0-3)s:a|_0$<i>0<i>3]" + "[(3-6)s:b|_1$<i>3<i>6]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 218 | + "[(6-9)s:c|_2$<i>6<i>9]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 219 | + "[(9-12)s:d|_3$<i>9<i>12|<>:a$<b>64<i>9<i>15<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 220 | + "[(12-15)s:e|_4$<i>12<i>15]" |
| 221 | + "[(15-18)s:f|_5$<i>15<i>18]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 222 | + "[(18-21)s:g|_6$<i>18<i>21|<>:a$<b>64<i>18<i>24<i>8<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 223 | + "[(21-24)s:h|_7$<i>21<i>24]" |
| 224 | + "[(24-27)s:i|_8$<i>24<i>27]" |
| 225 | + "[(27-30)s:j|_9$<i>27<i>30]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 226 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 227 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 228 | // Save documents |
| 229 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 230 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 231 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 232 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 233 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 234 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 235 | Result kr = ki.search(sq, 0, (short) 15, false, (short) 3, false, |
| 236 | (short) 3); |
| 237 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 238 | assertEquals("... ccc[[222222]]fff ...", |
| 239 | kr.getMatch(0).getSnippetBrackets()); |
| 240 | assertEquals("... fff[[333333]]iii ...", |
| 241 | kr.getMatch(1).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 242 | }; |
| 243 | |
| 244 | |
| 245 | @Test |
| 246 | public void indexExample5 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 247 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 248 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 249 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 250 | fd.addTV("base", "111111ccc222222fff333333iiijjj", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 251 | "[(0-3)s:a|_0$<i>0<i>3|<>:a$<b>64<i>0<i>6<i>1<b>0]" |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 252 | + "[(3-6)s:b|_1$<i>3<i>6]" + "[(6-9)s:c|_2$<i>6<i>9]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 253 | + "[(9-12)s:d|_3$<i>9<i>12|<>:a$<b>64<i>9<i>15<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 254 | + "[(12-15)s:e|_4$<i>12<i>15]" |
| 255 | + "[(15-18)s:f|_5$<i>15<i>18]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 256 | + "[(18-21)s:g|_6$<i>18<i>21|<>:a$<b>64<i>18<i>24<i>8<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 257 | + "[(21-24)s:h|_7$<i>21<i>24]" |
| 258 | + "[(24-27)s:i|_8$<i>24<i>27]" |
| 259 | + "[(27-30)s:j|_9$<i>27<i>30]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 260 | ki.addDoc(fd); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 261 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 262 | // Save documents |
| 263 | ki.commit(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 264 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 265 | assertEquals(1, ki.numberOf("documents")); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 266 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 267 | SpanQuery sq = new SpanElementQuery("base", "a"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 268 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 269 | Result kr = ki.search(sq, 0, (short) 15, false, (short) 3, false, |
| 270 | (short) 3); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 271 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 272 | assertEquals("[[111111]]ccc ...", kr.getMatch(0).getSnippetBrackets()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 273 | assertEquals("... ccc[[222222]]fff ...", |
| 274 | kr.getMatch(1).getSnippetBrackets()); |
| 275 | assertEquals("... fff[[333333]]iii ...", |
| 276 | kr.getMatch(2).getSnippetBrackets()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 277 | }; |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 278 | |
| 279 | |
| 280 | @Test |
| 281 | public void indexExample6 () throws IOException { |
| 282 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 283 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 284 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 285 | // <a>x<a>y<a>zhij</a>hij</a>hij</a> |
| 286 | FieldDocument fd = new FieldDocument(); |
| 287 | fd.addTV("base", "x y z h i j h i j h i j ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 288 | "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]" + // 1 |
| 289 | "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]" + // 2 |
| 290 | "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]" + // 3 |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 291 | "[(9-12)s:h|_3$<i>9<i>12]" + // 4 |
| 292 | "[(12-15)s:i|_4$<i>12<i>15]" + // 5 |
| 293 | "[(15-18)s:j|_5$<i>15<i>18]" + // 6 |
| 294 | "[(18-21)s:h|_6$<i>18<i>21]" + // 7 |
| 295 | "[(21-24)s:i|_7$<i>21<i>24]" + // 8 |
| 296 | "[(24-27)s:j|_8$<i>24<i>27]" + // 9 |
| 297 | "[(27-30)s:h|_9$<i>27<i>30]" + // 10 |
| 298 | "[(30-33)s:i|_10$<i>30<i>33]" + // 11 |
| 299 | "[(33-36)s:j|_11$<i>33<i>36]"); // 12 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 300 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 301 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 302 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 303 | fd.addTV("base", "x y z h ", |
| 304 | "[(0-3)s:x|_0$<i>0<i>3]" + // 1 |
| 305 | "[(3-6)s:y|_1$<i>3<i>6]" + // 2 |
| 306 | "[(6-9)s:z|_2$<i>6<i>9]" + // 3 |
| 307 | "[(9-12)s:h|_3$<i>9<i>12]"); // 4 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 308 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 309 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 310 | // Here is a larger offset than expected |
| 311 | fd = new FieldDocument(); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 312 | fd.addTV("base", "x y z h ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 313 | "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]" + // 1 |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 314 | "[(3-6)s:y|_1$<i>3<i>6]" + // 2 |
| 315 | "[(6-9)s:z|_2$<i>6<i>9]" + // 3 |
| 316 | "[(9-12)s:h|_3$<i>9<i>12]"); // 4 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 317 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 318 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 319 | // <a>x<a>y<a>zabc</a>abc</a>abc</a> |
| 320 | fd = new FieldDocument(); |
| 321 | fd.addTV("base", "x y z a b c a b c a b c ", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 322 | "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]" + // 1 |
| 323 | "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]" + // 2 |
| 324 | "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]" + // 3 |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 325 | "[(9-12)s:a|_3$<i>9<i>12]" + // 4 |
| 326 | "[(12-15)s:b|_4$<i>12<i>15]" + // 5 |
| 327 | "[(15-18)s:c|_5$<i>15<i>18]" + // 6 |
| 328 | "[(18-21)s:a|_6$<i>18<i>21]" + // 7 |
| 329 | "[(21-24)s:b|_7$<i>21<i>24]" + // 8 |
| 330 | "[(24-27)s:c|_8$<i>24<i>27]" + // 9 |
| 331 | "[(27-30)s:a|_9$<i>27<i>30]" + // 10 |
| 332 | "[(30-33)s:b|_10$<i>30<i>33]" + // 11 |
| 333 | "[(33-36)s:c|_11$<i>33<i>36]"); // 12 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 334 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 335 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 336 | fd = new FieldDocument(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 337 | fd.addTV("base", "x y z h ", |
| 338 | "[(0-3)s:x|_0$<i>0<i>3]" + // 1 |
| 339 | "[(3-6)s:y|_1$<i>3<i>6]" + // 2 |
| 340 | "[(6-9)s:z|_2$<i>6<i>9]" + // 3 |
| 341 | "[(9-12)s:h|_3$<i>9<i>12]"); // 4 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 342 | ki.addDoc(fd); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 343 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 344 | // Save documents |
| 345 | ki.commit(); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 346 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 347 | SpanQuery sq; |
| 348 | Result kr; |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 349 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 350 | sq = new SpanElementQuery("base", "a"); |
| 351 | kr = ki.search(sq, (short) 15); |
| 352 | |
| 353 | // System.err.println(kr.toJSON()); |
| 354 | |
| 355 | assertEquals(5, ki.numberOf("documents")); |
| 356 | assertEquals("totalResults", kr.getTotalResults(), 7); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 357 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 358 | }; |