| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 15 | |
| 16 | import de.ids_mannheim.korap.KorapIndex; |
| 17 | import de.ids_mannheim.korap.KorapResult; |
| 18 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 19 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 20 | import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper; |
| 22 | import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 23 | |
| 24 | |
| 25 | @RunWith(JUnit4.class) |
| 26 | public class TestSegmentIndex { |
| 27 | private SpanQuery sq; |
| 28 | private KorapIndex ki; |
| 29 | private KorapResult kr; |
| 30 | private FieldDocument fd; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 31 | private Logger log; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 32 | |
| 33 | public TestSegmentIndex() throws IOException { |
| 34 | ki = new KorapIndex(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 35 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 36 | ki.addDoc(createFieldDoc1()); |
| 37 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 38 | ki.commit(); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 39 | |
| 40 | log = LoggerFactory.getLogger(getClass()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 41 | } |
| 42 | |
| 43 | /** Multiple matches in one document. */ |
| 44 | @Test |
| 45 | public void testCase1() throws IOException { |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 46 | // log.trace("Testcase1"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 47 | sq = new SpanSegmentQuery( |
| 48 | new SpanTermQuery(new Term("base","s:b")), |
| 49 | new SpanTermQuery(new Term("base","s:c")) |
| 50 | ); |
| 51 | |
| 52 | kr = ki.search(sq, (short) 10); |
| 53 | ki.close(); |
| 54 | |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 55 | assertEquals("totalResults", 3, kr.totalResults()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 56 | assertEquals("StartPos (0)", 1, kr.match(0).startPos); |
| 57 | assertEquals("EndPos (0)", 2, kr.match(0).endPos); |
| 58 | assertEquals("StartPos (1)", 4, kr.match(1).startPos); |
| 59 | assertEquals("EndPos (1)", 5, kr.match(1).endPos); |
| 60 | } |
| 61 | |
| 62 | /** Matches in multiple documents. |
| 63 | * Ensure the same document. The current secondspan is skipped to |
| 64 | * the doc number of the firstspan. */ |
| 65 | @Test |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 66 | public void testCase2() throws IOException { |
| 67 | // log.trace("Testcase2"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 68 | sq = new SpanSegmentQuery( |
| 69 | new SpanTermQuery(new Term("base","s:a")), |
| 70 | new SpanTermQuery(new Term("base","s:b")) |
| 71 | ); |
| 72 | |
| 73 | kr = ki.search(sq, (short) 10); |
| 74 | ki.close(); |
| 75 | |
| 76 | assertEquals("totalResults", 3, kr.totalResults()); |
| 77 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 78 | assertEquals("doc-number", 1, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 79 | assertEquals("StartPos", 1, kr.match(0).startPos); |
| 80 | assertEquals("EndPos", 2, kr.match(0).endPos); |
| 81 | // Match #2 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 82 | assertEquals("doc-number", 2, kr.match(2).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 83 | assertEquals("StartPos", 2, kr.match(2).startPos); |
| 84 | assertEquals("EndPos", 3, kr.match(2).endPos); |
| 85 | } |
| 86 | |
| 87 | |
| 88 | /** Ensure the same document, skip to a greater doc number */ |
| 89 | @Test |
| 90 | public void testCase3() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 91 | // log.trace("Testcase3"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 92 | sq = new SpanSegmentQuery( |
| 93 | new SpanTermQuery(new Term("base","s:d")), |
| 94 | new SpanTermQuery(new Term("base","s:b")) |
| 95 | ); |
| 96 | |
| 97 | kr = ki.search(sq, (short) 10); |
| 98 | ki.close(); |
| 99 | |
| 100 | assertEquals("totalResults", 1, kr.totalResults()); |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 101 | assertEquals("doc-number", 2, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 102 | assertEquals("StartPos (0)", 1, kr.match(0).startPos); |
| 103 | assertEquals("EndPos (0)", 2, kr.match(0).endPos); |
| 104 | } |
| 105 | |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 106 | /** Matching a SpanElementQuery and a SpanNextQuery |
| 107 | * Multiple atomic indices |
| 108 | * */ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 109 | @Test |
| 110 | public void testCase4() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 111 | // log.trace("Testcase4"); |
| 112 | |
| 113 | ki = new KorapIndex(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 114 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 115 | ki.commit(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 116 | ki.addDoc(createFieldDoc1()); |
| 117 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 118 | ki.commit(); |
| 119 | |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 120 | sq = new SpanSegmentQuery( |
| 121 | new SpanElementQuery("base","e"), |
| 122 | new SpanNextQuery( |
| 123 | new SpanTermQuery(new Term("base","s:a")), |
| 124 | new SpanTermQuery(new Term("base","s:b")) |
| 125 | ) |
| 126 | ); |
| 127 | |
| 128 | kr = ki.search(sq, (short) 10); |
| 129 | ki.close(); |
| 130 | |
| 131 | assertEquals("totalResults", 2, kr.totalResults()); |
| 132 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 133 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 134 | assertEquals("StartPos", 3, kr.match(0).startPos); |
| 135 | assertEquals("EndPos", 5, kr.match(0).endPos); |
| 136 | // Match #1 |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 137 | assertEquals("doc-number", 0, kr.match(1).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 138 | assertEquals("StartPos", 1, kr.match(1).startPos); |
| 139 | assertEquals("EndPos", 3, kr.match(1).endPos); |
| 140 | } |
| 141 | |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 142 | /** Matching SpanElementQueries */ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 143 | @Test |
| 144 | public void testCase5() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 145 | // log.trace("Testcase5"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 146 | sq = new SpanSegmentQuery( |
| 147 | new SpanElementQuery("base","e"), |
| 148 | new SpanElementQuery("base","e2") |
| 149 | ); |
| 150 | |
| 151 | kr = ki.search(sq, (short) 10); |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 152 | ki.close(); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 153 | |
| 154 | assertEquals("totalResults", 1, kr.totalResults()); |
| 155 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 156 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 157 | assertEquals("StartPos", 3, kr.match(0).startPos); |
| 158 | assertEquals("EndPos", 5, kr.match(0).endPos); |
| 159 | } |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 160 | |
| 161 | /** Skip to SegmentSpan */ |
| 162 | @Test |
| 163 | public void testcase6() throws IOException{ |
| 164 | ki.addDoc(createFieldDoc4()); |
| 165 | ki.commit(); |
| 166 | sq = new SpanNextQuery( |
| 167 | new SpanSegmentQuery( |
| 168 | new SpanTermQuery(new Term("base","s:b")), |
| 169 | new SpanTermQuery(new Term("base","s:c")) |
| 170 | ), |
| 171 | new SpanTermQuery(new Term("base","s:d")) |
| 172 | ); |
| 173 | |
| 174 | kr = ki.search(sq, (short) 10); |
| 175 | ki.close(); |
| 176 | |
| 177 | assertEquals("totalResults", 2, kr.totalResults()); |
| 178 | // Match #0 |
| 179 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| 180 | assertEquals("StartPos (0)", 4, kr.match(0).startPos); |
| 181 | assertEquals("EndPos (0)", 6, kr.match(0).endPos); |
| 182 | // Match #1 in the other atomic index |
| 183 | assertEquals("doc-number", 0, kr.match(1).getLocalDocID()); |
| 184 | assertEquals("StartPos (0)", 0, kr.match(1).startPos); |
| 185 | assertEquals("EndPos (0)", 2, kr.match(1).endPos); |
| 186 | } |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 187 | |
| 188 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 189 | private FieldDocument createFieldDoc0(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 190 | fd = new FieldDocument(); |
| 191 | fd.addString("ID", "doc-0"); |
| 192 | fd.addTV("base", |
| 193 | "bcbabd", |
| 194 | "[(0-1)s:b|i:b|_1#0-1]" + |
| 195 | "[(1-2)s:c|i:c|s:b|_2#1-2]" + |
| 196 | "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]" + |
| 197 | "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]" + |
| 198 | "[(4-5)s:b|i:b|s:c|_5#4-5]" + |
| 199 | "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]"); |
| 200 | return fd; |
| 201 | } |
| 202 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 203 | private FieldDocument createFieldDoc1(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 204 | fd = new FieldDocument(); |
| 205 | fd.addString("ID", "doc-1"); |
| 206 | fd.addTV("base", |
| 207 | "babaa", |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 208 | "[(0-1)s:b|i:b|s:c|_1#0-1]" + |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 209 | "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]" + |
| 210 | "[(2-3)s:b|i:b|s:a|_3#2-3]" + |
| 211 | "[(3-4)s:a|i:a|_4#3-4]" + |
| 212 | "[(4-5)s:a|i:a|_5#4-5]"); |
| 213 | return fd; |
| 214 | } |
| 215 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 216 | private FieldDocument createFieldDoc2(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 217 | fd = new FieldDocument(); |
| 218 | fd.addString("ID", "doc-2"); |
| 219 | fd.addTV("base", |
| 220 | "bdb", |
| 221 | "[(0-1)s:b|i:b|_1#0-1]" + |
| 222 | "[(1-2)s:d|i:d|s:b|_2#1-2]"+ |
| 223 | "[(2-3)s:b|i:b|s:a|_3#2-3]"); |
| 224 | return fd; |
| 225 | } |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 226 | |
| 227 | private FieldDocument createFieldDoc4(){ |
| 228 | fd = new FieldDocument(); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 229 | fd.addString("ID", "doc-4"); |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 230 | fd.addTV("base", |
| 231 | "bdb", |
| 232 | "[(0-1)s:b|i:b|s:c|_1#0-1]" + |
| 233 | "[(1-2)s:d|_2#1-2]"+ |
| 234 | "[(2-3)s:d|i:d|_3#2-3]"); |
| 235 | return fd; |
| 236 | } |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 237 | } |