| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 15 | |
| 16 | import de.ids_mannheim.korap.KorapIndex; |
| 17 | import de.ids_mannheim.korap.KorapResult; |
| 18 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 19 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 20 | import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| 21 | |
| 22 | |
| 23 | @RunWith(JUnit4.class) |
| 24 | public class TestSegmentIndex { |
| 25 | private SpanQuery sq; |
| 26 | private KorapIndex ki; |
| 27 | private KorapResult kr; |
| 28 | private FieldDocument fd; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 29 | private Logger log; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 30 | |
| 31 | public TestSegmentIndex() throws IOException { |
| 32 | ki = new KorapIndex(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 33 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 34 | ki.addDoc(createFieldDoc1()); |
| 35 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 36 | ki.commit(); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 37 | |
| 38 | log = LoggerFactory.getLogger(getClass()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 39 | } |
| 40 | |
| 41 | /** Multiple matches in one document. */ |
| 42 | @Test |
| 43 | public void testCase1() throws IOException { |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 44 | // log.trace("Testcase1"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 45 | sq = new SpanSegmentQuery( |
| 46 | new SpanTermQuery(new Term("base","s:b")), |
| 47 | new SpanTermQuery(new Term("base","s:c")) |
| 48 | ); |
| 49 | |
| 50 | kr = ki.search(sq, (short) 10); |
| 51 | ki.close(); |
| 52 | |
| 53 | assertEquals("totalResults", 2, kr.totalResults()); |
| 54 | assertEquals("StartPos (0)", 1, kr.match(0).startPos); |
| 55 | assertEquals("EndPos (0)", 2, kr.match(0).endPos); |
| 56 | assertEquals("StartPos (1)", 4, kr.match(1).startPos); |
| 57 | assertEquals("EndPos (1)", 5, kr.match(1).endPos); |
| 58 | } |
| 59 | |
| 60 | /** Matches in multiple documents. |
| 61 | * Ensure the same document. The current secondspan is skipped to |
| 62 | * the doc number of the firstspan. */ |
| 63 | @Test |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 64 | public void testCase2() throws IOException { |
| 65 | // log.trace("Testcase2"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 66 | sq = new SpanSegmentQuery( |
| 67 | new SpanTermQuery(new Term("base","s:a")), |
| 68 | new SpanTermQuery(new Term("base","s:b")) |
| 69 | ); |
| 70 | |
| 71 | kr = ki.search(sq, (short) 10); |
| 72 | ki.close(); |
| 73 | |
| 74 | assertEquals("totalResults", 3, kr.totalResults()); |
| 75 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 76 | assertEquals("doc-number", 1, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 77 | assertEquals("StartPos", 1, kr.match(0).startPos); |
| 78 | assertEquals("EndPos", 2, kr.match(0).endPos); |
| 79 | // Match #2 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 80 | assertEquals("doc-number", 2, kr.match(2).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 81 | assertEquals("StartPos", 2, kr.match(2).startPos); |
| 82 | assertEquals("EndPos", 3, kr.match(2).endPos); |
| 83 | } |
| 84 | |
| 85 | |
| 86 | /** Ensure the same document, skip to a greater doc number */ |
| 87 | @Test |
| 88 | public void testCase3() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 89 | // log.trace("Testcase3"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 90 | sq = new SpanSegmentQuery( |
| 91 | new SpanTermQuery(new Term("base","s:d")), |
| 92 | new SpanTermQuery(new Term("base","s:b")) |
| 93 | ); |
| 94 | |
| 95 | kr = ki.search(sq, (short) 10); |
| 96 | ki.close(); |
| 97 | |
| 98 | assertEquals("totalResults", 1, kr.totalResults()); |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 99 | assertEquals("doc-number", 2, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 100 | assertEquals("StartPos (0)", 1, kr.match(0).startPos); |
| 101 | assertEquals("EndPos (0)", 2, kr.match(0).endPos); |
| 102 | } |
| 103 | |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 104 | /** Matching a SpanElementQuery and a SpanNextQuery |
| 105 | * Multiple atomic indices |
| 106 | * */ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 107 | @Test |
| 108 | public void testCase4() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 109 | // log.trace("Testcase4"); |
| 110 | |
| 111 | ki = new KorapIndex(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 112 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 113 | ki.commit(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 114 | ki.addDoc(createFieldDoc1()); |
| 115 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 116 | ki.commit(); |
| 117 | |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 118 | sq = new SpanSegmentQuery( |
| 119 | new SpanElementQuery("base","e"), |
| 120 | new SpanNextQuery( |
| 121 | new SpanTermQuery(new Term("base","s:a")), |
| 122 | new SpanTermQuery(new Term("base","s:b")) |
| 123 | ) |
| 124 | ); |
| 125 | |
| 126 | kr = ki.search(sq, (short) 10); |
| 127 | ki.close(); |
| 128 | |
| 129 | assertEquals("totalResults", 2, kr.totalResults()); |
| 130 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 131 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 132 | assertEquals("StartPos", 3, kr.match(0).startPos); |
| 133 | assertEquals("EndPos", 5, kr.match(0).endPos); |
| 134 | // Match #1 |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 135 | assertEquals("doc-number", 0, kr.match(1).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 136 | assertEquals("StartPos", 1, kr.match(1).startPos); |
| 137 | assertEquals("EndPos", 3, kr.match(1).endPos); |
| 138 | } |
| 139 | |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 140 | /** Matching SpanElementQueries */ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 141 | @Test |
| 142 | public void testCase5() throws IOException{ |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 143 | // log.trace("Testcase5"); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 144 | sq = new SpanSegmentQuery( |
| 145 | new SpanElementQuery("base","e"), |
| 146 | new SpanElementQuery("base","e2") |
| 147 | ); |
| 148 | |
| 149 | kr = ki.search(sq, (short) 10); |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 150 | ki.close(); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 151 | |
| 152 | assertEquals("totalResults", 1, kr.totalResults()); |
| 153 | // Match #0 |
| Eliza Margaretha | f7bbb26 | 2014-01-14 17:17:29 +0000 | [diff] [blame] | 154 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 155 | assertEquals("StartPos", 3, kr.match(0).startPos); |
| 156 | assertEquals("EndPos", 5, kr.match(0).endPos); |
| 157 | } |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 158 | |
| 159 | /** Skip to SegmentSpan */ |
| 160 | @Test |
| 161 | public void testcase6() throws IOException{ |
| 162 | ki.addDoc(createFieldDoc4()); |
| 163 | ki.commit(); |
| 164 | sq = new SpanNextQuery( |
| 165 | new SpanSegmentQuery( |
| 166 | new SpanTermQuery(new Term("base","s:b")), |
| 167 | new SpanTermQuery(new Term("base","s:c")) |
| 168 | ), |
| 169 | new SpanTermQuery(new Term("base","s:d")) |
| 170 | ); |
| 171 | |
| 172 | kr = ki.search(sq, (short) 10); |
| 173 | ki.close(); |
| 174 | |
| 175 | assertEquals("totalResults", 2, kr.totalResults()); |
| 176 | // Match #0 |
| 177 | assertEquals("doc-number", 0, kr.match(0).getLocalDocID()); |
| 178 | assertEquals("StartPos (0)", 4, kr.match(0).startPos); |
| 179 | assertEquals("EndPos (0)", 6, kr.match(0).endPos); |
| 180 | // Match #1 in the other atomic index |
| 181 | assertEquals("doc-number", 0, kr.match(1).getLocalDocID()); |
| 182 | assertEquals("StartPos (0)", 0, kr.match(1).startPos); |
| 183 | assertEquals("EndPos (0)", 2, kr.match(1).endPos); |
| 184 | } |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 185 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 186 | private FieldDocument createFieldDoc0(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 187 | fd = new FieldDocument(); |
| 188 | fd.addString("ID", "doc-0"); |
| 189 | fd.addTV("base", |
| 190 | "bcbabd", |
| 191 | "[(0-1)s:b|i:b|_1#0-1]" + |
| 192 | "[(1-2)s:c|i:c|s:b|_2#1-2]" + |
| 193 | "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]" + |
| 194 | "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]" + |
| 195 | "[(4-5)s:b|i:b|s:c|_5#4-5]" + |
| 196 | "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]"); |
| 197 | return fd; |
| 198 | } |
| 199 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 200 | private FieldDocument createFieldDoc1(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 201 | fd = new FieldDocument(); |
| 202 | fd.addString("ID", "doc-1"); |
| 203 | fd.addTV("base", |
| 204 | "babaa", |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 205 | "[(0-1)s:b|i:b|s:c_1#0-1]" + |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 206 | "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]" + |
| 207 | "[(2-3)s:b|i:b|s:a|_3#2-3]" + |
| 208 | "[(3-4)s:a|i:a|_4#3-4]" + |
| 209 | "[(4-5)s:a|i:a|_5#4-5]"); |
| 210 | return fd; |
| 211 | } |
| 212 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 213 | private FieldDocument createFieldDoc2(){ |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 214 | fd = new FieldDocument(); |
| 215 | fd.addString("ID", "doc-2"); |
| 216 | fd.addTV("base", |
| 217 | "bdb", |
| 218 | "[(0-1)s:b|i:b|_1#0-1]" + |
| 219 | "[(1-2)s:d|i:d|s:b|_2#1-2]"+ |
| 220 | "[(2-3)s:b|i:b|s:a|_3#2-3]"); |
| 221 | return fd; |
| 222 | } |
| Eliza Margaretha | befc23f | 2014-01-20 14:34:15 +0000 | [diff] [blame] | 223 | |
| 224 | private FieldDocument createFieldDoc4(){ |
| 225 | fd = new FieldDocument(); |
| 226 | fd.addString("ID", "doc-3"); |
| 227 | fd.addTV("base", |
| 228 | "bdb", |
| 229 | "[(0-1)s:b|i:b|s:c|_1#0-1]" + |
| 230 | "[(1-2)s:d|_2#1-2]"+ |
| 231 | "[(2-3)s:d|i:d|_3#2-3]"); |
| 232 | return fd; |
| 233 | } |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 234 | } |