| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 15 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.KrillIndex; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 18 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 19 | import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 20 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 21 | |
| 22 | |
| 23 | @RunWith(JUnit4.class) |
| 24 | public class TestSegmentIndex { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 25 | private SpanQuery sq; |
| 26 | private KrillIndex ki; |
| 27 | private Result kr; |
| 28 | private FieldDocument fd; |
| 29 | private Logger log; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 30 | |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 31 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 32 | public TestSegmentIndex () throws IOException { |
| 33 | ki = new KrillIndex(); |
| 34 | ki.addDoc(createFieldDoc0()); |
| 35 | ki.addDoc(createFieldDoc1()); |
| 36 | ki.addDoc(createFieldDoc2()); |
| 37 | ki.commit(); |
| 38 | |
| 39 | log = LoggerFactory.getLogger(getClass()); |
| 40 | } |
| 41 | |
| 42 | |
| 43 | /** Multiple matches in one document. */ |
| 44 | @Test |
| 45 | public void testCase1 () throws IOException { |
| 46 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 47 | new SpanTermQuery(new Term("base", "s:c"))); |
| 48 | |
| 49 | kr = ki.search(sq, (short) 10); |
| 50 | ki.close(); |
| 51 | |
| 52 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 53 | assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos); |
| 54 | assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos); |
| 55 | assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos); |
| 56 | assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos); |
| 57 | } |
| 58 | |
| 59 | |
| 60 | /** |
| 61 | * Matches in multiple documents. |
| 62 | * Ensure the same document. The current secondspan is skipped to |
| 63 | * the doc number of the firstspan. |
| 64 | */ |
| 65 | @Test |
| 66 | public void testCase2 () throws IOException { |
| 67 | // log.trace("Testcase2"); |
| 68 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 69 | new SpanTermQuery(new Term("base", "s:b"))); |
| 70 | |
| 71 | kr = ki.search(sq, (short) 10); |
| 72 | ki.close(); |
| 73 | |
| 74 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 75 | // Match #0 |
| 76 | assertEquals("doc-number", 1, kr.getMatch(0).getLocalDocID()); |
| 77 | assertEquals("StartPos", 1, kr.getMatch(0).startPos); |
| 78 | assertEquals("EndPos", 2, kr.getMatch(0).endPos); |
| 79 | // Match #2 |
| 80 | assertEquals("doc-number", 2, kr.getMatch(2).getLocalDocID()); |
| 81 | assertEquals("StartPos", 2, kr.getMatch(2).startPos); |
| 82 | assertEquals("EndPos", 3, kr.getMatch(2).endPos); |
| 83 | } |
| 84 | |
| 85 | |
| 86 | /** Ensure the same document, skip to a greater doc number */ |
| 87 | @Test |
| 88 | public void testCase3 () throws IOException { |
| 89 | // log.trace("Testcase3"); |
| 90 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:d")), |
| 91 | new SpanTermQuery(new Term("base", "s:b"))); |
| 92 | |
| 93 | kr = ki.search(sq, (short) 10); |
| 94 | ki.close(); |
| 95 | |
| 96 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 97 | assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID()); |
| 98 | assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos); |
| 99 | assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos); |
| 100 | } |
| 101 | |
| 102 | |
| 103 | /** |
| 104 | * Matching a SpanElementQuery and a SpanNextQuery |
| 105 | * Multiple atomic indices |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 106 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 107 | @Test |
| 108 | public void testCase4 () throws IOException { |
| 109 | // log.trace("Testcase4"); |
| 110 | |
| 111 | ki = new KrillIndex(); |
| 112 | ki.addDoc(createFieldDoc0()); |
| 113 | ki.commit(); |
| 114 | ki.addDoc(createFieldDoc1()); |
| 115 | ki.addDoc(createFieldDoc2()); |
| 116 | ki.commit(); |
| 117 | |
| 118 | sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), |
| 119 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 120 | new SpanTermQuery(new Term("base", "s:b")))); |
| 121 | |
| 122 | kr = ki.search(sq, (short) 10); |
| 123 | ki.close(); |
| 124 | |
| 125 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 126 | // Match #0 |
| 127 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 128 | assertEquals("StartPos", 3, kr.getMatch(0).startPos); |
| 129 | assertEquals("EndPos", 5, kr.getMatch(0).endPos); |
| 130 | // Match #1 |
| 131 | assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID()); |
| 132 | assertEquals("StartPos", 1, kr.getMatch(1).startPos); |
| 133 | assertEquals("EndPos", 3, kr.getMatch(1).endPos); |
| 134 | } |
| 135 | |
| 136 | |
| 137 | /** Matching SpanElementQueries */ |
| 138 | @Test |
| 139 | public void testCase5 () throws IOException { |
| 140 | // log.trace("Testcase5"); |
| 141 | sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), |
| 142 | new SpanElementQuery("base", "e2")); |
| 143 | |
| 144 | kr = ki.search(sq, (short) 10); |
| 145 | ki.close(); |
| 146 | |
| 147 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 148 | // Match #0 |
| 149 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 150 | assertEquals("StartPos", 3, kr.getMatch(0).startPos); |
| 151 | assertEquals("EndPos", 5, kr.getMatch(0).endPos); |
| 152 | } |
| 153 | |
| 154 | |
| 155 | /** Skip to SegmentSpan */ |
| 156 | @Test |
| 157 | public void testcase6 () throws IOException { |
| 158 | ki.addDoc(createFieldDoc4()); |
| 159 | ki.commit(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 160 | sq = new SpanNextQuery( |
| 161 | new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 162 | new SpanTermQuery(new Term("base", "s:c"))), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 163 | new SpanTermQuery(new Term("base", "s:d"))); |
| 164 | |
| 165 | kr = ki.search(sq, (short) 10); |
| 166 | ki.close(); |
| 167 | |
| 168 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 169 | // Match #0 |
| 170 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 171 | assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos); |
| 172 | assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos); |
| 173 | // Match #1 in the other atomic index |
| 174 | assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID()); |
| 175 | assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos); |
| 176 | assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos); |
| 177 | } |
| 178 | |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 179 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 180 | private FieldDocument createFieldDoc0 () { |
| 181 | fd = new FieldDocument(); |
| 182 | fd.addString("ID", "doc-0"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 183 | fd.addTV("base", "bcbabd", "[(0-1)s:b|i:b|_1$<i>0<i>1]" |
| 184 | + "[(1-2)s:c|i:c|s:b|_2$<i>1<i>2]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 185 | + "[(2-3)s:b|i:b|_3$<i>2<i>3|<>:e$<b>64<i>2<i>4<i>4<b>0]" |
| 186 | + "[(3-4)s:a|i:a|_4$<i>3<i>4|<>:e$<b>64<i>3<i>5<i>5<b>0|" |
| 187 | + "<>:e2$<b>64<i>3<i>5<i>5<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 188 | + "[(4-5)s:b|i:b|s:c|_5$<i>4<i>5]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 189 | + "[(5-6)s:d|i:d|_6$<i>5<i>6|<>:e2$<b>64<i>5<i>6<i>6<b>0]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 190 | return fd; |
| 191 | } |
| 192 | |
| 193 | |
| 194 | private FieldDocument createFieldDoc1 () { |
| 195 | fd = new FieldDocument(); |
| 196 | fd.addString("ID", "doc-1"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 197 | fd.addTV("base", "babaa", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 198 | + "[(1-2)s:a|i:a|s:b|_2$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 199 | + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]" |
| 200 | + "[(3-4)s:a|i:a|_4$<i>3<i>4]" + "[(4-5)s:a|i:a|_5$<i>4<i>5]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 201 | return fd; |
| 202 | } |
| 203 | |
| 204 | |
| 205 | private FieldDocument createFieldDoc2 () { |
| 206 | fd = new FieldDocument(); |
| 207 | fd.addString("ID", "doc-2"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 208 | fd.addTV("base", "bdb", |
| 209 | "[(0-1)s:b|i:b|_1$<i>0<i>1]" + "[(1-2)s:d|i:d|s:b|_2$<i>1<i>2]" |
| 210 | + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 211 | return fd; |
| 212 | } |
| 213 | |
| 214 | |
| 215 | private FieldDocument createFieldDoc4 () { |
| 216 | fd = new FieldDocument(); |
| 217 | fd.addString("ID", "doc-4"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 218 | fd.addTV("base", "bdb", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]" |
| 219 | + "[(1-2)s:d|_2$<i>1<i>2]" + "[(2-3)s:d|i:d|_3$<i>2<i>3]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | return fd; |
| 221 | } |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 222 | } |