| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| Eliza Margaretha | 76592d7 | 2014-01-16 16:04:23 +0000 | [diff] [blame] | 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 15 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 18 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 19 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 20 | import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper; |
| 22 | import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 23 | |
| 24 | |
| 25 | @RunWith(JUnit4.class) |
| 26 | public class TestSegmentIndex { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 27 | private SpanQuery sq; |
| 28 | private KrillIndex ki; |
| 29 | private Result kr; |
| 30 | private FieldDocument fd; |
| 31 | private Logger log; |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 32 | |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 33 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 34 | public TestSegmentIndex () throws IOException { |
| 35 | ki = new KrillIndex(); |
| 36 | ki.addDoc(createFieldDoc0()); |
| 37 | ki.addDoc(createFieldDoc1()); |
| 38 | ki.addDoc(createFieldDoc2()); |
| 39 | ki.commit(); |
| 40 | |
| 41 | log = LoggerFactory.getLogger(getClass()); |
| 42 | } |
| 43 | |
| 44 | |
| 45 | /** Multiple matches in one document. */ |
| 46 | @Test |
| 47 | public void testCase1 () throws IOException { |
| 48 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:b")), |
| 49 | new SpanTermQuery(new Term("base", "s:c"))); |
| 50 | |
| 51 | kr = ki.search(sq, (short) 10); |
| 52 | ki.close(); |
| 53 | |
| 54 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 55 | assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos); |
| 56 | assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos); |
| 57 | assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos); |
| 58 | assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos); |
| 59 | } |
| 60 | |
| 61 | |
| 62 | /** |
| 63 | * Matches in multiple documents. |
| 64 | * Ensure the same document. The current secondspan is skipped to |
| 65 | * the doc number of the firstspan. |
| 66 | */ |
| 67 | @Test |
| 68 | public void testCase2 () throws IOException { |
| 69 | // log.trace("Testcase2"); |
| 70 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 71 | new SpanTermQuery(new Term("base", "s:b"))); |
| 72 | |
| 73 | kr = ki.search(sq, (short) 10); |
| 74 | ki.close(); |
| 75 | |
| 76 | assertEquals("totalResults", kr.getTotalResults(), 3); |
| 77 | // Match #0 |
| 78 | assertEquals("doc-number", 1, kr.getMatch(0).getLocalDocID()); |
| 79 | assertEquals("StartPos", 1, kr.getMatch(0).startPos); |
| 80 | assertEquals("EndPos", 2, kr.getMatch(0).endPos); |
| 81 | // Match #2 |
| 82 | assertEquals("doc-number", 2, kr.getMatch(2).getLocalDocID()); |
| 83 | assertEquals("StartPos", 2, kr.getMatch(2).startPos); |
| 84 | assertEquals("EndPos", 3, kr.getMatch(2).endPos); |
| 85 | } |
| 86 | |
| 87 | |
| 88 | /** Ensure the same document, skip to a greater doc number */ |
| 89 | @Test |
| 90 | public void testCase3 () throws IOException { |
| 91 | // log.trace("Testcase3"); |
| 92 | sq = new SpanSegmentQuery(new SpanTermQuery(new Term("base", "s:d")), |
| 93 | new SpanTermQuery(new Term("base", "s:b"))); |
| 94 | |
| 95 | kr = ki.search(sq, (short) 10); |
| 96 | ki.close(); |
| 97 | |
| 98 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 99 | assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID()); |
| 100 | assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos); |
| 101 | assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos); |
| 102 | } |
| 103 | |
| 104 | |
| 105 | /** |
| 106 | * Matching a SpanElementQuery and a SpanNextQuery |
| 107 | * Multiple atomic indices |
| 108 | * */ |
| 109 | @Test |
| 110 | public void testCase4 () throws IOException { |
| 111 | // log.trace("Testcase4"); |
| 112 | |
| 113 | ki = new KrillIndex(); |
| 114 | ki.addDoc(createFieldDoc0()); |
| 115 | ki.commit(); |
| 116 | ki.addDoc(createFieldDoc1()); |
| 117 | ki.addDoc(createFieldDoc2()); |
| 118 | ki.commit(); |
| 119 | |
| 120 | sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), |
| 121 | new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), |
| 122 | new SpanTermQuery(new Term("base", "s:b")))); |
| 123 | |
| 124 | kr = ki.search(sq, (short) 10); |
| 125 | ki.close(); |
| 126 | |
| 127 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 128 | // Match #0 |
| 129 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 130 | assertEquals("StartPos", 3, kr.getMatch(0).startPos); |
| 131 | assertEquals("EndPos", 5, kr.getMatch(0).endPos); |
| 132 | // Match #1 |
| 133 | assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID()); |
| 134 | assertEquals("StartPos", 1, kr.getMatch(1).startPos); |
| 135 | assertEquals("EndPos", 3, kr.getMatch(1).endPos); |
| 136 | } |
| 137 | |
| 138 | |
| 139 | /** Matching SpanElementQueries */ |
| 140 | @Test |
| 141 | public void testCase5 () throws IOException { |
| 142 | // log.trace("Testcase5"); |
| 143 | sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), |
| 144 | new SpanElementQuery("base", "e2")); |
| 145 | |
| 146 | kr = ki.search(sq, (short) 10); |
| 147 | ki.close(); |
| 148 | |
| 149 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 150 | // Match #0 |
| 151 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 152 | assertEquals("StartPos", 3, kr.getMatch(0).startPos); |
| 153 | assertEquals("EndPos", 5, kr.getMatch(0).endPos); |
| 154 | } |
| 155 | |
| 156 | |
| 157 | /** Skip to SegmentSpan */ |
| 158 | @Test |
| 159 | public void testcase6 () throws IOException { |
| 160 | ki.addDoc(createFieldDoc4()); |
| 161 | ki.commit(); |
| 162 | sq = new SpanNextQuery(new SpanSegmentQuery(new SpanTermQuery(new Term( |
| 163 | "base", "s:b")), new SpanTermQuery(new Term("base", "s:c"))), |
| 164 | new SpanTermQuery(new Term("base", "s:d"))); |
| 165 | |
| 166 | kr = ki.search(sq, (short) 10); |
| 167 | ki.close(); |
| 168 | |
| 169 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 170 | // Match #0 |
| 171 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 172 | assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos); |
| 173 | assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos); |
| 174 | // Match #1 in the other atomic index |
| 175 | assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID()); |
| 176 | assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos); |
| 177 | assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos); |
| 178 | } |
| 179 | |
| 180 | |
| 181 | private FieldDocument createFieldDoc0 () { |
| 182 | fd = new FieldDocument(); |
| 183 | fd.addString("ID", "doc-0"); |
| 184 | fd.addTV("base", "bcbabd", "[(0-1)s:b|i:b|_1#0-1]" |
| 185 | + "[(1-2)s:c|i:c|s:b|_2#1-2]" |
| 186 | + "[(2-3)s:b|i:b|_3#2-3|<>:e#2-4$<i>4]" |
| 187 | + "[(3-4)s:a|i:a|_4#3-4|<>:e#3-5$<i>5|<>:e2#3-5$<i>5]" |
| 188 | + "[(4-5)s:b|i:b|s:c|_5#4-5]" |
| 189 | + "[(5-6)s:d|i:d|_6#5-6|<>:e2#5-6$<i>6]"); |
| 190 | return fd; |
| 191 | } |
| 192 | |
| 193 | |
| 194 | private FieldDocument createFieldDoc1 () { |
| 195 | fd = new FieldDocument(); |
| 196 | fd.addString("ID", "doc-1"); |
| 197 | fd.addTV("base", "babaa", "[(0-1)s:b|i:b|s:c|_1#0-1]" |
| 198 | + "[(1-2)s:a|i:a|s:b|_2#1-2|<>:e#1-3$<i>3]" |
| 199 | + "[(2-3)s:b|i:b|s:a|_3#2-3]" + "[(3-4)s:a|i:a|_4#3-4]" |
| 200 | + "[(4-5)s:a|i:a|_5#4-5]"); |
| 201 | return fd; |
| 202 | } |
| 203 | |
| 204 | |
| 205 | private FieldDocument createFieldDoc2 () { |
| 206 | fd = new FieldDocument(); |
| 207 | fd.addString("ID", "doc-2"); |
| 208 | fd.addTV("base", "bdb", "[(0-1)s:b|i:b|_1#0-1]" |
| 209 | + "[(1-2)s:d|i:d|s:b|_2#1-2]" + "[(2-3)s:b|i:b|s:a|_3#2-3]"); |
| 210 | return fd; |
| 211 | } |
| 212 | |
| 213 | |
| 214 | private FieldDocument createFieldDoc4 () { |
| 215 | fd = new FieldDocument(); |
| 216 | fd.addString("ID", "doc-4"); |
| 217 | fd.addTV("base", "bdb", "[(0-1)s:b|i:b|s:c|_1#0-1]" |
| 218 | + "[(1-2)s:d|_2#1-2]" + "[(2-3)s:d|i:d|_3#2-3]"); |
| 219 | return fd; |
| 220 | } |
| Eliza Margaretha | c1960f6 | 2014-01-14 12:35:53 +0000 | [diff] [blame] | 221 | } |