| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| 13 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.KrillIndex; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 15 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 18 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 19 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 20 | |
| 21 | @RunWith(JUnit4.class) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | public class TestUnorderedDistanceIndex { |
| 23 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 24 | private KrillIndex ki; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 25 | private Result kr; |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 26 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 27 | |
| 28 | private FieldDocument createFieldDoc0 () { |
| 29 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 30 | fd.addString("ID", "doc-0"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 31 | fd.addTV("base", "text", "[(0-1)s:c|_1#0-1]" + "[(1-2)s:e|_2#1-2]" |
| 32 | + "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]" |
| 33 | + "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]" |
| 34 | + "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]" |
| 35 | + "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" + "[(6-7)s:d|_7#6-7]" |
| 36 | + "[(7-8)s:f|_8#7-8|<>:x#7-9$<i>9]" |
| 37 | + "[(8-9)s:e|_9#8-9|<>:x#8-10$<i>10]" + "[(9-10)s:d|_10#9-10]"); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 38 | return fd; |
| 39 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 40 | |
| 41 | |
| 42 | private FieldDocument createFieldDoc1 () { |
| 43 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 44 | fd.addString("ID", "doc-1"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 45 | fd.addTV("base", "text", "[(0-1)s:d|_1#0-1]" + "[(1-2)s:c|_2#1-2]" |
| 46 | + "[(2-3)s:e|_3#2-3]" + "[(3-4)s:e|_4#3-4]" |
| 47 | + "[(4-5)s:d|_5#4-5]" + "[(5-6)s:e|_6#5-6]" |
| 48 | + "[(6-7)s:e|_7#6-7]" + "[(7-8)s:c|_8#7-8]" |
| 49 | + "[(8-9)s:e|_9#8-9]" + "[(9-10)s:d|_10#9-10]"); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 50 | return fd; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 51 | } |
| 52 | |
| 53 | |
| 54 | private FieldDocument createFieldDoc2 () { |
| 55 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 56 | fd.addString("ID", "doc-2"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 57 | fd.addTV("base", "text", "[(0-1)s:f|_1#0-1]" + "[(1-2)s:c|_2#1-2]" |
| 58 | + "[(2-3)s:e|_3#2-3]" + "[(3-4)s:e|_4#3-4]" |
| 59 | + "[(4-5)s:d|_5#4-5]" + "[(5-6)s:f|_6#5-6]" |
| 60 | + "[(6-7)s:f|_7#6-7]"); |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 61 | return fd; |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 62 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | |
| 64 | |
| 65 | private SpanQuery createQuery (String x, String y, int min, int max, |
| 66 | boolean isOrdered) { |
| 67 | SpanQuery sq = new SpanDistanceQuery(new SpanTermQuery(new Term("base", |
| 68 | x)), new SpanTermQuery(new Term("base", y)), |
| 69 | new DistanceConstraint(min, max, isOrdered, false), true); |
| 70 | return sq; |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 71 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 72 | |
| 73 | |
| 74 | private SpanQuery createElementQuery (String x, String y, int min, int max, |
| 75 | boolean isOrdered) { |
| 76 | SpanQuery sq = new SpanDistanceQuery(new SpanElementQuery("base", x), |
| 77 | new SpanElementQuery("base", y), new DistanceConstraint(min, |
| 78 | max, isOrdered, false), true); |
| 79 | return sq; |
| 80 | } |
| 81 | |
| 82 | |
| 83 | /** |
| 84 | * One document, multiple occurrences |
| 85 | * The first first and second spans are too far from each other |
| 86 | * One of the spans ends first |
| 87 | * One of the candidate list is empty |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 88 | * */ |
| Eliza Margaretha | adedcb6 | 2014-02-03 17:21:17 +0000 | [diff] [blame] | 89 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 90 | public void testCase1 () throws IOException { |
| 91 | //System.out.println("testcase 1"); |
| 92 | ki = new KrillIndex(); |
| 93 | ki.addDoc(createFieldDoc0()); |
| 94 | ki.commit(); |
| 95 | |
| 96 | SpanQuery sq = createQuery("s:c", "s:d", 0, 3, false); |
| 97 | kr = ki.search(sq, (short) 10); |
| 98 | |
| 99 | assertEquals(kr.getTotalResults(), 5); |
| Eliza Margaretha | b0449d0 | 2014-02-04 11:54:41 +0000 | [diff] [blame] | 100 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 101 | |
| 102 | |
| 103 | /** |
| 104 | * Multiple documents |
| 105 | * Ensure same doc |
| 106 | * Both candidate lists are empty, but there is a span left in the |
| 107 | * doc |
| 108 | * Both candidate lists are empty, but there are more matches in |
| 109 | * the doc |
| Eliza Margaretha | b0449d0 | 2014-02-04 11:54:41 +0000 | [diff] [blame] | 110 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 111 | * @throws IOException |
| Eliza Margaretha | b0449d0 | 2014-02-04 11:54:41 +0000 | [diff] [blame] | 112 | * */ |
| 113 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 114 | public void testCase2 () throws IOException { |
| 115 | //System.out.println("testcase 2"); |
| 116 | ki = new KrillIndex(); |
| 117 | ki.addDoc(createFieldDoc0()); |
| 118 | ki.addDoc(createFieldDoc1()); |
| 119 | ki.commit(); |
| 120 | |
| 121 | SpanQuery sq = createQuery("s:c", "s:d", 1, 2, false); |
| 122 | kr = ki.search(sq, (short) 10); |
| 123 | |
| 124 | assertEquals(kr.getTotalResults(), 6); |
| Eliza Margaretha | adedcb6 | 2014-02-03 17:21:17 +0000 | [diff] [blame] | 125 | } |
| Eliza Margaretha | 6651fc3 | 2014-02-18 14:57:47 +0000 | [diff] [blame] | 126 | |
| Eliza Margaretha | ecf8b59 | 2014-09-30 17:08:09 +0000 | [diff] [blame] | 127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | /** |
| 129 | * Multiple documents |
| 130 | * Ensure same Doc |
| 131 | * |
| 132 | * @throws IOException |
| 133 | * */ |
| 134 | @Test |
| 135 | public void testCase3 () throws IOException { |
| 136 | //System.out.println("testcase 3"); |
| 137 | ki = new KrillIndex(); |
| 138 | ki.addDoc(createFieldDoc0()); |
| 139 | ki.addDoc(createFieldDoc1()); |
| 140 | ki.addDoc(createFieldDoc2()); |
| 141 | ki.commit(); |
| 142 | |
| 143 | SpanQuery sq = createQuery("s:e", "s:f", 1, 2, false); |
| 144 | kr = ki.search(sq, (short) 10); |
| 145 | |
| 146 | assertEquals(kr.getTotalResults(), 3); |
| 147 | assertEquals(0, kr.getMatch(0).getLocalDocID()); |
| 148 | assertEquals(7, kr.getMatch(0).getStartPos()); |
| 149 | assertEquals(9, kr.getMatch(0).getEndPos()); |
| 150 | assertEquals(2, kr.getMatch(1).getLocalDocID()); |
| 151 | assertEquals(0, kr.getMatch(1).getStartPos()); |
| 152 | assertEquals(3, kr.getMatch(1).getEndPos()); |
| 153 | } |
| 154 | |
| 155 | |
| 156 | /** Skip to */ |
| 157 | @Test |
| 158 | public void testCase4 () throws IOException { |
| 159 | //System.out.println("testcase 4"); |
| 160 | ki = new KrillIndex(); |
| 161 | ki.addDoc(createFieldDoc0()); |
| 162 | ki.addDoc(createFieldDoc1()); |
| 163 | ki.addDoc(createFieldDoc2()); |
| 164 | ki.commit(); |
| 165 | |
| 166 | SpanQuery sq = new SpanNextQuery( |
| 167 | createQuery("s:d", "s:e", 1, 2, false), new SpanTermQuery( |
| 168 | new Term("base", "s:f"))); |
| 169 | |
| 170 | kr = ki.search(sq, (short) 10); |
| 171 | assertEquals(kr.getTotalResults(), 2); |
| 172 | assertEquals(2, kr.getMatch(0).getLocalDocID()); |
| 173 | assertEquals(2, kr.getMatch(0).getStartPos()); |
| 174 | assertEquals(6, kr.getMatch(0).getEndPos()); |
| 175 | assertEquals(3, kr.getMatch(1).getStartPos()); |
| 176 | assertEquals(6, kr.getMatch(1).getEndPos()); |
| 177 | } |
| 178 | |
| 179 | |
| 180 | /** ElementQueries */ |
| 181 | @Test |
| 182 | public void testCase5 () throws IOException { |
| 183 | ki = new KrillIndex(); |
| 184 | ki.addDoc(createFieldDoc0()); |
| 185 | ki.commit(); |
| 186 | |
| 187 | // Intersection ---- Distance 0:0 |
| 188 | //System.out.println("Intersection ---- Distance 0:0"); |
| 189 | SpanQuery sq = createElementQuery("x", "y", 0, 0, false); |
| 190 | kr = ki.search(sq, (short) 10); |
| 191 | |
| 192 | assertEquals(kr.getTotalResults(), 4); |
| 193 | assertEquals(2, kr.getMatch(0).startPos); |
| 194 | assertEquals(7, kr.getMatch(0).endPos); |
| 195 | assertEquals(3, kr.getMatch(1).startPos); |
| 196 | assertEquals(7, kr.getMatch(1).endPos); |
| 197 | assertEquals(3, kr.getMatch(2).startPos); |
| 198 | assertEquals(8, kr.getMatch(2).endPos); |
| 199 | |
| 200 | // Next to ---- Distance 1:1 |
| 201 | //System.out.println("Next to ---- Distance 1:1"); |
| 202 | sq = createElementQuery("x", "y", 1, 1, false); |
| 203 | kr = ki.search(sq, (short) 10); |
| 204 | |
| 205 | assertEquals(kr.getTotalResults(), 1); |
| 206 | assertEquals(5, kr.getMatch(0).startPos); |
| 207 | assertEquals(10, kr.getMatch(0).endPos); |
| 208 | |
| 209 | // ---- Distance 1:2 |
| 210 | //System.out.println("---- Distance 1:2"); |
| 211 | sq = createElementQuery("x", "y", 1, 2, false); |
| 212 | kr = ki.search(sq, (short) 10); |
| 213 | |
| 214 | assertEquals(kr.getTotalResults(), 2); |
| 215 | assertEquals(4, kr.getMatch(0).startPos); |
| 216 | assertEquals(9, kr.getMatch(0).endPos); |
| 217 | assertEquals(5, kr.getMatch(1).startPos); |
| 218 | assertEquals(10, kr.getMatch(1).endPos); |
| 219 | |
| 220 | } |
| 221 | |
| 222 | |
| 223 | /** |
| 224 | * The same element type |
| 225 | * |
| 226 | * WARNING: |
| 227 | * This kind of query is not appropriate for an unordered distance |
| 228 | * span query. |
| 229 | * Instead, it must be an ordered distance span query. Such an |
| 230 | * unordered distance |
| 231 | * span query yields "redundant results" because matches are |
| 232 | * searched for each |
| 233 | * child span. |
| 234 | * */ |
| 235 | @Test |
| 236 | public void testCase6 () throws IOException { |
| 237 | ki = new KrillIndex(); |
| 238 | ki.addDoc(createFieldDoc0()); |
| 239 | ki.commit(); |
| 240 | |
| 241 | //---- Distance 1:2 |
| 242 | SpanQuery sq = createElementQuery("x", "x", 1, 2, false); |
| 243 | kr = ki.search(sq, (short) 10); |
| 244 | |
| 245 | assertEquals(kr.getTotalResults(), 4); |
| 246 | } |
| 247 | |
| 248 | |
| 249 | /** |
| 250 | * Nested distance queries |
| 251 | * */ |
| 252 | @Test |
| 253 | public void testCase7 () throws IOException { |
| 254 | //System.out.println("testcase 7"); |
| 255 | ki = new KrillIndex(); |
| 256 | ki.addDoc(createFieldDoc0()); |
| 257 | ki.addDoc(createFieldDoc1()); |
| 258 | ki.commit(); |
| 259 | |
| 260 | SpanQuery sq = createQuery("s:c", "s:d", 1, 2, false); |
| 261 | SpanQuery sq2 = new SpanDistanceQuery(sq, new SpanTermQuery(new Term( |
| 262 | "base", "s:e")), new DistanceConstraint(1, 2, true, false), |
| 263 | true); |
| 264 | kr = ki.search(sq2, (short) 10); |
| 265 | assertEquals(kr.getTotalResults(), 3); |
| 266 | assertEquals(5, kr.getMatch(0).getStartPos()); |
| 267 | assertEquals(9, kr.getMatch(0).getEndPos()); |
| 268 | assertEquals(1, kr.getMatch(1).getLocalDocID()); |
| 269 | assertEquals(0, kr.getMatch(1).getStartPos()); |
| 270 | assertEquals(3, kr.getMatch(1).getEndPos()); |
| 271 | assertEquals(0, kr.getMatch(2).getStartPos()); |
| 272 | assertEquals(4, kr.getMatch(2).getEndPos()); |
| 273 | } |
| 274 | |
| 275 | |
| 276 | /** |
| 277 | * Multiple NextSpans in the same first span position |
| 278 | * */ |
| 279 | @Test |
| 280 | public void testCase8 () throws IOException { |
| 281 | ki = new KrillIndex(); |
| 282 | ki.addDoc(createFieldDoc1()); |
| 283 | ki.commit(); |
| 284 | SpanQuery sq = new SpanNextQuery(new SpanTermQuery(new Term("base", |
| 285 | "s:d")), createQuery("s:c", "s:e", 1, 2, false)); |
| 286 | kr = ki.search(sq, (short) 10); |
| 287 | |
| 288 | assertEquals(kr.getTotalResults(), 3); |
| 289 | assertEquals(0, kr.getMatch(1).getStartPos()); |
| 290 | assertEquals(4, kr.getMatch(1).getEndPos()); |
| 291 | |
| 292 | } |
| 293 | |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 294 | } |