| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 69726b1 | 2015-12-10 12:03:19 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 4 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.KrillIndex; |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 15 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| margaretha | 69726b1 | 2015-12-10 12:03:19 +0100 | [diff] [blame] | 17 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 18 | |
| 19 | public class TestDistanceExclusionIndex { |
| 20 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 21 | private KrillIndex ki; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | private Result kr; |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 23 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 24 | |
| 25 | /** |
| 26 | * Ordered, unordered |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 27 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 28 | @Test |
| 29 | public void testCase1 () throws IOException { |
| 30 | ki = new KrillIndex(); |
| 31 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 32 | ki.commit(); |
| 33 | SpanQuery sq; |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 34 | //ordered distance 0 to 1 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 35 | sq = createQuery("s:c", "s:e", 0, 1, true); |
| 36 | kr = ki.search(sq, (short) 10); |
| 37 | assertEquals((long) 3, kr.getTotalResults()); |
| 38 | assertEquals(2, kr.getMatch(0).getStartPos()); |
| 39 | assertEquals(3, kr.getMatch(0).getEndPos()); |
| 40 | assertEquals(3, kr.getMatch(1).getStartPos()); |
| 41 | assertEquals(4, kr.getMatch(1).getEndPos()); |
| 42 | assertEquals(5, kr.getMatch(2).getStartPos()); |
| 43 | assertEquals(6, kr.getMatch(2).getEndPos()); |
| 44 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 45 | // Unordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 46 | sq = createQuery("s:c", "s:e", 0, 1, false); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 47 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 48 | assertEquals((long) 2, kr.getTotalResults()); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 49 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 50 | |
| 51 | |
| 52 | /** |
| 53 | * Multiple docs, unordered |
| 54 | * No more secondSpans |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 55 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 56 | @Test |
| 57 | public void testCase2 () throws IOException { |
| 58 | ki = new KrillIndex(); |
| 59 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 60 | ki.addDoc(createFieldDoc1()); |
| 61 | ki.commit(); |
| 62 | SpanQuery sq; |
| 63 | // ---- Distance 0 to 1 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 64 | sq = createQuery("s:c", "s:e", 0, 1, false); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 65 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 66 | assertEquals((long) 5, kr.getTotalResults()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 67 | assertEquals(1, kr.getMatch(3).getLocalDocID()); |
| 68 | } |
| 69 | |
| 70 | |
| 71 | /** |
| 72 | * Secondspans' document number is bigger than firstspans' |
| 73 | * Actual distance is smaller than min distance. |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 74 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 75 | @Test |
| 76 | public void testCase3 () throws IOException { |
| 77 | ki = new KrillIndex(); |
| 78 | ki.addDoc(createFieldDoc1()); |
| 79 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 80 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 81 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 82 | SpanQuery sq; |
| 83 | // Unordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 84 | sq = createQuery("s:c", "s:e", 2, 2, false); |
| 85 | kr = ki.search(sq, (short) 10); |
| 86 | assertEquals((long) 5, kr.getTotalResults()); |
| 87 | } |
| 88 | |
| 89 | |
| 90 | /** |
| 91 | * Unordered: firstspan in on the right side of the secondspan, |
| 92 | * but within max distance. |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 93 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 94 | @Test |
| 95 | public void testCase4 () throws IOException { |
| 96 | ki = new KrillIndex(); |
| 97 | ki.addDoc(createFieldDoc2()); |
| 98 | ki.commit(); |
| 99 | |
| 100 | SpanQuery sq; |
| 101 | // Unordered |
| 102 | sq = createQuery("s:b", "s:c", 2, 2, false); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 103 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 104 | assertEquals((long) 1, kr.getTotalResults()); |
| 105 | assertEquals(1, kr.getMatch(0).getStartPos()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 106 | assertEquals(2, kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 107 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | |
| 109 | |
| 110 | /** |
| 111 | * Element queries |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 112 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | @Test |
| 114 | public void testCase5 () throws IOException { |
| 115 | ki = new KrillIndex(); |
| 116 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 117 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 118 | |
| 119 | SpanDistanceQuery sq; |
| 120 | sq = new SpanDistanceQuery(new SpanElementQuery("base", "x"), |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 121 | new SpanElementQuery("base", "y"), |
| 122 | new DistanceConstraint(0, 1, false, true), true); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 123 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 124 | kr = ki.search(sq, (short) 10); |
| margaretha | 69726b1 | 2015-12-10 12:03:19 +0100 | [diff] [blame] | 125 | |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 126 | assertEquals((long) 1, kr.getTotalResults()); |
| 127 | assertEquals(9, kr.getMatch(0).getStartPos()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | assertEquals(10, kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 129 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | |
| 131 | |
| 132 | // Add skipTo test |
| 133 | @Test |
| 134 | public void testCase6 () throws IOException { |
| 135 | ki = new KrillIndex(); |
| 136 | ki.addDoc(createFieldDoc1()); |
| 137 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 138 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 139 | |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 140 | SpanQuery sq; |
| 141 | //ordered distance 0 to 1 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 142 | sq = createQuery("s:d", "s:b", 0, 1, true); |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 143 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 144 | assertEquals((long) 4, kr.getTotalResults()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 145 | |
| 146 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 147 | kr = ki.search(stq, (short) 10); |
| 148 | assertEquals((long) 6, kr.getTotalResults()); |
| 149 | |
| 150 | SpanNextQuery snq = new SpanNextQuery(stq, sq); |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 151 | kr = ki.search(snq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 152 | assertEquals((long) 2, kr.getTotalResults()); |
| 153 | assertEquals(3, kr.getMatch(0).getStartPos()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 154 | assertEquals(5, kr.getMatch(0).getEndPos()); |
| 155 | assertEquals(8, kr.getMatch(1).getStartPos()); |
| 156 | assertEquals(10, kr.getMatch(1).getEndPos()); |
| 157 | |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 158 | /*System.out.print(kr.getTotalResults()+"\n"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 159 | for (int i=0; i< kr.getTotalResults(); i++){ |
| 160 | System.out.println( |
| 161 | kr.getMatch(i).getLocalDocID()+" "+ |
| 162 | kr.getMatch(i).startPos + " " + |
| 163 | kr.getMatch(i).endPos |
| 164 | ); |
| 165 | }*/ |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 166 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 167 | |
| 168 | |
| 169 | private SpanQuery createQuery (String x, String y, int min, int max, |
| 170 | boolean isOrdered) { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 171 | SpanDistanceQuery sq = new SpanDistanceQuery( |
| 172 | new SpanTermQuery(new Term("base", x)), |
| 173 | new SpanTermQuery(new Term("base", y)), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 174 | new DistanceConstraint(min, max, isOrdered, true), true); |
| 175 | return sq; |
| 176 | } |
| 177 | |
| 178 | |
| 179 | private FieldDocument createFieldDoc0 () { |
| 180 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 181 | fd.addString("ID", "doc-0"); |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 182 | fd.addTV("base", "text", "[(0-1)s:c|_1$<i>0<i>1]" |
| 183 | + "[(1-2)s:e|_2$<i>1<i>2]" |
| 184 | + "[(2-3)s:c|_3$<i>2<i>3|<>:y$<b>64<i>2<i>4<i>4<b>0]" |
| 185 | + "[(3-4)s:c|_4$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]" |
| 186 | + "[(4-5)s:d|_5$<i>4<i>5|<>:y$<b>64<i>4<i>6<i>6<b>0]" |
| 187 | + "[(5-6)s:c|_6$<i>5<i>6|<>:y$<b>64<i>5<i>8<i>8<b>0]" |
| 188 | + "[(6-7)s:d|_7$<i>6<i>7]" |
| 189 | + "[(7-8)s:e|_8$<i>7<i>8|<>:x$<b>64<i>7<i>9<i>9<b>0]" |
| 190 | + "[(8-9)s:e|_9$<i>8<i>9]" |
| 191 | + "[(9-10)s:d|_10$<i>9<i>10|<>:x$<b>64<i>9<i>10<i>10<b>0]"); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 192 | return fd; |
| 193 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 194 | |
| 195 | |
| 196 | private FieldDocument createFieldDoc1 () { |
| 197 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 198 | fd.addString("ID", "doc-1"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 199 | fd.addTV("base", "text", |
| 200 | "[(0-1)s:b|s:c|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]" |
| 201 | + "[(2-3)s:c|_3$<i>2<i>3]" + "[(3-4)s:c|_4$<i>3<i>4]" |
| 202 | + "[(4-5)s:d|_5$<i>4<i>5]" + "[(5-6)s:d|_6$<i>5<i>6]"); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 203 | return fd; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 204 | } |
| 205 | |
| 206 | |
| 207 | private FieldDocument createFieldDoc2 () { |
| 208 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 209 | fd.addString("ID", "doc-2"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 210 | fd.addTV("base", "text", |
| 211 | "[(0-1)s:b|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]" |
| 212 | + "[(2-3)s:c|_3$<i>2<i>3]" + "[(3-4)s:c|_4$<i>3<i>4]" |
| 213 | + "[(4-5)s:b|_5$<i>4<i>5]" + "[(5-6)s:d|_6$<i>5<i>6]" |
| 214 | + "[(6-7)s:b|_7$<i>6<i>7]" + "[(7-8)s:d|_8$<i>7<i>8]" |
| 215 | + "[(8-9)s:c|_9$<i>8<i>9]" |
| 216 | + "[(9-10)s:d|_10$<i>9<i>10]"); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 217 | return fd; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 218 | } |
| 219 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 220 | } |