| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 3 | import static org.junit.Assert.*; |
| 4 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | |
| 12 | import de.ids_mannheim.korap.KorapIndex; |
| 13 | import de.ids_mannheim.korap.KorapResult; |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 15 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 18 | |
| 19 | public class TestDistanceExclusionIndex { |
| 20 | |
| 21 | private KorapIndex ki; |
| 22 | private KorapResult kr; |
| 23 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 24 | /** Ordered, unordered |
| 25 | * */ |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 26 | @Test |
| 27 | public void testCase1() throws IOException{ |
| 28 | ki = new KorapIndex(); |
| 29 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 30 | ki.commit(); |
| 31 | SpanQuery sq; |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 32 | //ordered distance 0 to 1 |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 33 | sq = createQuery("s:c","s:e",0,1,true); |
| 34 | kr = ki.search(sq, (short) 10); |
| 35 | assertEquals(3, kr.getTotalResults()); |
| 36 | assertEquals(2, kr.match(0).getStartPos()); |
| 37 | assertEquals(3, kr.match(0).getEndPos()); |
| 38 | assertEquals(3, kr.match(1).getStartPos()); |
| 39 | assertEquals(4, kr.match(1).getEndPos()); |
| 40 | assertEquals(5, kr.match(2).getStartPos()); |
| 41 | assertEquals(6, kr.match(2).getEndPos()); |
| 42 | |
| 43 | // Unordered |
| 44 | sq = createQuery("s:c","s:e",0,1,false); |
| 45 | kr = ki.search(sq, (short) 10); |
| 46 | assertEquals(2, kr.getTotalResults()); |
| 47 | } |
| 48 | |
| 49 | /** Multiple docs, unordered |
| 50 | * No more secondSpans |
| 51 | * */ |
| 52 | @Test |
| 53 | public void testCase2() throws IOException{ |
| 54 | ki = new KorapIndex(); |
| 55 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 56 | ki.addDoc(createFieldDoc1()); |
| 57 | ki.commit(); |
| 58 | SpanQuery sq; |
| 59 | // ---- Distance 0 to 1 |
| 60 | sq = createQuery("s:c","s:e",0,1,false); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 61 | kr = ki.search(sq, (short) 10); |
| 62 | assertEquals(5, kr.getTotalResults()); |
| 63 | assertEquals(1, kr.match(3).getLocalDocID()); |
| 64 | } |
| 65 | |
| 66 | /** Secondspans' document number is bigger than firstspans' |
| 67 | * Actual distance is smaller than min distance. |
| 68 | * */ |
| 69 | @Test |
| 70 | public void testCase3() throws IOException{ |
| 71 | ki = new KorapIndex(); |
| 72 | ki.addDoc(createFieldDoc1()); |
| 73 | ki.addDoc(createFieldDoc0()); |
| 74 | ki.commit(); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 75 | |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 76 | SpanQuery sq; |
| 77 | // Unordered |
| 78 | sq = createQuery("s:c","s:e",2,2,false); |
| 79 | kr = ki.search(sq, (short) 10); |
| 80 | assertEquals(5, kr.getTotalResults()); |
| 81 | } |
| 82 | |
| 83 | /** Unordered: firstspan in on the right side of the secondspan, |
| 84 | * but within max distance. |
| 85 | * */ |
| 86 | @Test |
| 87 | public void testCase4() throws IOException{ |
| 88 | ki = new KorapIndex(); |
| 89 | ki.addDoc(createFieldDoc2()); |
| 90 | ki.commit(); |
| 91 | |
| 92 | SpanQuery sq; |
| 93 | // Unordered |
| 94 | sq = createQuery("s:b","s:c",2,2,false); |
| 95 | kr = ki.search(sq, (short) 10); |
| 96 | assertEquals(1, kr.getTotalResults()); |
| 97 | assertEquals(1, kr.match(0).getStartPos()); |
| 98 | assertEquals(2, kr.match(0).getEndPos()); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 99 | } |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 100 | |
| 101 | /** Element queries |
| 102 | * */ |
| 103 | @Test |
| 104 | public void testCase5() throws IOException{ |
| 105 | ki = new KorapIndex(); |
| 106 | ki.addDoc(createFieldDoc0()); |
| 107 | ki.commit(); |
| 108 | |
| 109 | SpanDistanceQuery sq; |
| 110 | sq = new SpanDistanceQuery( |
| 111 | new SpanElementQuery("base", "x"), |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 112 | new SpanElementQuery("base", "y"), |
| 113 | new DistanceConstraint(0, 1, false, true), |
| 114 | true); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 115 | |
| 116 | kr = ki.search(sq, (short) 10); |
| 117 | assertEquals(1, kr.getTotalResults()); |
| 118 | assertEquals(9, kr.match(0).getStartPos()); |
| 119 | assertEquals(10, kr.match(0).getEndPos()); |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 120 | } |
| 121 | |
| 122 | // Add skipTo test |
| 123 | @Test |
| 124 | public void testCase6() throws IOException{ |
| 125 | ki = new KorapIndex(); |
| 126 | ki.addDoc(createFieldDoc1()); |
| 127 | ki.addDoc(createFieldDoc2()); |
| 128 | ki.commit(); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 129 | |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 130 | SpanQuery sq; |
| 131 | //ordered distance 0 to 1 |
| 132 | sq = createQuery("s:d","s:b",0,1,true); |
| 133 | kr = ki.search(sq, (short) 10); |
| 134 | assertEquals(4, kr.getTotalResults()); |
| 135 | |
| 136 | SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c")); |
| 137 | kr = ki.search(stq, (short) 10); |
| 138 | assertEquals(6, kr.getTotalResults()); |
| 139 | |
| 140 | SpanNextQuery snq = new SpanNextQuery(stq,sq); |
| 141 | kr = ki.search(snq, (short) 10); |
| 142 | assertEquals(2, kr.getTotalResults()); |
| 143 | assertEquals(3, kr.match(0).getStartPos()); |
| 144 | assertEquals(5, kr.match(0).getEndPos()); |
| 145 | assertEquals(8, kr.match(1).getStartPos()); |
| 146 | assertEquals(10, kr.match(1).getEndPos()); |
| 147 | |
| 148 | /*System.out.print(kr.getTotalResults()+"\n"); |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 149 | for (int i=0; i< kr.getTotalResults(); i++){ |
| 150 | System.out.println( |
| 151 | kr.match(i).getLocalDocID()+" "+ |
| 152 | kr.match(i).startPos + " " + |
| 153 | kr.match(i).endPos |
| 154 | ); |
| Eliza Margaretha | 7eab4ef | 2014-02-13 16:55:16 +0000 | [diff] [blame] | 155 | }*/ |
| Eliza Margaretha | 38a9466 | 2014-11-20 13:48:00 +0000 | [diff] [blame] | 156 | } |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 157 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 158 | private SpanQuery createQuery(String x, String y, int min, int max, boolean isOrdered){ |
| 159 | SpanDistanceQuery sq = new SpanDistanceQuery( |
| 160 | new SpanTermQuery(new Term("base",x)), |
| 161 | new SpanTermQuery(new Term("base",y)), |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 162 | new DistanceConstraint(min, max, isOrdered,true), |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 163 | true |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 164 | ); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 165 | return sq; |
| 166 | } |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 167 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 168 | private FieldDocument createFieldDoc0(){ |
| 169 | FieldDocument fd = new FieldDocument(); |
| 170 | fd.addString("ID", "doc-0"); |
| 171 | fd.addTV("base", |
| 172 | "text", |
| 173 | "[(0-1)s:c|_1#0-1]" + |
| 174 | "[(1-2)s:e|_2#1-2]" + |
| 175 | "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]" + |
| 176 | "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]" + |
| 177 | "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]" + |
| 178 | "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" + |
| 179 | "[(6-7)s:d|_7#6-7]" + |
| 180 | "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" + |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 181 | "[(8-9)s:e|_9#8-9]" + |
| 182 | "[(9-10)s:d|_10#9-10|<>:x#9-10$<i>10]"); |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 183 | return fd; |
| 184 | } |
| 185 | |
| 186 | private FieldDocument createFieldDoc1() { |
| 187 | FieldDocument fd = new FieldDocument(); |
| 188 | fd.addString("ID", "doc-1"); |
| 189 | fd.addTV("base", |
| 190 | "text", |
| 191 | "[(0-1)s:b|s:c|_1#0-1]" + |
| 192 | "[(1-2)s:b|_2#1-2]" + |
| 193 | "[(2-3)s:c|_3#2-3]" + |
| 194 | "[(3-4)s:c|_4#3-4]" + |
| 195 | "[(4-5)s:d|_5#4-5]" + |
| 196 | "[(5-6)s:d|_6#5-6]"); |
| 197 | return fd; |
| 198 | } |
| Eliza Margaretha | 63926cb | 2014-02-13 16:54:23 +0000 | [diff] [blame] | 199 | |
| 200 | private FieldDocument createFieldDoc2() { |
| 201 | FieldDocument fd = new FieldDocument(); |
| 202 | fd.addString("ID", "doc-2"); |
| 203 | fd.addTV("base", |
| 204 | "text", |
| 205 | "[(0-1)s:b|_1#0-1]" + |
| 206 | "[(1-2)s:b|_2#1-2]" + |
| 207 | "[(2-3)s:c|_3#2-3]" + |
| 208 | "[(3-4)s:c|_4#3-4]" + |
| 209 | "[(4-5)s:b|_5#4-5]" + |
| 210 | "[(5-6)s:d|_6#5-6]" + |
| 211 | "[(6-7)s:b|_7#6-7]" + |
| 212 | "[(7-8)s:d|_8#7-8]" + |
| 213 | "[(8-9)s:c|_9#8-9]" + |
| 214 | "[(9-10)s:d|_10#9-10]"); |
| 215 | return fd; |
| 216 | } |
| 217 | |
| Eliza Margaretha | 609fcc6 | 2014-02-13 14:10:20 +0000 | [diff] [blame] | 218 | } |