| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.KrillIndex; |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 15 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 16 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 17 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 18 | |
| 19 | public class TestElementDistanceExclusionIndex { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 20 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 21 | Result kr; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | KrillIndex ki; |
| 23 | |
| 24 | |
| 25 | private SpanQuery createQuery (String e, String x, String y, int min, |
| 26 | int max, boolean isOrdered, boolean exclusion) { |
| 27 | SpanElementQuery eq = new SpanElementQuery("base", e); |
| 28 | SpanDistanceQuery sq = new SpanDistanceQuery(new SpanTermQuery( |
| 29 | new Term("base", x)), new SpanTermQuery(new Term("base", y)), |
| 30 | new DistanceConstraint(eq, min, max, isOrdered, exclusion), |
| 31 | true); |
| 32 | return sq; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 33 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 34 | |
| 35 | |
| 36 | private FieldDocument createFieldDoc0 () { |
| 37 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 38 | fd.addString("ID", "doc-0"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 39 | fd.addTV("base", "ceccdcdecd", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 40 | "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 41 | + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 42 | + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 43 | + "[(3-4)s:c|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 44 | + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 45 | + "[(5-6)s:c|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 46 | + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]" |
| 47 | + "[(7-8)s:e|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 48 | + "[(8-9)s:c|_9$<i>8<i>9]" |
| 49 | + "[(9-10)s:d|_10$<i>9<i>10]"); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 50 | return fd; |
| 51 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 52 | |
| 53 | |
| 54 | private FieldDocument createFieldDoc1 () { |
| 55 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 56 | fd.addString("ID", "doc-1"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 57 | fd.addTV("base", "eedadaeed", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 58 | "[(0-1)s:e|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 59 | + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 60 | + "[(2-3)s:d|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 61 | + "[(3-4)s:a|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 62 | + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>7<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 63 | + "[(5-6)s:a|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 64 | + "[(6-7)s:e|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 65 | + "[(7-8)s:e|_8$<i>7<i>8]" + "[(8-9)s:d|_9$<i>8<i>9]"); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 66 | return fd; |
| 67 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 68 | |
| 69 | |
| 70 | private FieldDocument createFieldDoc2 () { |
| 71 | FieldDocument fd = new FieldDocument(); |
| 72 | fd.addString("ID", "doc-"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 73 | fd.addTV("base", "dcacacdac", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 74 | "[(0-1)s:d|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 75 | + "[(1-2)s:c|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 76 | + "[(2-3)s:a|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 77 | + "[(3-4)s:c|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 78 | + "[(4-5)s:a|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 79 | + "[(5-6)s:c|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 80 | + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]" |
| 81 | + "[(7-8)s:a|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 82 | + "[(8-9)s:c|_9$<i>8<i>9]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 83 | return fd; |
| 84 | } |
| 85 | |
| 86 | |
| 87 | /** |
| 88 | * Distance Zero, unordered |
| 89 | * There is a secondspan on the right side |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 90 | * */ |
| 91 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 92 | public void testCase1 () throws IOException { |
| 93 | ki = new KrillIndex(); |
| 94 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 95 | ki.commit(); |
| 96 | SpanQuery sq; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 97 | sq = createQuery("s", "s:d", "s:c", 0, 0, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 98 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 99 | assertEquals(kr.getTotalResults(), 1); |
| 100 | assertEquals(6, kr.getMatch(0).startPos); |
| 101 | assertEquals(7, kr.getMatch(0).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 102 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 103 | |
| 104 | |
| 105 | /** |
| 106 | * There is another firstspan within max distance |
| 107 | * Unordered |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 108 | * */ |
| 109 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | public void testCase2 () throws IOException { |
| 111 | ki = new KrillIndex(); |
| 112 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 113 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 114 | SpanQuery sq; |
| 115 | |
| 116 | sq = createQuery("s", "s:c", "s:d", 0, 0, false, true); |
| 117 | kr = ki.search(sq, (short) 10); |
| 118 | |
| 119 | assertEquals(kr.getTotalResults(), 4); |
| 120 | assertEquals(0, kr.getMatch(0).startPos); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 121 | assertEquals(1, kr.getMatch(0).endPos); |
| 122 | assertEquals(2, kr.getMatch(1).startPos); |
| 123 | assertEquals(3, kr.getMatch(1).endPos); |
| 124 | assertEquals(3, kr.getMatch(2).startPos); |
| 125 | assertEquals(4, kr.getMatch(2).endPos); |
| 126 | assertEquals(8, kr.getMatch(3).startPos); |
| 127 | assertEquals(9, kr.getMatch(3).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 128 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 129 | |
| 130 | |
| 131 | /** |
| 132 | * Distance 0-1, ordered, unordered |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 133 | * */ |
| 134 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | public void testCase3 () throws IOException { |
| 136 | ki = new KrillIndex(); |
| 137 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 138 | ki.commit(); |
| 139 | SpanQuery sq; |
| 140 | // unordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 141 | sq = createQuery("s", "s:c", "s:e", 0, 1, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 142 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 143 | assertEquals(kr.getTotalResults(), 1); |
| 144 | assertEquals(5, kr.getMatch(0).startPos); |
| 145 | assertEquals(6, kr.getMatch(0).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 147 | //ordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 148 | sq = createQuery("s", "s:c", "s:e", 0, 1, true, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 149 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 150 | assertEquals(kr.getTotalResults(), 3); |
| 151 | assertEquals(2, kr.getMatch(0).startPos); |
| 152 | assertEquals(3, kr.getMatch(0).endPos); |
| 153 | assertEquals(3, kr.getMatch(1).startPos); |
| 154 | assertEquals(4, kr.getMatch(1).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 155 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 156 | |
| 157 | |
| 158 | /** |
| 159 | * Multiple documents, ordered |
| 160 | * No more secondspans, but there is still a firstspan |
| 161 | * */ |
| 162 | @Test |
| 163 | public void testCase4 () throws IOException { |
| 164 | ki = new KrillIndex(); |
| 165 | ki.addDoc(createFieldDoc0()); |
| 166 | ki.addDoc(createFieldDoc1()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 167 | ki.commit(); |
| 168 | SpanQuery sq; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 169 | |
| 170 | sq = createQuery("s", "s:d", "s:e", 1, 1, true, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 171 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 173 | assertEquals(kr.getTotalResults(), 3); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 174 | assertEquals(4, kr.getMatch(0).startPos); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 175 | assertEquals(5, kr.getMatch(0).endPos); |
| 176 | assertEquals(1, kr.getMatch(1).getLocalDocID()); |
| 177 | assertEquals(2, kr.getMatch(1).startPos); |
| 178 | assertEquals(3, kr.getMatch(1).endPos); |
| 179 | assertEquals(8, kr.getMatch(2).startPos); |
| 180 | assertEquals(9, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 181 | } |
| 182 | |
| 183 | |
| 184 | /** |
| 185 | * Skip to |
| 186 | * */ |
| 187 | @Test |
| 188 | public void testCase5 () throws IOException { |
| 189 | ki = new KrillIndex(); |
| 190 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 191 | ki.addDoc(createFieldDoc1()); |
| 192 | ki.addDoc(createFieldDoc0()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 193 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 194 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 195 | |
| 196 | SpanQuery sq = createQuery("s", "s:c", "s:d", 1, 1, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 197 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | aa5c1d3 | 2014-03-20 23:46:55 +0000 | [diff] [blame] | 198 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 199 | assertEquals(kr.getTotalResults(), 3); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 200 | assertEquals(3, kr.getMatch(2).getLocalDocID()); |
| 201 | assertEquals(3, kr.getMatch(2).startPos); |
| 202 | assertEquals(4, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 203 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 204 | sq = new SpanNextQuery( |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 205 | createQuery("s", "s:c", "s:d", 1, 1, false, true), |
| 206 | new SpanTermQuery(new Term("base", "s:a"))); |
| 207 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 208 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 209 | assertEquals(kr.getTotalResults(), 1); |
| 210 | assertEquals(3, kr.getMatch(0).getLocalDocID()); |
| 211 | assertEquals(3, kr.getMatch(0).startPos); |
| 212 | assertEquals(5, kr.getMatch(0).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 213 | } |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 214 | } |