| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanQuery; |
| 9 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 10 | import org.junit.Test; |
| 11 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.KrillIndex; |
| Eliza Margaretha | d469346 | 2014-03-17 13:16:18 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 15 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 16 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 17 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 18 | |
| 19 | public class TestElementDistanceExclusionIndex { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 20 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 21 | Result kr; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | KrillIndex ki; |
| 23 | |
| 24 | |
| 25 | private SpanQuery createQuery (String e, String x, String y, int min, |
| 26 | int max, boolean isOrdered, boolean exclusion) { |
| 27 | SpanElementQuery eq = new SpanElementQuery("base", e); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 28 | SpanDistanceQuery sq = new SpanDistanceQuery( |
| 29 | new SpanTermQuery(new Term("base", x)), |
| 30 | new SpanTermQuery(new Term("base", y)), |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 31 | new DistanceConstraint(eq, min, max, isOrdered, exclusion), |
| 32 | true); |
| 33 | return sq; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 34 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 35 | |
| 36 | |
| 37 | private FieldDocument createFieldDoc0 () { |
| 38 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 39 | fd.addString("ID", "doc-0"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 40 | fd.addTV("base", "ceccdcdecd", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 41 | "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 42 | + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 43 | + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 44 | + "[(3-4)s:c|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 45 | + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 46 | + "[(5-6)s:c|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 47 | + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]" |
| 48 | + "[(7-8)s:e|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 49 | + "[(8-9)s:c|_9$<i>8<i>9]" |
| 50 | + "[(9-10)s:d|_10$<i>9<i>10]"); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 51 | return fd; |
| 52 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | |
| 54 | |
| 55 | private FieldDocument createFieldDoc1 () { |
| 56 | FieldDocument fd = new FieldDocument(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 57 | fd.addString("ID", "doc-1"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 58 | fd.addTV("base", "eedadaeed", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 59 | "[(0-1)s:e|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 60 | + "[(1-2)s:e|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 61 | + "[(2-3)s:d|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 62 | + "[(3-4)s:a|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 63 | + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>7<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 64 | + "[(5-6)s:a|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 65 | + "[(6-7)s:e|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 66 | + "[(7-8)s:e|_8$<i>7<i>8]" + "[(8-9)s:d|_9$<i>8<i>9]"); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 67 | return fd; |
| 68 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | |
| 70 | |
| 71 | private FieldDocument createFieldDoc2 () { |
| 72 | FieldDocument fd = new FieldDocument(); |
| 73 | fd.addString("ID", "doc-"); |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 74 | fd.addTV("base", "dcacacdac", |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 75 | "[(0-1)s:d|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]" |
| 76 | + "[(1-2)s:c|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]" |
| 77 | + "[(2-3)s:a|_3$<i>2<i>3|<>:s$<b>64<i>2<i>4<i>4<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 78 | + "[(3-4)s:c|_4$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 79 | + "[(4-5)s:a|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 80 | + "[(5-6)s:c|_6$<i>5<i>6]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 81 | + "[(6-7)s:d|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]" |
| 82 | + "[(7-8)s:a|_8$<i>7<i>8|<>:s$<b>64<i>7<i>9<i>9<b>0]" |
| margaretha | 71c66ee | 2015-12-11 14:39:55 +0100 | [diff] [blame] | 83 | + "[(8-9)s:c|_9$<i>8<i>9]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 84 | return fd; |
| 85 | } |
| 86 | |
| 87 | |
| 88 | /** |
| 89 | * Distance Zero, unordered |
| 90 | * There is a secondspan on the right side |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 91 | */ |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 92 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 93 | public void testCase1 () throws IOException { |
| 94 | ki = new KrillIndex(); |
| 95 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 96 | ki.commit(); |
| 97 | SpanQuery sq; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | sq = createQuery("s", "s:d", "s:c", 0, 0, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 99 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 100 | assertEquals(kr.getTotalResults(), 1); |
| 101 | assertEquals(6, kr.getMatch(0).startPos); |
| 102 | assertEquals(7, kr.getMatch(0).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 103 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 104 | |
| 105 | |
| 106 | /** |
| 107 | * There is another firstspan within max distance |
| 108 | * Unordered |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 109 | */ |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 110 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 111 | public void testCase2 () throws IOException { |
| 112 | ki = new KrillIndex(); |
| 113 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 114 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 115 | SpanQuery sq; |
| 116 | |
| 117 | sq = createQuery("s", "s:c", "s:d", 0, 0, false, true); |
| 118 | kr = ki.search(sq, (short) 10); |
| 119 | |
| 120 | assertEquals(kr.getTotalResults(), 4); |
| 121 | assertEquals(0, kr.getMatch(0).startPos); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 122 | assertEquals(1, kr.getMatch(0).endPos); |
| 123 | assertEquals(2, kr.getMatch(1).startPos); |
| 124 | assertEquals(3, kr.getMatch(1).endPos); |
| 125 | assertEquals(3, kr.getMatch(2).startPos); |
| 126 | assertEquals(4, kr.getMatch(2).endPos); |
| 127 | assertEquals(8, kr.getMatch(3).startPos); |
| 128 | assertEquals(9, kr.getMatch(3).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 129 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | |
| 131 | |
| 132 | /** |
| 133 | * Distance 0-1, ordered, unordered |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 134 | */ |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 135 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 136 | public void testCase3 () throws IOException { |
| 137 | ki = new KrillIndex(); |
| 138 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 139 | ki.commit(); |
| 140 | SpanQuery sq; |
| 141 | // unordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 142 | sq = createQuery("s", "s:c", "s:e", 0, 1, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 143 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 144 | assertEquals(kr.getTotalResults(), 1); |
| 145 | assertEquals(5, kr.getMatch(0).startPos); |
| 146 | assertEquals(6, kr.getMatch(0).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 147 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 148 | //ordered |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | sq = createQuery("s", "s:c", "s:e", 0, 1, true, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 150 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 151 | assertEquals(kr.getTotalResults(), 3); |
| 152 | assertEquals(2, kr.getMatch(0).startPos); |
| 153 | assertEquals(3, kr.getMatch(0).endPos); |
| 154 | assertEquals(3, kr.getMatch(1).startPos); |
| 155 | assertEquals(4, kr.getMatch(1).endPos); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 156 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 157 | |
| 158 | |
| 159 | /** |
| 160 | * Multiple documents, ordered |
| 161 | * No more secondspans, but there is still a firstspan |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 162 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 163 | @Test |
| 164 | public void testCase4 () throws IOException { |
| 165 | ki = new KrillIndex(); |
| 166 | ki.addDoc(createFieldDoc0()); |
| 167 | ki.addDoc(createFieldDoc1()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 168 | ki.commit(); |
| 169 | SpanQuery sq; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 170 | |
| 171 | sq = createQuery("s", "s:d", "s:e", 1, 1, true, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 172 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 173 | |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 174 | assertEquals(kr.getTotalResults(), 3); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 175 | assertEquals(4, kr.getMatch(0).startPos); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 176 | assertEquals(5, kr.getMatch(0).endPos); |
| 177 | assertEquals(1, kr.getMatch(1).getLocalDocID()); |
| 178 | assertEquals(2, kr.getMatch(1).startPos); |
| 179 | assertEquals(3, kr.getMatch(1).endPos); |
| 180 | assertEquals(8, kr.getMatch(2).startPos); |
| 181 | assertEquals(9, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 182 | } |
| 183 | |
| 184 | |
| 185 | /** |
| 186 | * Skip to |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 187 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 188 | @Test |
| 189 | public void testCase5 () throws IOException { |
| 190 | ki = new KrillIndex(); |
| 191 | ki.addDoc(createFieldDoc0()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 192 | ki.addDoc(createFieldDoc1()); |
| 193 | ki.addDoc(createFieldDoc0()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 194 | ki.addDoc(createFieldDoc2()); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 195 | ki.commit(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 196 | |
| 197 | SpanQuery sq = createQuery("s", "s:c", "s:d", 1, 1, false, true); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 198 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | aa5c1d3 | 2014-03-20 23:46:55 +0000 | [diff] [blame] | 199 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 200 | assertEquals(kr.getTotalResults(), 3); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 201 | assertEquals(3, kr.getMatch(2).getLocalDocID()); |
| 202 | assertEquals(3, kr.getMatch(2).startPos); |
| 203 | assertEquals(4, kr.getMatch(2).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 204 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 205 | sq = new SpanNextQuery( |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 206 | createQuery("s", "s:c", "s:d", 1, 1, false, true), |
| 207 | new SpanTermQuery(new Term("base", "s:a"))); |
| 208 | |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 209 | kr = ki.search(sq, (short) 10); |
| Nils Diewald | e1ecd5e | 2014-11-27 02:17:24 +0000 | [diff] [blame] | 210 | assertEquals(kr.getTotalResults(), 1); |
| 211 | assertEquals(3, kr.getMatch(0).getLocalDocID()); |
| 212 | assertEquals(3, kr.getMatch(0).startPos); |
| 213 | assertEquals(5, kr.getMatch(0).endPos); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 214 | } |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 215 | } |