| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.junit.Test; |
| 8 | |
| 9 | import de.ids_mannheim.korap.KrillIndex; |
| 10 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| 11 | import de.ids_mannheim.korap.query.SpanClassFilterQuery; |
| 12 | import de.ids_mannheim.korap.query.SpanClassFilterQuery.ClassOperation; |
| 13 | import de.ids_mannheim.korap.query.SpanClassQuery; |
| 14 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 15 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 16 | import de.ids_mannheim.korap.response.Result; |
| 17 | |
| 18 | public class TestClassFilterIndex { |
| 19 | |
| 20 | private KrillIndex ki; |
| 21 | private Result kr; |
| 22 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 23 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 24 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 25 | public void testInclude () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 26 | ki = new KrillIndex(); |
| 27 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 28 | ki.commit(); |
| 29 | |
| 30 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "np"); |
| 31 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "vp"); |
| 32 | SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1); |
| 33 | SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2); |
| 34 | SpanDistanceQuery sdq = new SpanDistanceQuery(scq1, scq2, |
| 35 | new DistanceConstraint(0, 1, false, false), true); |
| 36 | |
| 37 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq, |
| 38 | ClassOperation.INCLUDE, 2, 1, true); |
| 39 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 40 | assertEquals(sq.toString(), |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 41 | "spanClassFilter(spanDistance({1: <tokens:np />}, {2: <tokens:vp />}, " |
| 42 | + "[(w[0:1], notOrdered, notExcluded)]),INCLUDE,2,1)"); |
| 43 | |
| 44 | kr = ki.search(sq, (short) 10); |
| 45 | // for (Match km : kr.getMatches()) { |
| 46 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 47 | // + " " |
| 48 | // + km.getSnippetBrackets()); |
| 49 | // } |
| 50 | assertEquals(7, kr.getTotalResults()); |
| 51 | assertEquals(1, kr.getMatch(0).getStartPos()); |
| 52 | assertEquals(5, kr.getMatch(0).getEndPos()); |
| 53 | assertEquals( |
| 54 | "Frankenstein, [{2:treat {1:my daughter} well}]. She is the one that saved ...", |
| 55 | kr.getMatch(0).getSnippetBrackets()); |
| 56 | assertEquals(6, kr.getMatch(1).getStartPos()); |
| 57 | assertEquals(18, kr.getMatch(1).getEndPos()); |
| 58 | assertEquals( |
| 59 | "Frankenstein, treat my daughter well. She [{2:is {1:the one} that saved " |
| 60 | + "your master who you hold so dear}].", kr.getMatch(1) |
| 61 | .getSnippetBrackets()); |
| 62 | assertEquals( |
| 63 | "Frankenstein, treat my daughter well. She [{2:is {1:the one that " |
| 64 | + "saved your master who you hold so dear}}].", kr |
| 65 | .getMatch(2).getSnippetBrackets()); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 66 | assertEquals( |
| 67 | "Frankenstein, treat my daughter well. She [{2:is the one that " |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 68 | + "saved {1:your master} who you hold so dear}].", kr |
| 69 | .getMatch(3).getSnippetBrackets()); |
| 70 | assertEquals( |
| 71 | "Frankenstein, treat my daughter well. She [{2:is the one that saved your master who {1:you} hold so dear}].", |
| 72 | kr.getMatch(4).getSnippetBrackets()); |
| 73 | |
| 74 | } |
| 75 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 76 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 77 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 78 | public void testDisjoint () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 79 | ki = new KrillIndex(); |
| 80 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 81 | ki.commit(); |
| 82 | |
| 83 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "np"); |
| 84 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "vp"); |
| 85 | SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1); |
| 86 | SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2); |
| 87 | SpanDistanceQuery sdq = new SpanDistanceQuery(scq1, scq2, |
| 88 | new DistanceConstraint(0, 1, false, false), true); |
| 89 | |
| 90 | // kr = ki.search(sdq, (short) 10); |
| 91 | // for (Match km : kr.getMatches()) { |
| 92 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 93 | // + " " |
| 94 | // + km.getSnippetBrackets()); |
| 95 | // } |
| 96 | |
| 97 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq, |
| 98 | ClassOperation.DISJOINT, 2, 1, true); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 99 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 100 | kr = ki.search(sq, (short) 10); |
| 101 | // for (Match km : kr.getMatches()) { |
| 102 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 103 | // + " " |
| 104 | // + km.getSnippetBrackets()); |
| 105 | // } |
| 106 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 107 | assertEquals(5, kr.getMatch(0).getEndPos()); |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 108 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 109 | assertEquals( |
| 110 | "[{1:Frankenstein}, {2:treat my daughter well}]. She is the one that saved ...", |
| 111 | kr.getMatch(0).getSnippetBrackets()); |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 112 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 113 | assertEquals(1, kr.getMatch(1).getStartPos()); |
| 114 | assertEquals(6, kr.getMatch(1).getEndPos()); |
| 115 | assertEquals( |
| 116 | "Frankenstein, [{2:treat my daughter well}. {1:She}] is the one that saved your ...", |
| 117 | kr.getMatch(1).getSnippetBrackets()); |
| 118 | |
| 119 | assertEquals(5, kr.getMatch(2).getStartPos()); |
| 120 | assertEquals(18, kr.getMatch(2).getEndPos()); |
| 121 | assertEquals( |
| 122 | "Frankenstein, treat my daughter well. [{1:She} {2:is the one that saved your master who you hold so dear}].", |
| 123 | kr.getMatch(2).getSnippetBrackets()); |
| 124 | } |
| 125 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 126 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 127 | // Problem with SpanDistanceQuery - unordered distance spans, |
| 128 | // -> unsorted |
| 129 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 130 | public void testEqual () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 131 | ki = new KrillIndex(); |
| 132 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 133 | ki.commit(); |
| 134 | |
| 135 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "np"); |
| 136 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "prp"); |
| 137 | SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1); |
| 138 | SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2); |
| 139 | SpanDistanceQuery sdq = new SpanDistanceQuery(scq1, scq2, |
| 140 | new DistanceConstraint(0, 1, false, false), true); |
| 141 | |
| 142 | kr = ki.search(sdq, (short) 10); |
| 143 | assertEquals(6, kr.getTotalResults()); |
| 144 | |
| 145 | kr = ki.search(scq2, (short) 10); |
| 146 | // for (Match km : kr.getMatches()) { |
| 147 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 148 | // + " " |
| 149 | // + km.getSnippetBrackets()); |
| 150 | // } |
| 151 | |
| 152 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq, |
| 153 | ClassOperation.EQUAL, 2, 1, true); |
| 154 | |
| 155 | kr = ki.search(sq, (short) 10); |
| 156 | assertEquals(5, kr.getMatch(0).getStartPos()); |
| 157 | assertEquals(6, kr.getMatch(0).getEndPos()); |
| 158 | assertEquals(14, kr.getMatch(1).getStartPos()); |
| 159 | assertEquals(15, kr.getMatch(1).getEndPos()); |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 160 | } |
| 161 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 162 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 163 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 164 | public void testDiffer () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 165 | ki = new KrillIndex(); |
| 166 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 167 | ki.commit(); |
| 168 | |
| 169 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "np"); |
| 170 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "prp"); |
| 171 | SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1); |
| 172 | SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2); |
| 173 | SpanDistanceQuery sdq = new SpanDistanceQuery(scq1, scq2, |
| 174 | new DistanceConstraint(0, 2, false, false), true); |
| 175 | |
| 176 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq, |
| 177 | ClassOperation.DIFFER, 1, 2, true); |
| 178 | kr = ki.search(sq, (short) 20); |
| 179 | // for (Match km : kr.getMatches()) { |
| 180 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 181 | // + " " |
| 182 | // + km.getSnippetBrackets()); |
| 183 | // } |
| 184 | |
| 185 | assertEquals(9, kr.getTotalResults()); |
| 186 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 187 | assertEquals(3, kr.getMatch(0).getEndPos()); |
| 188 | assertEquals( |
| 189 | "[{1:Frankenstein}, treat {2:my}] daughter well. She is the one ...", |
| 190 | kr.getMatch(0).getSnippetBrackets()); |
| 191 | |
| 192 | assertEquals(5, kr.getMatch(3).getStartPos()); |
| 193 | assertEquals(9, kr.getMatch(3).getEndPos()); |
| 194 | assertEquals( |
| 195 | "Frankenstein, treat my daughter well. [{2:She} is {1:the one}] that saved your master who you ...", |
| 196 | kr.getMatch(3).getSnippetBrackets()); |
| 197 | // she is both prp and np |
| 198 | } |
| 199 | |
| 200 | |
| 201 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 202 | public void testIntersect () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 203 | ki = new KrillIndex(); |
| 204 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 205 | ki.commit(); |
| 206 | |
| 207 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "np"); |
| 208 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "vb"); |
| 209 | SpanClassQuery scq = new SpanClassQuery(seq2, (byte) 3); |
| 210 | SpanDistanceQuery sdq = new SpanDistanceQuery(seq1, scq, |
| 211 | new DistanceConstraint(0, 1, false, false), true); |
| 212 | SpanClassQuery scq1 = new SpanClassQuery(sdq, (byte) 1); |
| 213 | |
| 214 | SpanElementQuery seq3 = new SpanElementQuery("tokens", "prp"); |
| 215 | SpanDistanceQuery sdq2 = new SpanDistanceQuery(seq3, seq2, |
| 216 | new DistanceConstraint(0, 1, false, false), true); |
| 217 | SpanClassQuery scq2 = new SpanClassQuery(sdq2, (byte) 2); |
| 218 | |
| 219 | SpanDistanceQuery sdq3 = new SpanDistanceQuery(scq1, scq2, |
| 220 | new DistanceConstraint(0, 1, false, false), true); |
| 221 | |
| 222 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq3, |
| 223 | ClassOperation.INTERSECT, 1, 2, true); |
| 224 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 225 | assertEquals( |
| 226 | "spanClassFilter(spanDistance({1: spanDistance(<tokens:np />, " |
| 227 | + "{3: <tokens:vb />}, [(w[0:1], notOrdered, notExcluded)])}, " |
| 228 | + "{2: spanDistance(<tokens:prp />, <tokens:vb />, [(w[0:1], " |
| 229 | + "notOrdered, notExcluded)])}, [(w[0:1], notOrdered, notExcluded)]),INTERSECT,1,2)", |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 230 | sq.toString()); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 231 | |
| 232 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 233 | kr = ki.search(sq, (short) 20); |
| 234 | |
| 235 | // for (Match km : kr.getMatches()) { |
| 236 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 237 | // + " " |
| 238 | // + km.getSnippetBrackets()); |
| 239 | // } |
| 240 | |
| 241 | assertEquals(13, kr.getTotalResults()); |
| 242 | assertEquals(0, kr.getMatch(0).getStartPos()); |
| 243 | assertEquals(3, kr.getMatch(0).getEndPos()); |
| 244 | assertEquals( |
| 245 | "[{1:Frankenstein, {2:{3:treat}}}{2: my}] daughter well. She is the one ...", |
| 246 | kr.getMatch(0).getSnippetBrackets()); |
| 247 | assertEquals(1, kr.getMatch(1).getStartPos()); |
| 248 | assertEquals(4, kr.getMatch(1).getEndPos()); |
| 249 | assertEquals( |
| 250 | "Frankenstein, [{1:{2:{3:treat} my} daughter}] well. She is the one that ...", |
| 251 | kr.getMatch(1).getSnippetBrackets()); |
| 252 | } |
| 253 | |
| 254 | |
| 255 | @Test |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 256 | public void testMultipleSameClasses () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 257 | |
| 258 | ki = new KrillIndex(); |
| 259 | ki.addDoc(TestReferenceIndex.createFieldDoc0()); |
| 260 | ki.commit(); |
| 261 | |
| 262 | SpanElementQuery seq1 = new SpanElementQuery("tokens", "nn"); |
| 263 | SpanElementQuery seq = new SpanElementQuery("tokens", "prp"); |
| 264 | SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1); |
| 265 | SpanClassQuery scq = new SpanClassQuery(seq, (byte) 1); |
| 266 | |
| 267 | SpanDistanceQuery sdq = new SpanDistanceQuery(scq, scq1, |
| 268 | new DistanceConstraint(3, 5, false, false), true); |
| 269 | |
| 270 | SpanElementQuery seq2 = new SpanElementQuery("tokens", "vp"); |
| 271 | SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2); |
| 272 | |
| 273 | SpanDistanceQuery sdq2 = new SpanDistanceQuery(sdq, scq2, |
| 274 | new DistanceConstraint(0, 1, false, false), true); |
| 275 | |
| 276 | SpanClassFilterQuery sq = new SpanClassFilterQuery(sdq2, |
| 277 | ClassOperation.INCLUDE, 2, 1, true); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 278 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 279 | kr = ki.search(sdq2, (short) 20); |
| 280 | assertEquals(6, kr.getTotalResults()); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 281 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 282 | // for (Match km : kr.getMatches()) { |
| 283 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 284 | // + " " |
| 285 | // + km.getSnippetBrackets()); |
| 286 | // } |
| 287 | |
| 288 | kr = ki.search(sq, (short) 20); |
| 289 | |
| 290 | // for (Match km : kr.getMatches()) { |
| 291 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 292 | // + " " |
| 293 | // + km.getSnippetBrackets()); |
| 294 | // } |
| 295 | |
| 296 | assertEquals(6, kr.getMatch(0).getStartPos()); |
| 297 | assertEquals(18, kr.getMatch(0).getEndPos()); |
| 298 | assertEquals( |
| 299 | "Frankenstein, treat my daughter well. She [{2:is the {1:one} that saved {1:your} master who you hold so dear}].", |
| 300 | kr.getMatch(0).getSnippetBrackets()); |
| 301 | } |
| 302 | |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 303 | } |