| margaretha | c35e8a0 | 2017-09-11 16:34:20 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | import java.util.ArrayList; |
| 7 | |
| 8 | import org.apache.lucene.index.Term; |
| 9 | import org.apache.lucene.search.RegexpQuery; |
| 10 | import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; |
| 11 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 12 | import org.junit.Test; |
| 13 | |
| 14 | import de.ids_mannheim.korap.KrillIndex; |
| 15 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| 16 | import de.ids_mannheim.korap.query.SpanClassQuery; |
| 17 | import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery; |
| 18 | import de.ids_mannheim.korap.response.Result; |
| 19 | |
| 20 | public class TestRegexIndex { |
| 21 | |
| 22 | private SpanTermQuery sq; |
| 23 | private KrillIndex ki; |
| 24 | private Result kr; |
| 25 | private ArrayList<DistanceConstraint> constraints; |
| 26 | |
| 27 | |
| 28 | public TestRegexIndex () { |
| 29 | // &Erfahrung |
| 30 | sq = new SpanTermQuery(new Term("tokens", "tt/l:Erfahrung")); |
| 31 | |
| 32 | // /+w1:2,s0 |
| 33 | constraints = new ArrayList<DistanceConstraint>(); |
| 34 | constraints.add(TestMultipleDistanceIndex.createConstraint("w", 1, 2, |
| 35 | true, false)); |
| 36 | constraints.add(TestMultipleDistanceIndex.createConstraint("tokens", |
| 37 | "base/s:s", 0, 0, true, false)); |
| 38 | } |
| 39 | |
| 40 | |
| 41 | private FieldDocument createFieldDoc1 () { |
| 42 | FieldDocument fd = new FieldDocument(); |
| 43 | fd.addString("ID", "doc-1"); |
| 44 | fd.addTV("tokens", "text", |
| 45 | "[(0-1)s:meine|_1$<i>0<i>1|<>:base/s:s$<b>64<i>0<i>9<i>10<b>0]" |
| 46 | + "[(1-2)tt/l:Erfahrung|_2$<i>1<i>2]" |
| 47 | + "[(2-3)s:meiner|_3$<i>2<i>3]" |
| 48 | + "[(3-4)tt/l:Erfahrung|_4$<i>3<i>4]" |
| 49 | + "[(4-5)s:mein|_5$<i>4<i>5]" |
| 50 | + "[(5-6)tt/l:Erfahrung|_6$<i>5<i>6]" |
| 51 | + "[(6-7)s:meinem|_7$<i>6<i>7]" |
| 52 | + "[(7-8)tt/l:Erfahrung|_8$<i>7<i>8]" |
| 53 | + "[(8-9)s:meinen|_9$<i>8<i>9]" |
| 54 | + "[(9-10)tt/l:Erfahrung|_10$<i>9<i>10]"); |
| 55 | return fd; |
| 56 | } |
| 57 | |
| 58 | |
| 59 | @Test |
| 60 | public void testWildcardStarRewritten () throws IOException { |
| 61 | ki = new KrillIndex(); |
| 62 | ki.addDoc(createFieldDoc1()); |
| 63 | ki.commit(); |
| 64 | |
| 65 | // meine* /+w1:2,s0 &Erfahrung |
| 66 | // rewritten into meine.* |
| 67 | RegexpQuery wcquery = new RegexpQuery(new Term("tokens", "s:meine.*")); |
| 68 | SpanMultiTermQueryWrapper<RegexpQuery> mtq = |
| 69 | new SpanMultiTermQueryWrapper<RegexpQuery>(wcquery); |
| 70 | SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery( |
| 71 | new SpanClassQuery(mtq, (byte) 129), |
| 72 | new SpanClassQuery(sq, (byte) 129), constraints, true, true); |
| 73 | |
| 74 | |
| 75 | kr = ki.search(mdsq, (short) 10); |
| 76 | assertEquals(4, kr.getMatches().size()); |
| 77 | } |
| 78 | |
| 79 | |
| 80 | @Test |
| 81 | public void testWildcardQuestionMarkRewritten () throws IOException { |
| 82 | ki = new KrillIndex(); |
| 83 | ki.addDoc(createFieldDoc1()); |
| 84 | ki.commit(); |
| 85 | |
| 86 | // meine? /+w1:2,s0 &Erfahrung |
| 87 | // meine? rewritten into meine. |
| 88 | SpanMultiTermQueryWrapper<RegexpQuery> mtq = |
| 89 | new SpanMultiTermQueryWrapper<RegexpQuery>( |
| 90 | new RegexpQuery(new Term("tokens", "s:meine."))); |
| 91 | SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery( |
| 92 | new SpanClassQuery(mtq, (byte) 129), |
| 93 | new SpanClassQuery(sq, (byte) 129), constraints, true, true); |
| 94 | |
| 95 | kr = ki.search(mdsq, (short) 10); |
| 96 | assertEquals(3, kr.getMatches().size()); |
| 97 | } |
| 98 | |
| 99 | |
| 100 | |
| 101 | @Test |
| 102 | public void testWildcardPlusRewritten () throws IOException { |
| 103 | ki = new KrillIndex(); |
| 104 | ki.addDoc(createFieldDoc1()); |
| 105 | ki.commit(); |
| 106 | |
| 107 | // C2 meine+ /+w1:2,s0 &Erfahrung |
| 108 | // meine+ rewritten into meine.? |
| 109 | SpanMultiTermQueryWrapper<RegexpQuery> mtq = |
| 110 | new SpanMultiTermQueryWrapper<RegexpQuery>( |
| 111 | new RegexpQuery(new Term("tokens", "s:meine.?"))); |
| 112 | SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery( |
| 113 | new SpanClassQuery(mtq, (byte) 129), |
| 114 | new SpanClassQuery(sq, (byte) 129), constraints, true, true); |
| 115 | |
| 116 | kr = ki.search(mdsq, (short) 10); |
| 117 | assertEquals(4, kr.getMatches().size()); |
| 118 | } |
| 119 | |
| 120 | @Test |
| 121 | public void testWildcardPlusRewritten2 () throws IOException { |
| 122 | ki = new KrillIndex(); |
| 123 | ki.addDoc(createFieldDoc1()); |
| 124 | ki.commit(); |
| 125 | |
| 126 | // C2 mein+ /+w1:2,s0 &Erfahrung |
| 127 | // mein+ rewritten into mein.? |
| 128 | SpanMultiTermQueryWrapper<RegexpQuery> mtq = |
| 129 | new SpanMultiTermQueryWrapper<RegexpQuery>( |
| 130 | new RegexpQuery(new Term("tokens", "s:mein.?"))); |
| 131 | SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery( |
| 132 | new SpanClassQuery(mtq, (byte) 129), |
| 133 | new SpanClassQuery(sq, (byte) 129), constraints, true, true); |
| 134 | |
| 135 | kr = ki.search(mdsq, (short) 10); |
| 136 | assertEquals(2, kr.getMatches().size()); |
| 137 | } |
| 138 | |
| 139 | } |
| 140 | |