blob: 8ca339e971416d68ba1c3968984ceee98ff64a48 [file] [log] [blame]
margarethac35e8a02017-09-11 16:34:20 +02001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6import java.util.ArrayList;
7
8import org.apache.lucene.index.Term;
9import org.apache.lucene.search.RegexpQuery;
10import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
11import org.apache.lucene.search.spans.SpanTermQuery;
12import org.junit.Test;
13
14import de.ids_mannheim.korap.KrillIndex;
15import de.ids_mannheim.korap.query.DistanceConstraint;
16import de.ids_mannheim.korap.query.SpanClassQuery;
17import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
18import de.ids_mannheim.korap.response.Result;
19
20public class TestRegexIndex {
21
22 private SpanTermQuery sq;
23 private KrillIndex ki;
24 private Result kr;
25 private ArrayList<DistanceConstraint> constraints;
26
27
28 public TestRegexIndex () {
29 // &Erfahrung
30 sq = new SpanTermQuery(new Term("tokens", "tt/l:Erfahrung"));
31
32 // /+w1:2,s0
33 constraints = new ArrayList<DistanceConstraint>();
34 constraints.add(TestMultipleDistanceIndex.createConstraint("w", 1, 2,
35 true, false));
36 constraints.add(TestMultipleDistanceIndex.createConstraint("tokens",
37 "base/s:s", 0, 0, true, false));
38 }
39
40
41 private FieldDocument createFieldDoc1 () {
42 FieldDocument fd = new FieldDocument();
43 fd.addString("ID", "doc-1");
44 fd.addTV("tokens", "text",
45 "[(0-1)s:meine|_1$<i>0<i>1|<>:base/s:s$<b>64<i>0<i>9<i>10<b>0]"
46 + "[(1-2)tt/l:Erfahrung|_2$<i>1<i>2]"
47 + "[(2-3)s:meiner|_3$<i>2<i>3]"
48 + "[(3-4)tt/l:Erfahrung|_4$<i>3<i>4]"
49 + "[(4-5)s:mein|_5$<i>4<i>5]"
50 + "[(5-6)tt/l:Erfahrung|_6$<i>5<i>6]"
51 + "[(6-7)s:meinem|_7$<i>6<i>7]"
52 + "[(7-8)tt/l:Erfahrung|_8$<i>7<i>8]"
53 + "[(8-9)s:meinen|_9$<i>8<i>9]"
54 + "[(9-10)tt/l:Erfahrung|_10$<i>9<i>10]");
55 return fd;
56 }
57
58
59 @Test
60 public void testWildcardStarRewritten () throws IOException {
61 ki = new KrillIndex();
62 ki.addDoc(createFieldDoc1());
63 ki.commit();
64
65 // meine* /+w1:2,s0 &Erfahrung
66 // rewritten into meine.*
67 RegexpQuery wcquery = new RegexpQuery(new Term("tokens", "s:meine.*"));
68 SpanMultiTermQueryWrapper<RegexpQuery> mtq =
69 new SpanMultiTermQueryWrapper<RegexpQuery>(wcquery);
70 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
71 new SpanClassQuery(mtq, (byte) 129),
72 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
73
74
75 kr = ki.search(mdsq, (short) 10);
76 assertEquals(4, kr.getMatches().size());
77 }
78
79
80 @Test
81 public void testWildcardQuestionMarkRewritten () throws IOException {
82 ki = new KrillIndex();
83 ki.addDoc(createFieldDoc1());
84 ki.commit();
85
86 // meine? /+w1:2,s0 &Erfahrung
87 // meine? rewritten into meine.
88 SpanMultiTermQueryWrapper<RegexpQuery> mtq =
89 new SpanMultiTermQueryWrapper<RegexpQuery>(
90 new RegexpQuery(new Term("tokens", "s:meine.")));
91 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
92 new SpanClassQuery(mtq, (byte) 129),
93 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
94
95 kr = ki.search(mdsq, (short) 10);
96 assertEquals(3, kr.getMatches().size());
97 }
98
99
100
101 @Test
102 public void testWildcardPlusRewritten () throws IOException {
103 ki = new KrillIndex();
104 ki.addDoc(createFieldDoc1());
105 ki.commit();
106
107 // C2 meine+ /+w1:2,s0 &Erfahrung
108 // meine+ rewritten into meine.?
109 SpanMultiTermQueryWrapper<RegexpQuery> mtq =
110 new SpanMultiTermQueryWrapper<RegexpQuery>(
111 new RegexpQuery(new Term("tokens", "s:meine.?")));
112 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
113 new SpanClassQuery(mtq, (byte) 129),
114 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
115
116 kr = ki.search(mdsq, (short) 10);
117 assertEquals(4, kr.getMatches().size());
118 }
119
120 @Test
121 public void testWildcardPlusRewritten2 () throws IOException {
122 ki = new KrillIndex();
123 ki.addDoc(createFieldDoc1());
124 ki.commit();
125
126 // C2 mein+ /+w1:2,s0 &Erfahrung
127 // mein+ rewritten into mein.?
128 SpanMultiTermQueryWrapper<RegexpQuery> mtq =
129 new SpanMultiTermQueryWrapper<RegexpQuery>(
130 new RegexpQuery(new Term("tokens", "s:mein.?")));
131 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
132 new SpanClassQuery(mtq, (byte) 129),
133 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
134
135 kr = ki.search(mdsq, (short) 10);
136 assertEquals(2, kr.getMatches().size());
137 }
138
139}
140