blob: 397451616065b8cb8e53dc4b940486f124b5d0c5 [file] [log] [blame]
margarethac35e8a02017-09-11 16:34:20 +02001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6import java.util.ArrayList;
7
8import org.apache.lucene.index.Term;
9import org.apache.lucene.search.WildcardQuery;
10import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
11import org.apache.lucene.search.spans.SpanNearQuery;
12import org.apache.lucene.search.spans.SpanQuery;
13import org.apache.lucene.search.spans.SpanTermQuery;
14import org.junit.Test;
15
16import de.ids_mannheim.korap.KrillIndex;
17import de.ids_mannheim.korap.query.DistanceConstraint;
18import de.ids_mannheim.korap.query.SpanClassQuery;
19import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
20import de.ids_mannheim.korap.response.Result;
21
22public class TestWildcardIndex {
23
24 private SpanTermQuery sq;
25 private KrillIndex ki;
26 private Result kr;
27 private ArrayList<DistanceConstraint> constraints;
28
29
30 public TestWildcardIndex () {
31 // &Erfahrung
32 sq = new SpanTermQuery(new Term("tokens", "tt/l:Erfahrung"));
33
34 // /+w1:2,s0
35 constraints = new ArrayList<DistanceConstraint>();
36 constraints.add(TestMultipleDistanceIndex.createConstraint("w", 1, 2,
37 true, false));
38 constraints.add(TestMultipleDistanceIndex.createConstraint("tokens",
39 "base/s:s", 0, 0, true, false));
40 }
41
42
43 private FieldDocument createFieldDoc1 () {
44 FieldDocument fd = new FieldDocument();
45 fd.addString("ID", "doc-1");
46 fd.addTV("tokens", "text",
47 "[(0-1)s:meine|_1$<i>0<i>1|<>:base/s:s$<b>64<i>0<i>9<i>10<b>0]"
48 + "[(1-2)tt/l:Erfahrung|_2$<i>1<i>2]"
49 + "[(2-3)s:meiner|_3$<i>2<i>3]"
50 + "[(3-4)tt/l:Erfahrung|_4$<i>3<i>4]"
51 + "[(4-5)s:mein|_5$<i>4<i>5]"
52 + "[(5-6)tt/l:Erfahrung|_6$<i>5<i>6]"
53 + "[(6-7)s:meinem|_7$<i>6<i>7]"
54 + "[(7-8)tt/l:Erfahrung|_8$<i>7<i>8]"
55 + "[(8-9)s:meinen|_9$<i>8<i>9]"
56 + "[(9-10)tt/l:Erfahrung|_10$<i>9<i>10]");
57 return fd;
58 }
59
60
61 @Test
62 public void testWildcardStarWithCollection () throws IOException {
63 ki = new KrillIndex();
64 ki.addDoc(createFieldDoc1());
65 ki.commit();
66 // meine*
67 WildcardQuery wcquery =
68 new WildcardQuery(new Term("tokens", "s:meine*"));
69 SpanMultiTermQueryWrapper<WildcardQuery> mtq =
70 new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
71
72 // meine* /+w1:2,s0 &Erfahrung
73 SpanQuery mdsq = new SpanMultipleDistanceQuery(
74 new SpanClassQuery(mtq, (byte) 129),
75 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
76
77 kr = ki.search(mdsq, (short) 10);
78 assertEquals(4, kr.getMatches().size());
79 }
80
81
82 @Test
83 public void testWildcardQuestionMark1 () throws IOException {
84 ki = new KrillIndex();
85 ki.addDoc(createFieldDoc1());
86 ki.commit();
87
88 // Wildcard ? means regex . (expects exactly one character)
89 SpanMultiTermQueryWrapper<WildcardQuery> mtq =
90 new SpanMultiTermQueryWrapper<WildcardQuery>(
91 new WildcardQuery(new Term("tokens", "s:meine?")));
92 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
93 new SpanClassQuery(mtq, (byte) 129),
94 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
95
96 kr = ki.search(mdsq, (short) 10);
97 assertEquals(3, kr.getMatches().size());
98
99 }
100
101
102 @Test
103 public void testWildcardQuestionMark2 () throws IOException {
104 ki = new KrillIndex();
105 ki.addDoc(createFieldDoc1());
106 ki.commit();
107
108 // Wildcard ? means regex . (expects exactly one character)
109 SpanMultiTermQueryWrapper<WildcardQuery> mtq =
110 new SpanMultiTermQueryWrapper<WildcardQuery>(
111 new WildcardQuery(new Term("tokens", "s:mein?")));
112 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
113 new SpanClassQuery(mtq, (byte) 129),
114 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
115
116 kr = ki.search(mdsq, (short) 10);
117 assertEquals(1, kr.getMatches().size());
118
119 }
120
121
122 @Test
123 public void testWildcardPlusWithCollection () throws IOException {
124 ki = new KrillIndex();
125 ki.addDoc(createFieldDoc1());
126 ki.commit();
127 // mein+ /+w1:2,s0 &Erfahrung
128 SpanMultiTermQueryWrapper<WildcardQuery> mtq =
129 new SpanMultiTermQueryWrapper<WildcardQuery>(
130 new WildcardQuery(new Term("tokens", "s:mein+")));
131
132
133 // Just to make sure, Lucene internal queries treat SpanOr([]) correctly
134 SpanQuery soq = new SpanNearQuery(new SpanQuery[] { mtq, sq }, 1, true);
135 kr = ki.search(soq, (short) 10);
136 // As described in http://korap.github.io/Koral/, '+' is not a valid wildcard
137 assertEquals(0, kr.getMatches().size());
138
139
140
141 // Check the reported classed query
142 SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
143 new SpanClassQuery(mtq, (byte) 129),
144 new SpanClassQuery(sq, (byte) 129), constraints, true, true);
145
146 kr = ki.search(mdsq, (short) 10);
147 assertEquals(0, kr.getMatches().size());
148
149
150 // Check multiple distance query
151 mdsq = new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
152
153 kr = ki.search(mdsq, (short) 10);
154 assertEquals(0, kr.getMatches().size());
155 }
156}