blob: 1b1ef4df350ef5ce4e39d578676ba8e020d70b8d [file] [log] [blame]
Eliza Margarethad28469f2014-03-10 12:42:21 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.File;
6import java.io.IOException;
7import java.io.InputStream;
8import java.util.Properties;
9
10import org.apache.lucene.index.Term;
11import org.apache.lucene.search.spans.SpanQuery;
12import org.apache.lucene.search.spans.SpanTermQuery;
13import org.apache.lucene.store.MMapDirectory;
14import org.junit.Test;
15
Nils Diewald2d5f8102015-02-26 21:07:54 +000016import de.ids_mannheim.korap.KrillCollection;
Nils Diewalda14ecd62015-02-26 21:00:20 +000017import de.ids_mannheim.korap.KrillIndex;
Nils Diewald392bcf32015-02-26 20:01:17 +000018import de.ids_mannheim.korap.response.Match;
Nils Diewald884dbcf2015-02-27 17:02:28 +000019import de.ids_mannheim.korap.response.Result;
Nils Diewaldbbd39a52015-02-23 19:56:57 +000020import de.ids_mannheim.korap.Krill;
Nils Diewald01ff7af2015-02-04 22:54:26 +000021import de.ids_mannheim.korap.collection.BooleanFilter;
Eliza Margarethad4693462014-03-17 13:16:18 +000022import de.ids_mannheim.korap.query.DistanceConstraint;
Eliza Margarethad28469f2014-03-10 12:42:21 +000023import de.ids_mannheim.korap.query.SpanDistanceQuery;
24import de.ids_mannheim.korap.query.SpanElementQuery;
25import de.ids_mannheim.korap.query.SpanNextQuery;
Eliza Margarethad4693462014-03-17 13:16:18 +000026import de.ids_mannheim.korap.query.SpanRepetitionQuery;
Eliza Margarethad28469f2014-03-10 12:42:21 +000027
28public class TestWPDIndex {
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 long start, end;
30 KrillIndex ki;
31 Result kr;
32 Krill ks;
33
34
35 private SpanDistanceQuery createElementDistanceQuery (String e, String x,
36 String y, int min, int max, boolean isOrdered, boolean exclusion) {
37 SpanElementQuery eq = new SpanElementQuery("tokens", e);
38 SpanDistanceQuery sq = new SpanDistanceQuery(new SpanTermQuery(
39 new Term("tokens", x)),
40 new SpanTermQuery(new Term("tokens", y)),
41 new DistanceConstraint(eq, min, max, isOrdered, exclusion),
42 true);
43 return sq;
Eliza Margarethad28469f2014-03-10 12:42:21 +000044 }
Eliza Margarethad28469f2014-03-10 12:42:21 +000045
Nils Diewaldbb33da22015-03-04 16:24:25 +000046
47 private SpanDistanceQuery createDistanceQuery (String x, String y, int min,
48 int max, boolean isOrdered, boolean exclusion) {
49 SpanDistanceQuery sq = new SpanDistanceQuery(new SpanTermQuery(
50 new Term("tokens", x)),
51 new SpanTermQuery(new Term("tokens", y)),
52 new DistanceConstraint(min, max, isOrdered, exclusion), true);
53 return sq;
54 }
55
56
57 public TestWPDIndex () throws IOException {
58 InputStream is = getClass().getResourceAsStream("/korap.conf");
59 Properties prop = new Properties();
60 prop.load(is);
61
62 String indexPath = prop.getProperty("lucene.indexDir");
63 MMapDirectory md = new MMapDirectory(new File(indexPath));
64 ki = new KrillIndex(md);
65 }
66
67
68 /** Token distance spans */
69 @Test
70 public void testCase1 () throws IOException {
71 SpanDistanceQuery sq;
72 // ordered
73 sq = createDistanceQuery("s:Wir", "s:kommen", 1, 1, true, false);
74 ks = new Krill(sq);
75 kr = ks.apply(ki);
76 assertEquals(kr.getTotalResults(), 8);
77
78 // unordered
79 sq = createDistanceQuery("s:Wir", "s:kommen", 1, 1, false, false);
80 ks = new Krill(sq);
81 kr = ks.apply(ki);
82 assertEquals(kr.getTotalResults(), 11);
83
84 sq = createDistanceQuery("s:kommen", "s:Wir", 1, 1, false, false);
85 ks = new Krill(sq);
86 kr = ks.apply(ki);
87 assertEquals(kr.getTotalResults(), 11);
88 //System.out.println(kr.getTotalResults());
89 //for (Match km : kr.getMatches()){
90 //System.out.println(km.getDocID() +" "+km.getStartPos() +" "+ km.getEndPos());
91 //System.out.println(km.getSnippetBrackets());
92 //System.out.println(km.toJSON());
Eliza Margaretha7788a982014-08-29 16:10:52 +000093 //}
Nils Diewaldbb33da22015-03-04 16:24:25 +000094 }
Eliza Margarethad28469f2014-03-10 12:42:21 +000095
Nils Diewaldbb33da22015-03-04 16:24:25 +000096
97 /** Token exclusion distance spans */
98 @Test
99 public void testCase2 () throws IOException {
100
101 SpanQuery q = new SpanTermQuery(new Term("tokens", "s:Wir"));
102 ks = new Krill(q);
103 kr = ks.apply(ki);
104 assertEquals(kr.getTotalResults(), 1907);
105
106 SpanDistanceQuery sq;
107 // ordered
108 sq = createDistanceQuery("s:Wir", "s:kommen", 1, 1, true, true);
109 ks = new Krill(sq);
110 kr = ks.apply(ki);
111 assertEquals(kr.getTotalResults(), 1899);
112
113 // unordered
114 sq = createDistanceQuery("s:Wir", "s:kommen", 1, 1, false, true);
115 ks = new Krill(sq);
116 kr = ks.apply(ki);
117 assertEquals(kr.getTotalResults(), 1896);
118 }
119
120
121 /** Element distance spans */
122 @Test
123 public void testCase3 () throws IOException {
124 // ordered
125 SpanDistanceQuery sq = createElementDistanceQuery("s", "s:weg",
126 "s:fahren", 0, 1, true, false);
127 ks = new Krill(sq);
128 kr = ks.apply(ki);
129 assertEquals(kr.getTotalResults(), 3);
130
131 // unordered
132 sq = createElementDistanceQuery("s", "s:weg", "s:fahren", 0, 1, false,
133 false);
134 ks = new Krill(sq);
135 kr = ks.apply(ki);
136 assertEquals(kr.getTotalResults(), 5);
137
138 // only 0
139 sq = createElementDistanceQuery("s", "s:weg", "s:fahren", 0, 0, false,
140 false);
141 kr = ki.search(sq, (short) 100);
142 assertEquals(kr.getTotalResults(), 2);
143 assertEquals("WPD_BBB.04463", kr.getMatch(0).getDocID());
144 assertEquals(1094, kr.getMatch(0).getStartPos());
145 assertEquals(1115, kr.getMatch(0).getEndPos());
146 assertEquals("WPD_III.00758", kr.getMatch(1).getDocID());
147 assertEquals(444, kr.getMatch(1).getStartPos());
148 assertEquals(451, kr.getMatch(1).getEndPos());
149
150 // only 1
151 sq = createElementDistanceQuery("s", "s:weg", "s:fahren", 1, 1, false,
152 false);
153 ks = new Krill(sq);
154 kr = ks.apply(ki);
155 assertEquals(kr.getTotalResults(), 3);
156 }
157
158
159 /** Element distance exclusion */
160 @Test
161 public void testCase4 () throws IOException {
162 SpanDistanceQuery sq = createElementDistanceQuery("s", "s:weg",
163 "s:fahren", 1, 1, false, true);
164 ks = new Krill(sq);
165 kr = ks.apply(ki);
166 assertEquals(kr.getTotalResults(), 979);
167 //0.8s
168
169 // Check if it includes some results
170 BooleanFilter bf = new BooleanFilter();
171 bf.or("ID", "WPD_BBB.04463", "WPD_III.00758");
172 KrillCollection kc = new KrillCollection();
173 kc.filter(bf);
174 ks.setCollection(kc);
175 kr = ks.apply(ki);
176 assertEquals(1094, kr.getMatch(0).getStartPos());
177 assertEquals(451, kr.getMatch(1).getEndPos());
178 }
179
180
181 /** Repetition */
182 @Test
183 public void testCase5 () throws IOException {
184 SpanQuery sq;
185 sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("tokens",
186 "mate/p:ADJA")), 1, 2, true);
187 ks = new Krill(sq);
188 kr = ks.apply(ki);
189 assertEquals(kr.getTotalResults(), 4116416);
190 //0.9s
191
192 sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("tokens",
193 "mate/p:ADJA")), 1, 1, true);
194 ks = new Krill(sq);
195 kr = ks.apply(ki);
196 assertEquals(kr.getTotalResults(), 3879671);
197
198 sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("tokens",
199 "mate/p:ADJA")), 2, 2, true);
200 ks = new Krill(sq);
201 kr = ks.apply(ki);
202 assertEquals(kr.getTotalResults(), 236745);
203 //0.65s
204 }
205
206
207 /** Next and repetition */
208 @Test
209 public void testCase6 () throws IOException {
210 SpanQuery sq = new SpanNextQuery(new SpanTermQuery(new Term("tokens",
211 "tt/p:NN")), new SpanRepetitionQuery(new SpanTermQuery(
212 new Term("tokens", "mate/p:ADJA")), 2, 2, true));
213 ks = new Krill(sq);
214 kr = ks.apply(ki);
215 assertEquals(kr.getTotalResults(), 30223);
216 // 1.1s
217
218 SpanQuery sq2 = new SpanNextQuery(sq, new SpanTermQuery(new Term(
219 "tokens", "tt/p:NN")));
220 ks = new Krill(sq2);
221 kr = ks.apply(ki);
222 assertEquals(kr.getTotalResults(), 26607);
223 // 1.1s
224 }
Eliza Margarethad28469f2014-03-10 12:42:21 +0000225}