blob: 8a0078706e5c1aca756a0b330508065bacbe3b2a [file] [log] [blame]
Eliza Margarethafb25cef2014-06-06 14:19:07 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
Eliza Margaretha997ccde2014-07-04 09:20:35 +00006import java.util.ArrayList;
7import java.util.List;
Eliza Margarethafb25cef2014-06-06 14:19:07 +00008
9import org.apache.lucene.index.Term;
10import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
12import org.junit.Test;
13
14import de.ids_mannheim.korap.KorapIndex;
15import de.ids_mannheim.korap.KorapResult;
16import de.ids_mannheim.korap.query.SpanAttributeQuery;
17import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
18import de.ids_mannheim.korap.query.SpanElementQuery;
Eliza Margaretha997ccde2014-07-04 09:20:35 +000019import de.ids_mannheim.korap.query.SpanNextQuery;
Eliza Margarethafb25cef2014-06-06 14:19:07 +000020
21public class TestAttributeIndex {
22
Eliza Margaretha997ccde2014-07-04 09:20:35 +000023 private KorapIndex ki = new KorapIndex();
Eliza Margarethafb25cef2014-06-06 14:19:07 +000024 private KorapResult kr;
25 private FieldDocument fd;
26
27 public TestAttributeIndex() throws IOException {
Eliza Margaretha997ccde2014-07-04 09:20:35 +000028 ki = new KorapIndex();
Eliza Margarethafb25cef2014-06-06 14:19:07 +000029 }
30
Eliza Margaretha997ccde2014-07-04 09:20:35 +000031 private FieldDocument createFieldDoc0(){
Eliza Margarethafb25cef2014-06-06 14:19:07 +000032 fd = new FieldDocument();
33 fd.addString("ID", "doc-0");
34 fd.addTV("base",
35 "bcbabd",
Eliza Margaretha997ccde2014-07-04 09:20:35 +000036 "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1|@:class=header$<s>2]" +
37 "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" +
38 "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" +
Eliza Margarethafb25cef2014-06-06 14:19:07 +000039 "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1]" +
Eliza Margaretha669e7a82014-06-26 12:57:18 +000040 "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2]" +
41 "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1]"+
42 "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=header$<s>2]");
43
Eliza Margarethafb25cef2014-06-06 14:19:07 +000044 return fd;
45 }
46
Eliza Margaretha997ccde2014-07-04 09:20:35 +000047 private FieldDocument createFieldDoc1(){
48 fd = new FieldDocument();
49 fd.addString("ID", "doc-1");
50 fd.addTV("base",
51 "bcbabd",
52 "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1|@:class=title$<s>1|@:class=book$<s>1]" +
53 "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1|@:class=title$<s>1]" +
54 "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1]" +
55 "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
56 "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=title$<s>1]" +
57 "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
58 "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=title$<s>1]");
59
60 return fd;
61 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +000062
Eliza Margaretha997ccde2014-07-04 09:20:35 +000063 private FieldDocument createFieldDoc2(){
64 fd = new FieldDocument();
65 fd.addString("ID", "doc-1");
66 fd.addTV("base",
67 "bcbabd",
68 "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2|@:class=book$<s>1|@:class=book$<s>2]" +
69 "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2|@:class=header$<s>1]" +
70 "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2|@:class=book$<s>1]" +
71 "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
72 "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=book$<s>1]" +
73 "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
74 "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=book$<s>2]");
75
76 return fd;
77 }
78
79
80 /** Test matching elementRef
81 * @throws IOException
82 * */
Eliza Margarethafb25cef2014-06-06 14:19:07 +000083 @Test
Eliza Margaretha997ccde2014-07-04 09:20:35 +000084 public void testCase1() throws IOException {
85 ki.addDoc(createFieldDoc0());
86 ki.commit();
87
Eliza Margarethafb25cef2014-06-06 14:19:07 +000088 SpanAttributeQuery saq = new SpanAttributeQuery(
89 new SpanTermQuery(new Term("base","@:class=header")),
90 true);
91
Eliza Margaretha997ccde2014-07-04 09:20:35 +000092 List<SpanQuery> sql = new ArrayList<>();
93 sql.add(saq);
94
Eliza Margarethafb25cef2014-06-06 14:19:07 +000095 SpanQuery sq = new SpanElementAttributeQuery(
96 new SpanElementQuery("base", "div"),
Eliza Margaretha997ccde2014-07-04 09:20:35 +000097 sql, true);
Eliza Margarethafb25cef2014-06-06 14:19:07 +000098
99 kr = ki.search(sq, (short) 10);
100
101 assertEquals(4, kr.getTotalResults());
102 assertEquals(0,kr.getMatch(0).getStartPos());
103 assertEquals(2,kr.getMatch(0).getEndPos());
104 assertEquals(0,kr.getMatch(1).getStartPos());
105 assertEquals(3,kr.getMatch(1).getEndPos());
106 assertEquals(3,kr.getMatch(2).getStartPos());
107 assertEquals(5,kr.getMatch(2).getEndPos());
Eliza Margaretha669e7a82014-06-26 12:57:18 +0000108 assertEquals(6,kr.getMatch(3).getStartPos());
109 assertEquals(7,kr.getMatch(3).getEndPos());
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000110 }
111
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000112 /** Test multiple attributes and negation
113 * @throws IOException
114 * */
115 @Test
116 public void testCase2() throws IOException{
117 ki.addDoc(createFieldDoc1());
118 ki.commit();
119
120 List<SpanQuery> sql = new ArrayList<>();
121 sql.add(new SpanAttributeQuery(
122 new SpanTermQuery(new Term("base","@:class=header")),true)
123 );
124 sql.add(new SpanAttributeQuery(
125 new SpanTermQuery(new Term("base","@:class=title")),true)
126 );
127
128 SpanQuery sq = new SpanElementAttributeQuery(
129 new SpanElementQuery("base", "div"),
130 sql, true);
131
132 kr = ki.search(sq, (short) 10);
133
134 assertEquals(4, kr.getTotalResults());
135 assertEquals(0,kr.getMatch(0).getStartPos());
136 assertEquals(3,kr.getMatch(0).getEndPos());
137 assertEquals(1,kr.getMatch(1).getStartPos());
138 assertEquals(2,kr.getMatch(1).getEndPos());
139 assertEquals(4,kr.getMatch(2).getStartPos());
140 assertEquals(5,kr.getMatch(2).getEndPos());
141 assertEquals(6,kr.getMatch(3).getStartPos());
142 assertEquals(7,kr.getMatch(3).getEndPos());
143
144 // Add not Attribute
145 sql.add(new SpanAttributeQuery(
146 new SpanTermQuery(new Term("base","@:class=book")),true,true)
147 );
148
149 sq = new SpanElementAttributeQuery(
150 new SpanElementQuery("base", "div"),
151 sql, true);
152
153 kr = ki.search(sq, (short) 10);
154
155 assertEquals(2, kr.getTotalResults());
156 assertEquals(1,kr.getMatch(0).getStartPos());
157 assertEquals(2,kr.getMatch(0).getEndPos());
158 assertEquals(6,kr.getMatch(1).getStartPos());
159 assertEquals(7,kr.getMatch(1).getEndPos());
160
161 // Test multiple negations
162 sql.remove(1);
163 sql.add(new SpanAttributeQuery(
164 new SpanTermQuery(new Term("base","@:class=title")),true,true)
165 );
166
167 sq = new SpanElementAttributeQuery(
168 new SpanElementQuery("base", "div"),
169 sql, true);
170
171 kr = ki.search(sq, (short) 10);
172 assertEquals(1, kr.getTotalResults());
173 assertEquals(5,kr.getMatch(0).getStartPos());
174 assertEquals(6,kr.getMatch(0).getEndPos());
175 }
176
177 /** same attribute types referring to different element types
178 * */
179 @Test
180 public void testCase3() throws IOException{
181 ki.addDoc(createFieldDoc2());
182 ki.commit();
183
184 List<SpanQuery> sql = new ArrayList<>();
185 sql.add(new SpanAttributeQuery(
186 new SpanTermQuery(new Term("base","@:class=header")),true)
187 );
188 sql.add(new SpanAttributeQuery(
189 new SpanTermQuery(new Term("base","@:class=book")),true,true)
190 );
191 SpanQuery sq = new SpanElementAttributeQuery(
192 new SpanElementQuery("base", "div"),
193 sql, true);
194
195 kr = ki.search(sq, (short) 10);
196
197 assertEquals(3, kr.getTotalResults());
198 assertEquals(1,kr.getMatch(0).getStartPos());
199 assertEquals(2,kr.getMatch(0).getEndPos());
200 assertEquals(5,kr.getMatch(1).getStartPos());
201 assertEquals(6,kr.getMatch(1).getEndPos());
202 assertEquals(6,kr.getMatch(2).getStartPos());
203 assertEquals(7,kr.getMatch(2).getEndPos());
204 }
205
206 /** Test SkipTo Doc */
207 @Test
208 public void testCase4() throws IOException{
209 ki.addDoc(createFieldDoc1());
210 ki.addDoc(createFieldDoc0());
211 ki.addDoc(createFieldDoc2());
212 ki.commit();
213
214 SpanAttributeQuery saq = new SpanAttributeQuery(
215 new SpanTermQuery(new Term("base","@:class=book")),
216 true);
217
218 List<SpanQuery> sql = new ArrayList<>();
219 sql.add(saq);
220
221 SpanElementAttributeQuery sq = new SpanElementAttributeQuery(
222 new SpanElementQuery("base", "div"),
223 sql, true);
224
225 SpanNextQuery snq = new SpanNextQuery(
226 new SpanTermQuery(new Term("base", "s:e"))
227 ,sq);
228
229 kr = ki.search(snq, (short) 10);
230
Eliza Margarethac0cab872014-07-04 09:27:24 +0000231 assertEquals(1,kr.getTotalResults());
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000232 assertEquals(2,kr.getMatch(0).getLocalDocID());
Eliza Margarethac0cab872014-07-04 09:27:24 +0000233 assertEquals(1,kr.getMatch(0).getStartPos());
234 assertEquals(5,kr.getMatch(0).getEndPos());
Eliza Margaretha997ccde2014-07-04 09:20:35 +0000235 }
236
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000237
238}