| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 6 | import java.util.ArrayList; |
| 7 | import java.util.List; |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 8 | |
| 9 | import org.apache.lucene.index.Term; |
| 10 | import org.apache.lucene.search.spans.SpanQuery; |
| 11 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 12 | import org.junit.Test; |
| 13 | |
| 14 | import de.ids_mannheim.korap.KorapIndex; |
| 15 | import de.ids_mannheim.korap.KorapResult; |
| 16 | import de.ids_mannheim.korap.query.SpanAttributeQuery; |
| 17 | import de.ids_mannheim.korap.query.SpanElementAttributeQuery; |
| 18 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 19 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 20 | |
| 21 | public class TestAttributeIndex { |
| 22 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 23 | private KorapIndex ki = new KorapIndex(); |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 24 | private KorapResult kr; |
| 25 | private FieldDocument fd; |
| 26 | |
| 27 | public TestAttributeIndex() throws IOException { |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 28 | ki = new KorapIndex(); |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 29 | } |
| 30 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 31 | private FieldDocument createFieldDoc0(){ |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 32 | fd = new FieldDocument(); |
| 33 | fd.addString("ID", "doc-0"); |
| 34 | fd.addTV("base", |
| 35 | "bcbabd", |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 36 | "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1|@:class=header$<s>2]" + |
| 37 | "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" + |
| 38 | "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" + |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 39 | "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1]" + |
| Eliza Margaretha | 669e7a8 | 2014-06-26 12:57:18 +0000 | [diff] [blame] | 40 | "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2]" + |
| 41 | "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1]"+ |
| 42 | "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=header$<s>2]"); |
| 43 | |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 44 | return fd; |
| 45 | } |
| 46 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 47 | private FieldDocument createFieldDoc1(){ |
| 48 | fd = new FieldDocument(); |
| 49 | fd.addString("ID", "doc-1"); |
| 50 | fd.addTV("base", |
| 51 | "bcbabd", |
| 52 | "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1|@:class=title$<s>1|@:class=book$<s>1]" + |
| 53 | "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1|@:class=title$<s>1]" + |
| 54 | "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1]" + |
| 55 | "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" + |
| 56 | "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=title$<s>1]" + |
| 57 | "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+ |
| 58 | "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=title$<s>1]"); |
| 59 | |
| 60 | return fd; |
| 61 | } |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 62 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 63 | private FieldDocument createFieldDoc2(){ |
| 64 | fd = new FieldDocument(); |
| 65 | fd.addString("ID", "doc-1"); |
| 66 | fd.addTV("base", |
| 67 | "bcbabd", |
| 68 | "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2|@:class=book$<s>1|@:class=book$<s>2]" + |
| 69 | "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2|@:class=header$<s>1]" + |
| 70 | "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2|@:class=book$<s>1]" + |
| 71 | "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" + |
| 72 | "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=book$<s>1]" + |
| 73 | "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+ |
| 74 | "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=book$<s>2]"); |
| 75 | |
| 76 | return fd; |
| 77 | } |
| 78 | |
| 79 | |
| 80 | /** Test matching elementRef |
| 81 | * @throws IOException |
| 82 | * */ |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 83 | @Test |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 84 | public void testCase1() throws IOException { |
| 85 | ki.addDoc(createFieldDoc0()); |
| 86 | ki.commit(); |
| 87 | |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 88 | SpanAttributeQuery saq = new SpanAttributeQuery( |
| 89 | new SpanTermQuery(new Term("base","@:class=header")), |
| 90 | true); |
| 91 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 92 | List<SpanQuery> sql = new ArrayList<>(); |
| 93 | sql.add(saq); |
| 94 | |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 95 | SpanQuery sq = new SpanElementAttributeQuery( |
| 96 | new SpanElementQuery("base", "div"), |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 97 | sql, true); |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 98 | |
| 99 | kr = ki.search(sq, (short) 10); |
| 100 | |
| 101 | assertEquals(4, kr.getTotalResults()); |
| 102 | assertEquals(0,kr.getMatch(0).getStartPos()); |
| 103 | assertEquals(2,kr.getMatch(0).getEndPos()); |
| 104 | assertEquals(0,kr.getMatch(1).getStartPos()); |
| 105 | assertEquals(3,kr.getMatch(1).getEndPos()); |
| 106 | assertEquals(3,kr.getMatch(2).getStartPos()); |
| 107 | assertEquals(5,kr.getMatch(2).getEndPos()); |
| Eliza Margaretha | 669e7a8 | 2014-06-26 12:57:18 +0000 | [diff] [blame] | 108 | assertEquals(6,kr.getMatch(3).getStartPos()); |
| 109 | assertEquals(7,kr.getMatch(3).getEndPos()); |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 110 | } |
| 111 | |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 112 | /** Test multiple attributes and negation |
| 113 | * @throws IOException |
| 114 | * */ |
| 115 | @Test |
| 116 | public void testCase2() throws IOException{ |
| 117 | ki.addDoc(createFieldDoc1()); |
| 118 | ki.commit(); |
| 119 | |
| 120 | List<SpanQuery> sql = new ArrayList<>(); |
| 121 | sql.add(new SpanAttributeQuery( |
| 122 | new SpanTermQuery(new Term("base","@:class=header")),true) |
| 123 | ); |
| 124 | sql.add(new SpanAttributeQuery( |
| 125 | new SpanTermQuery(new Term("base","@:class=title")),true) |
| 126 | ); |
| 127 | |
| 128 | SpanQuery sq = new SpanElementAttributeQuery( |
| 129 | new SpanElementQuery("base", "div"), |
| 130 | sql, true); |
| 131 | |
| 132 | kr = ki.search(sq, (short) 10); |
| 133 | |
| 134 | assertEquals(4, kr.getTotalResults()); |
| 135 | assertEquals(0,kr.getMatch(0).getStartPos()); |
| 136 | assertEquals(3,kr.getMatch(0).getEndPos()); |
| 137 | assertEquals(1,kr.getMatch(1).getStartPos()); |
| 138 | assertEquals(2,kr.getMatch(1).getEndPos()); |
| 139 | assertEquals(4,kr.getMatch(2).getStartPos()); |
| 140 | assertEquals(5,kr.getMatch(2).getEndPos()); |
| 141 | assertEquals(6,kr.getMatch(3).getStartPos()); |
| 142 | assertEquals(7,kr.getMatch(3).getEndPos()); |
| 143 | |
| 144 | // Add not Attribute |
| 145 | sql.add(new SpanAttributeQuery( |
| 146 | new SpanTermQuery(new Term("base","@:class=book")),true,true) |
| 147 | ); |
| 148 | |
| 149 | sq = new SpanElementAttributeQuery( |
| 150 | new SpanElementQuery("base", "div"), |
| 151 | sql, true); |
| 152 | |
| 153 | kr = ki.search(sq, (short) 10); |
| 154 | |
| 155 | assertEquals(2, kr.getTotalResults()); |
| 156 | assertEquals(1,kr.getMatch(0).getStartPos()); |
| 157 | assertEquals(2,kr.getMatch(0).getEndPos()); |
| 158 | assertEquals(6,kr.getMatch(1).getStartPos()); |
| 159 | assertEquals(7,kr.getMatch(1).getEndPos()); |
| 160 | |
| 161 | // Test multiple negations |
| 162 | sql.remove(1); |
| 163 | sql.add(new SpanAttributeQuery( |
| 164 | new SpanTermQuery(new Term("base","@:class=title")),true,true) |
| 165 | ); |
| 166 | |
| 167 | sq = new SpanElementAttributeQuery( |
| 168 | new SpanElementQuery("base", "div"), |
| 169 | sql, true); |
| 170 | |
| 171 | kr = ki.search(sq, (short) 10); |
| 172 | assertEquals(1, kr.getTotalResults()); |
| 173 | assertEquals(5,kr.getMatch(0).getStartPos()); |
| 174 | assertEquals(6,kr.getMatch(0).getEndPos()); |
| 175 | } |
| 176 | |
| 177 | /** same attribute types referring to different element types |
| 178 | * */ |
| 179 | @Test |
| 180 | public void testCase3() throws IOException{ |
| 181 | ki.addDoc(createFieldDoc2()); |
| 182 | ki.commit(); |
| 183 | |
| 184 | List<SpanQuery> sql = new ArrayList<>(); |
| 185 | sql.add(new SpanAttributeQuery( |
| 186 | new SpanTermQuery(new Term("base","@:class=header")),true) |
| 187 | ); |
| 188 | sql.add(new SpanAttributeQuery( |
| 189 | new SpanTermQuery(new Term("base","@:class=book")),true,true) |
| 190 | ); |
| 191 | SpanQuery sq = new SpanElementAttributeQuery( |
| 192 | new SpanElementQuery("base", "div"), |
| 193 | sql, true); |
| 194 | |
| 195 | kr = ki.search(sq, (short) 10); |
| 196 | |
| 197 | assertEquals(3, kr.getTotalResults()); |
| 198 | assertEquals(1,kr.getMatch(0).getStartPos()); |
| 199 | assertEquals(2,kr.getMatch(0).getEndPos()); |
| 200 | assertEquals(5,kr.getMatch(1).getStartPos()); |
| 201 | assertEquals(6,kr.getMatch(1).getEndPos()); |
| 202 | assertEquals(6,kr.getMatch(2).getStartPos()); |
| 203 | assertEquals(7,kr.getMatch(2).getEndPos()); |
| 204 | } |
| 205 | |
| 206 | /** Test SkipTo Doc */ |
| 207 | @Test |
| 208 | public void testCase4() throws IOException{ |
| 209 | ki.addDoc(createFieldDoc1()); |
| 210 | ki.addDoc(createFieldDoc0()); |
| 211 | ki.addDoc(createFieldDoc2()); |
| 212 | ki.commit(); |
| 213 | |
| 214 | SpanAttributeQuery saq = new SpanAttributeQuery( |
| 215 | new SpanTermQuery(new Term("base","@:class=book")), |
| 216 | true); |
| 217 | |
| 218 | List<SpanQuery> sql = new ArrayList<>(); |
| 219 | sql.add(saq); |
| 220 | |
| 221 | SpanElementAttributeQuery sq = new SpanElementAttributeQuery( |
| 222 | new SpanElementQuery("base", "div"), |
| 223 | sql, true); |
| 224 | |
| 225 | SpanNextQuery snq = new SpanNextQuery( |
| 226 | new SpanTermQuery(new Term("base", "s:e")) |
| 227 | ,sq); |
| 228 | |
| 229 | kr = ki.search(snq, (short) 10); |
| 230 | |
| Eliza Margaretha | c0cab87 | 2014-07-04 09:27:24 +0000 | [diff] [blame] | 231 | assertEquals(1,kr.getTotalResults()); |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 232 | assertEquals(2,kr.getMatch(0).getLocalDocID()); |
| Eliza Margaretha | c0cab87 | 2014-07-04 09:27:24 +0000 | [diff] [blame] | 233 | assertEquals(1,kr.getMatch(0).getStartPos()); |
| 234 | assertEquals(5,kr.getMatch(0).getEndPos()); |
| Eliza Margaretha | 997ccde | 2014-07-04 09:20:35 +0000 | [diff] [blame] | 235 | } |
| 236 | |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 237 | |
| 238 | } |