// blob: 02df26caac4aeb3f87245833385ce2d23cb6b681
package de.ids_mannheim.korap.index;

import static org.junit.Assert.*;

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Test;

import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;

Eliza Margarethad4693462014-03-17 13:16:18 +000018public class TestRepetitionIndex {
Eliza Margarethad28469f2014-03-10 12:42:21 +000019
20 private KorapIndex ki;
21 private KorapResult kr;
22
23 private FieldDocument createFieldDoc0(){
24 FieldDocument fd = new FieldDocument();
25 fd.addString("ID", "doc-0");
26 fd.addTV("base",
27 "text",
28 "[(0-1)s:c|_1#0-1]" +
29 "[(1-2)s:e|_2#1-2]" +
30 "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]" +
31 "[(3-4)s:c|s:b|_4#3-4|<>:x#3-7$<i>7]" +
32 "[(4-5)s:e|s:d|_5#4-5|<>:y#4-6$<i>6]" +
33 "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" +
34 "[(6-7)s:d|_7#6-7]" +
35 "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" +
36 "[(8-9)s:e|s:b|_9#8-9|<>:x#8-10$<i>10]" +
37 "[(9-10)s:d|_10#9-10]");
38 return fd;
39 }
40
41 private FieldDocument createFieldDoc1() {
42 FieldDocument fd = new FieldDocument();
43 fd.addString("ID", "doc-1");
44 fd.addTV("base",
45 "text",
46 "[(0-1)s:b|_1#0-1|<>:s#0-2$<i>1]" +
47 "[(1-2)s:e|_2#1-2|<>:s#1-2$<i>4]" +
48 "[(2-3)s:c|_3#2-3]" +
49 "[(3-4)s:c|s:d|_4#3-4]" +
50 "[(4-5)s:d|_5#4-5|<>:s#4-5$<i>7]" +
51 "[(5-6)s:e|_6#5-6]" +
52 "[(6-7)s:e|_7#6-7]" +
53 "[(7-8)s:c|_8#7-8|<>:x#7-9$<i>9]" +
54 "[(8-9)s:d|_9#8-9|<>:x#8-10$<i>10]" +
55 "[(9-10)s:d|_10#9-10]");
56 return fd;
57 }
58
59 private FieldDocument createFieldDoc2() {
60 FieldDocument fd = new FieldDocument();
61 fd.addString("ID", "doc-2");
62 fd.addTV("base",
63 "text",
64 "[(0-1)s:b|s:c|_1#0-1|<>:s#0-2$<i>1]" +
65 "[(1-2)s:c|_2#1-2]" +
66 "[(2-3)s:b|_3#2-3|<>:s#2-3$<i>3]" +
67 "[(3-4)s:c|_4#3-4|<>:s#3-4$<i>4]" +
68 "[(4-5)s:c|_5#4-5|<>:s#4-5$<i>5]" +
69 "[(5-6)s:b|_6#5-6]" +
70 "[(6-7)s:c|_7#6-7|<>:s#6-7$<i>7]");
71 return fd;
72 }
73
74 private FieldDocument createFieldDoc3() {
75 FieldDocument fd = new FieldDocument();
76 fd.addString("ID", "doc-3");
77 fd.addTV("base",
78 "text",
79 "[(0-1)s:a|_1#0-1|<>:s#0-2$<i>1]" +
80 "[(1-2)s:d|_2#1-2|<>:s#1-2$<i>3]" +
81 "[(2-3)s:e|_3#2-3]");
82 return fd;
83 }
84
85
86 @Test
87 public void testCase1() throws IOException{
88 ki = new KorapIndex();
89 ki.addDoc(createFieldDoc0());
90 ki.commit();
91
92 SpanQuery sq, sq2;
93 // Quantifier only
Eliza Margarethad4693462014-03-17 13:16:18 +000094 sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),1,2, true);
Eliza Margarethad28469f2014-03-10 12:42:21 +000095 kr = ki.search(sq, (short) 10);
96 // 0-1, 2-3, 2-4, 3-4, 5-6
97 assertEquals(5,kr.getTotalResults());
98
99 // ec{1,2}
100 sq = new SpanNextQuery(
101 new SpanTermQuery(new Term("base", "s:e")),
Eliza Margarethad4693462014-03-17 13:16:18 +0000102 new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),1,2, true)
Eliza Margarethad28469f2014-03-10 12:42:21 +0000103 );
104
105 kr = ki.search(sq, (short) 10);
106 // 1-3, 1-4, 4-6
107 assertEquals(3,kr.getTotalResults());
108
109 // ec{1,2}d
110 sq2 = new SpanNextQuery(sq, new SpanTermQuery(new Term("base", "s:d")));
111 kr = ki.search(sq2, (short) 10);
112 assertEquals(2,kr.getTotalResults());
113 assertEquals(1, kr.getMatch(0).startPos);
114 assertEquals(5, kr.getMatch(0).endPos);
115 assertEquals(4, kr.getMatch(1).startPos);
116 assertEquals(7, kr.getMatch(1).endPos);
117
118 // Multiple documents
119 ki.addDoc(createFieldDoc1());
120 ki.commit();
121 kr = ki.search(sq2, (short) 10);
122 assertEquals(5,kr.getTotalResults());
123 }
124
125 /** Skip to */
126 @Test
127 public void testCase2() throws IOException{
128 ki = new KorapIndex();
129 ki.addDoc(createFieldDoc0());
130 ki.addDoc(createFieldDoc3());
131 ki.addDoc(createFieldDoc2());
132 ki.addDoc(createFieldDoc1());
133 ki.commit();
134
135 SpanQuery sq;
Eliza Margarethad4693462014-03-17 13:16:18 +0000136 sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),2,2, true);
Eliza Margarethad28469f2014-03-10 12:42:21 +0000137 kr = ki.search(sq, (short) 10);
138 assertEquals(4,kr.getTotalResults());
139
140 kr = ki.search(sq, (short) 10);
141 sq = new SpanNextQuery(
142 new SpanTermQuery(new Term("base", "s:e")),
Eliza Margarethad4693462014-03-17 13:16:18 +0000143 new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),2,2, true)
Eliza Margarethad28469f2014-03-10 12:42:21 +0000144 );
145
146 kr = ki.search(sq, (short) 10);
147 assertEquals(2,kr.getTotalResults());
148 assertEquals(3,kr.getMatch(1).getLocalDocID());
149
150 }
151
152 /** OR */
153 @Test
154 public void testCase3() throws IOException{
155 ki = new KorapIndex();
156 ki.addDoc(createFieldDoc0());
157 ki.commit();
158
159 SpanQuery sq,sq2;
160 // ec{1,2}
161 sq = new SpanNextQuery(
162 new SpanTermQuery(new Term("base", "s:e")),
163 new SpanOrQuery(
Eliza Margarethad4693462014-03-17 13:16:18 +0000164 new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),1,1, true),
165 new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:b")),1,1, true)
Eliza Margarethad28469f2014-03-10 12:42:21 +0000166 )
167 );
168 kr = ki.search(sq, (short) 10);
169 assertEquals(3,kr.getTotalResults());
170 assertEquals(1, kr.getMatch(0).startPos);
171 assertEquals(3, kr.getMatch(0).endPos);
172 assertEquals(4, kr.getMatch(1).startPos);
173 assertEquals(6, kr.getMatch(1).endPos);
174 assertEquals(7, kr.getMatch(2).startPos);
175 assertEquals(9, kr.getMatch(2).endPos);
176
177// System.out.print(kr.getTotalResults()+"\n");
178// for (int i=0; i< kr.getTotalResults(); i++){
179// System.out.println(
180// kr.match(i).getLocalDocID()+" "+
181// kr.match(i).startPos + " " +
182// kr.match(i).endPos
183// );
184// }
185 }
186}