blob: 9c303da4d7080df59321878ac46aba72379098d1 [file] [log] [blame]
Akron79d51d42017-02-13 21:28:27 +01001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4import static org.junit.Assert.fail;
5
6import java.io.IOException;
7
8import org.apache.lucene.index.Term;
9import org.apache.lucene.search.spans.SpanOrQuery;
10import org.apache.lucene.search.spans.SpanQuery;
11import org.apache.lucene.search.spans.SpanTermQuery;
12import org.junit.Ignore;
13import org.junit.Test;
14import org.junit.runner.RunWith;
15import org.junit.runners.JUnit4;
16
17import de.ids_mannheim.korap.KrillCollection;
18import de.ids_mannheim.korap.Krill;
19import de.ids_mannheim.korap.KrillIndex;
20import de.ids_mannheim.korap.query.QueryBuilder;
21import de.ids_mannheim.korap.query.SpanClassQuery;
22import de.ids_mannheim.korap.query.SpanElementQuery;
23import de.ids_mannheim.korap.query.SpanFocusQuery;
24import de.ids_mannheim.korap.query.SpanNextQuery;
25import de.ids_mannheim.korap.query.SpanWithinQuery;
Akron35c2d0d2017-02-15 11:16:22 +010026import de.ids_mannheim.korap.query.QueryBuilder;
27import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
Akron79d51d42017-02-13 21:28:27 +010028import de.ids_mannheim.korap.response.Match;
29import de.ids_mannheim.korap.response.Result;
30import de.ids_mannheim.korap.response.SearchContext;
31
32/*
33 * Retrieve pagebreak annotations
34 */
35
36@RunWith(JUnit4.class)
37public class TestPagebreakIndex {
38
39 @Test
Akron35c2d0d2017-02-15 11:16:22 +010040 public void indexExample1 () throws Exception {
Akron79d51d42017-02-13 21:28:27 +010041 KrillIndex ki = new KrillIndex();
42
43 // abcabcabac
44 FieldDocument fd = new FieldDocument();
45 fd.addTV("tokens", "abcabcabac",
46 "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|~:base/s:pb$<i>528<i>0]" +
47 "[(1-2)s:b|i:b|_1$<i>1<i>2]" +
48 "[(2-3)s:c|i:c|_2$<i>2<i>3]" +
49 "[(3-4)s:a|i:a|_3$<i>3<i>4]" +
50 "[(4-5)s:b|i:b|_4$<i>4<i>5]" +
51 "[(5-6)s:c|i:c|_5$<i>5<i>6|~:base/s:pb$<i>529<i>5]" +
52 "[(6-7)s:a|i:a|_6$<i>6<i>7]" +
53 "[(7-8)s:b|i:b|_7$<i>7<i>8]" +
54 "[(8-9)s:a|i:a|_8$<i>8<i>9|~:base/s:pb$<i>530<i>8]" +
55 "[(9-10)s:c|i:c|_9$<i>9<i>10]");
56 ki.addDoc(fd);
57 ki.commit();
58
Akrond4b19332017-02-15 18:36:24 +010059 SpanQuery sq;
60 Result kr;
Akrond8f88612017-02-15 19:26:54 +010061
62 sq = new SpanTermQuery(new Term("tokens", "s:c"));
Akrond4b19332017-02-15 18:36:24 +010063 kr = ki.search(sq, (short) 10);
Akrond8f88612017-02-15 19:26:54 +010064
65 assertEquals(2, kr.getMatch(0).getStartPos());
66 assertEquals(3, kr.getMatch(0).getEndPos());
Akron79d51d42017-02-13 21:28:27 +010067 assertEquals(528, kr.getMatch(0).getStartPage());
68 assertEquals(-1, kr.getMatch(0).getEndPage());
69 assertEquals(
70 "snippetHTML",
71 "<span class=\"context-left\">"+
Akron35c2d0d2017-02-15 11:16:22 +010072 // "<span class=\"pb\" data-after=\"528\"></span>"+
Akron79d51d42017-02-13 21:28:27 +010073 "ab"+
74 "</span>"+
75 "<span class=\"match\">"+
76 "<mark>"+
77 "c"+
78 "</mark>"+
79 "</span>"+
80 "<span class=\"context-right\">"+
81 "ab"+
Akron35c2d0d2017-02-15 11:16:22 +010082 // "<span class=\"pb\" data-after=\"528\"></span>"+
Akron79d51d42017-02-13 21:28:27 +010083 "cab"+
Akron35c2d0d2017-02-15 11:16:22 +010084 // "<span class=\"pb\" data-after=\"528\"></span>"+
Akron79d51d42017-02-13 21:28:27 +010085 "a"+
86 "<span class=\"more\">"+
87 "</span>"+
88 "</span>",
89 kr.getMatch(0).getSnippetHTML());
Akrond8f88612017-02-15 19:26:54 +010090
91 /*
Akron35c2d0d2017-02-15 11:16:22 +010092
93 QueryBuilder qb = new QueryBuilder("tokens");
94 sq = qb.seq().append(
95 qb.repeat(
96 qb.seq().append(qb.seg("s:a")).append(qb.seg("s:b")).append(qb.seg("s:c")),
97 2
98 )
99 ).append(qb.seg("s:a"))
100 .toQuery();
101
102 assertEquals(sq.toString(), "spanNext(spanRepetition(spanNext(spanNext(tokens:s:a, tokens:s:b), tokens:s:c){2,2}), tokens:s:a)");
Akrond4b19332017-02-15 18:36:24 +0100103
104
105 kr = ki.search(sq, (short) 10);
106
107 assertEquals(528, kr.getMatch(0).getStartPage());
Akrond8f88612017-02-15 19:26:54 +0100108 assertEquals(529, kr.getMatch(0).getEndPage());
Akrond4b19332017-02-15 18:36:24 +0100109 assertEquals(
110 "snippetHTML",
111 "<span class=\"context-left\"></span>"+
112 "<span class=\"match\">"+
113 "<mark>"+
114 "<span class=\"pb\" data-after=\"528\"></span>"+
115 "abcab"+
Akrond8f88612017-02-15 19:26:54 +0100116 "<span class=\"pb\" data-after=\"529\"></span>"+
Akrond4b19332017-02-15 18:36:24 +0100117 "ca"+
118 "</mark>"+
119 "</span>"+
120 "<span class=\"context-right\">"+
121 "bac"+
122 "</span>",
123 kr.getMatch(0).getSnippetHTML());
Akrond8f88612017-02-15 19:26:54 +0100124 */
Akron79d51d42017-02-13 21:28:27 +0100125 };
126};