| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | import static org.junit.Assert.fail; |
| 5 | |
| 6 | import java.io.IOException; |
| 7 | |
| 8 | import org.apache.lucene.index.Term; |
| 9 | import org.apache.lucene.search.spans.SpanOrQuery; |
| 10 | import org.apache.lucene.search.spans.SpanQuery; |
| 11 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 12 | import org.junit.Ignore; |
| 13 | import org.junit.Test; |
| 14 | import org.junit.runner.RunWith; |
| 15 | import org.junit.runners.JUnit4; |
| 16 | |
| 17 | import de.ids_mannheim.korap.KrillCollection; |
| 18 | import de.ids_mannheim.korap.Krill; |
| 19 | import de.ids_mannheim.korap.KrillIndex; |
| 20 | import de.ids_mannheim.korap.query.QueryBuilder; |
| 21 | import de.ids_mannheim.korap.query.SpanClassQuery; |
| 22 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 23 | import de.ids_mannheim.korap.query.SpanFocusQuery; |
| 24 | import de.ids_mannheim.korap.query.SpanNextQuery; |
| 25 | import de.ids_mannheim.korap.query.SpanWithinQuery; |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 26 | import de.ids_mannheim.korap.query.QueryBuilder; |
| 27 | import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper; |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 28 | import de.ids_mannheim.korap.response.Match; |
| 29 | import de.ids_mannheim.korap.response.Result; |
| 30 | import de.ids_mannheim.korap.response.SearchContext; |
| 31 | |
| 32 | /* |
| 33 | * Retrieve pagebreak annotations |
| 34 | */ |
| 35 | |
| 36 | @RunWith(JUnit4.class) |
| 37 | public class TestPagebreakIndex { |
| 38 | |
| 39 | @Test |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 40 | public void indexExample1 () throws Exception { |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 41 | KrillIndex ki = new KrillIndex(); |
| 42 | |
| 43 | // abcabcabac |
| 44 | FieldDocument fd = new FieldDocument(); |
| 45 | fd.addTV("tokens", "abcabcabac", |
| 46 | "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|~:base/s:pb$<i>528<i>0]" + |
| 47 | "[(1-2)s:b|i:b|_1$<i>1<i>2]" + |
| 48 | "[(2-3)s:c|i:c|_2$<i>2<i>3]" + |
| 49 | "[(3-4)s:a|i:a|_3$<i>3<i>4]" + |
| 50 | "[(4-5)s:b|i:b|_4$<i>4<i>5]" + |
| 51 | "[(5-6)s:c|i:c|_5$<i>5<i>6|~:base/s:pb$<i>529<i>5]" + |
| 52 | "[(6-7)s:a|i:a|_6$<i>6<i>7]" + |
| 53 | "[(7-8)s:b|i:b|_7$<i>7<i>8]" + |
| 54 | "[(8-9)s:a|i:a|_8$<i>8<i>9|~:base/s:pb$<i>530<i>8]" + |
| 55 | "[(9-10)s:c|i:c|_9$<i>9<i>10]"); |
| 56 | ki.addDoc(fd); |
| 57 | ki.commit(); |
| 58 | |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 59 | SpanQuery sq; |
| 60 | Result kr; |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 61 | |
| 62 | sq = new SpanTermQuery(new Term("tokens", "s:c")); |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 63 | kr = ki.search(sq, (short) 10); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 64 | |
| 65 | assertEquals(2, kr.getMatch(0).getStartPos()); |
| 66 | assertEquals(3, kr.getMatch(0).getEndPos()); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 67 | assertEquals(528, kr.getMatch(0).getStartPage()); |
| 68 | assertEquals(-1, kr.getMatch(0).getEndPage()); |
| 69 | assertEquals( |
| 70 | "snippetHTML", |
| 71 | "<span class=\"context-left\">"+ |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 72 | // "<span class=\"pb\" data-after=\"528\"></span>"+ |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 73 | "ab"+ |
| 74 | "</span>"+ |
| 75 | "<span class=\"match\">"+ |
| 76 | "<mark>"+ |
| 77 | "c"+ |
| 78 | "</mark>"+ |
| 79 | "</span>"+ |
| 80 | "<span class=\"context-right\">"+ |
| 81 | "ab"+ |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 82 | // "<span class=\"pb\" data-after=\"528\"></span>"+ |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 83 | "cab"+ |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 84 | // "<span class=\"pb\" data-after=\"528\"></span>"+ |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 85 | "a"+ |
| 86 | "<span class=\"more\">"+ |
| 87 | "</span>"+ |
| 88 | "</span>", |
| 89 | kr.getMatch(0).getSnippetHTML()); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 90 | |
| 91 | /* |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 92 | |
| 93 | QueryBuilder qb = new QueryBuilder("tokens"); |
| 94 | sq = qb.seq().append( |
| 95 | qb.repeat( |
| 96 | qb.seq().append(qb.seg("s:a")).append(qb.seg("s:b")).append(qb.seg("s:c")), |
| 97 | 2 |
| 98 | ) |
| 99 | ).append(qb.seg("s:a")) |
| 100 | .toQuery(); |
| 101 | |
| 102 | assertEquals(sq.toString(), "spanNext(spanRepetition(spanNext(spanNext(tokens:s:a, tokens:s:b), tokens:s:c){2,2}), tokens:s:a)"); |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 103 | |
| 104 | |
| 105 | kr = ki.search(sq, (short) 10); |
| 106 | |
| 107 | assertEquals(528, kr.getMatch(0).getStartPage()); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 108 | assertEquals(529, kr.getMatch(0).getEndPage()); |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 109 | assertEquals( |
| 110 | "snippetHTML", |
| 111 | "<span class=\"context-left\"></span>"+ |
| 112 | "<span class=\"match\">"+ |
| 113 | "<mark>"+ |
| 114 | "<span class=\"pb\" data-after=\"528\"></span>"+ |
| 115 | "abcab"+ |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 116 | "<span class=\"pb\" data-after=\"529\"></span>"+ |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 117 | "ca"+ |
| 118 | "</mark>"+ |
| 119 | "</span>"+ |
| 120 | "<span class=\"context-right\">"+ |
| 121 | "bac"+ |
| 122 | "</span>", |
| 123 | kr.getMatch(0).getSnippetHTML()); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 124 | */ |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 125 | }; |
| 126 | }; |