| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.search.spans.SpanTermQuery; |
| 9 | import org.junit.Test; |
| 10 | |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 11 | import de.ids_mannheim.korap.KrillIndex; |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 12 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.response.Result; |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.DistanceConstraint; |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 15 | import de.ids_mannheim.korap.query.SpanClassQuery; |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 18 | import de.ids_mannheim.korap.query.SpanFocusQuery; |
| Nils Diewald | 5380aa6 | 2014-09-01 13:21:07 +0000 | [diff] [blame] | 19 | import de.ids_mannheim.korap.query.SpanSubspanQuery; |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 20 | |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 21 | /* |
| 22 | * @author margaretha |
| 23 | * @author diewald |
| 24 | */ |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 25 | public class TestSubSpanIndex { |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 26 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 27 | Result kr; |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 28 | KrillIndex ki; |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 29 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 30 | |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 31 | public TestSubSpanIndex () throws IOException { |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 32 | ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 33 | ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true); |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 34 | ki.commit(); |
| 35 | } |
| 36 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 37 | |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 38 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 39 | public void testCase1 () throws IOException { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 40 | SpanDistanceQuery sdq = new SpanDistanceQuery( |
| 41 | new SpanTermQuery(new Term("tokens", "tt/p:NN")), |
| 42 | new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")), |
| 43 | new DistanceConstraint(5, 5, true, false), true); |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 44 | |
| 45 | SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 2, true); |
| 46 | kr = ki.search(ssq, (short) 10); |
| 47 | |
| 48 | assertEquals((long) 8, kr.getTotalResults()); |
| 49 | assertEquals(35, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | b21e9fb | 2014-09-30 17:46:27 +0000 | [diff] [blame] | 50 | assertEquals(37, kr.getMatch(0).getEndPos()); |
| 51 | assertEquals(179, kr.getMatch(1).getStartPos()); |
| 52 | assertEquals(181, kr.getMatch(1).getEndPos()); |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 53 | |
| 54 | ssq = new SpanSubspanQuery(sdq, -2, 2, true); |
| 55 | kr = ki.search(ssq, (short) 10); |
| 56 | |
| 57 | assertEquals(39, kr.getMatch(0).getStartPos()); |
| Eliza Margaretha | b21e9fb | 2014-09-30 17:46:27 +0000 | [diff] [blame] | 58 | assertEquals(41, kr.getMatch(0).getEndPos()); |
| 59 | assertEquals(183, kr.getMatch(1).getStartPos()); |
| 60 | assertEquals(185, kr.getMatch(1).getEndPos()); |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 61 | |
| 62 | /* |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 63 | * for (Match km : kr.getMatches()){ |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 64 | * System.out.println(km.getStartPos() +","+km.getEndPos() |
| 65 | * +km.getSnippetBrackets()); } |
| 66 | */ |
| 67 | } |
| 68 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 70 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 71 | public void testCase2 () { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 72 | SpanDistanceQuery sdq = new SpanDistanceQuery( |
| 73 | new SpanTermQuery(new Term("tokens", "tt/p:NN")), |
| 74 | new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")), |
| 75 | new DistanceConstraint(5, 5, true, false), true); |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 76 | |
| 77 | // the subspan length is longer than the span length |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 78 | SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 7, true); |
| 79 | kr = ki.search(ssq, (short) 10); |
| 80 | |
| 81 | assertEquals(35, kr.getMatch(0).getStartPos()); |
| 82 | assertEquals(41, kr.getMatch(0).getEndPos()); |
| 83 | assertEquals(179, kr.getMatch(1).getStartPos()); |
| 84 | assertEquals(185, kr.getMatch(1).getEndPos()); |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 85 | |
| 86 | // the subspan start is before the span start |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 87 | ssq = new SpanSubspanQuery(sdq, -7, 4, true); |
| 88 | kr = ki.search(ssq, (short) 10); |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 89 | |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 90 | assertEquals((long) 8, kr.getTotalResults()); |
| 91 | assertEquals(35, kr.getMatch(0).getStartPos()); |
| 92 | assertEquals(39, kr.getMatch(0).getEndPos()); |
| 93 | assertEquals(179, kr.getMatch(1).getStartPos()); |
| 94 | assertEquals(183, kr.getMatch(1).getEndPos()); |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 95 | |
| 96 | } |
| 97 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 99 | // Length 0 |
| 100 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 101 | public void testCase3 () { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 102 | SpanDistanceQuery sdq = new SpanDistanceQuery( |
| 103 | new SpanTermQuery(new Term("tokens", "tt/p:NN")), |
| 104 | new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")), |
| 105 | new DistanceConstraint(5, 5, true, false), true); |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 106 | |
| 107 | SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 3, 0, true); |
| 108 | kr = ki.search(ssq, (short) 10); |
| 109 | |
| 110 | assertEquals(38, kr.getMatch(0).getStartPos()); |
| 111 | assertEquals(41, kr.getMatch(0).getEndPos()); |
| 112 | assertEquals(182, kr.getMatch(1).getStartPos()); |
| 113 | assertEquals(185, kr.getMatch(1).getEndPos()); |
| 114 | |
| 115 | ssq = new SpanSubspanQuery(sdq, -2, 0, true); |
| 116 | kr = ki.search(ssq, (short) 10); |
| 117 | |
| 118 | assertEquals(39, kr.getMatch(0).getStartPos()); |
| 119 | assertEquals(41, kr.getMatch(0).getEndPos()); |
| 120 | assertEquals(183, kr.getMatch(1).getStartPos()); |
| 121 | assertEquals(185, kr.getMatch(1).getEndPos()); |
| 122 | |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 123 | // for (Match km : kr.getMatches()) { |
| Eliza Margaretha | 58ee0bf | 2015-01-26 16:37:31 +0000 | [diff] [blame] | 124 | // System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 125 | // + km.getSnippetBrackets()); |
| 126 | // } |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 127 | } |
| 128 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 129 | |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 130 | // Negative SubSpanQuery |
| 131 | @Test |
| 132 | public void testCaseNegativeSubSpan () throws IOException { |
| 133 | KrillIndex ki = new KrillIndex(); |
| 134 | FieldDocument fd = new FieldDocument(); |
| 135 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 136 | fd.addTV("base", |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 137 | // <x>a <x>b </x>c </x> |
| 138 | "a b c ", |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 139 | "[(0-1)s:a|i:a|_0$<i>0<i>2|<>:x$<b>64<i>0<i>6<i>3<b>0]" |
| 140 | + "[(1-2)s:b|i:b|_1$<i>2<i>4|<>:x$<b>64<i>2<i>4<i>2<b>1]" |
| 141 | + "[(3-4)s:c|i:c|_2$<i>4<i>6]"); |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 142 | ki.addDoc(fd); |
| 143 | ki.commit(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 144 | SpanSubspanQuery ssq = new SpanSubspanQuery( |
| 145 | new SpanElementQuery("base", "x"), -1, 1, true); |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 146 | kr = ki.search(ssq, (short) 10); |
| Akron | 9c04ce2 | 2016-05-02 16:03:21 +0200 | [diff] [blame] | 147 | |
| 148 | /* |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 149 | for (Match km : kr.getMatches()) { |
| 150 | System.out.println(km.getStartPos() + "," + km.getEndPos() |
| 151 | + km.getSnippetBrackets()); |
| 152 | } |
| Akron | 9c04ce2 | 2016-05-02 16:03:21 +0200 | [diff] [blame] | 153 | */ |
| 154 | |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 155 | assertEquals(2, kr.getTotalResults()); |
| Akron | 9c04ce2 | 2016-05-02 16:03:21 +0200 | [diff] [blame] | 156 | assertEquals(1, kr.getMatch(0).getStartPos()); |
| 157 | assertEquals(2, kr.getMatch(0).getEndPos()); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 158 | assertEquals("a [[b ]]c ", kr.getMatch(0).getSnippetBrackets()); |
| Akron | 9c04ce2 | 2016-05-02 16:03:21 +0200 | [diff] [blame] | 159 | |
| 160 | assertEquals(2, kr.getMatch(1).getStartPos()); |
| 161 | assertEquals(3, kr.getMatch(1).getEndPos()); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 162 | assertEquals("a b [[c ]]", kr.getMatch(1).getSnippetBrackets()); |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 163 | }; |
| 164 | }; |