blob: e0349dbedd048ca58b03e8865190a0c9a416a6ea [file] [log] [blame]
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
5import java.io.IOException;
6
7import org.apache.lucene.index.Term;
8import org.apache.lucene.search.spans.SpanTermQuery;
9import org.junit.Test;
10
Nils Diewalda14ecd62015-02-26 21:00:20 +000011import de.ids_mannheim.korap.KrillIndex;
margaretha3865e522016-05-02 13:24:51 +020012import de.ids_mannheim.korap.response.Match;
Nils Diewald884dbcf2015-02-27 17:02:28 +000013import de.ids_mannheim.korap.response.Result;
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +000014import de.ids_mannheim.korap.query.DistanceConstraint;
margaretha3865e522016-05-02 13:24:51 +020015import de.ids_mannheim.korap.query.SpanClassQuery;
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +000016import de.ids_mannheim.korap.query.SpanDistanceQuery;
Akronf796b862016-04-29 18:51:25 +020017import de.ids_mannheim.korap.query.SpanElementQuery;
margaretha3865e522016-05-02 13:24:51 +020018import de.ids_mannheim.korap.query.SpanFocusQuery;
Nils Diewald5380aa62014-09-01 13:21:07 +000019import de.ids_mannheim.korap.query.SpanSubspanQuery;
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +000020
Akronf796b862016-04-29 18:51:25 +020021/*
22 * @author margaretha
23 * @author diewald
24 */
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +000025public class TestSubSpanIndex {
Eliza Margarethaafe98122015-01-23 17:37:57 +000026
Nils Diewald884dbcf2015-02-27 17:02:28 +000027 Result kr;
Nils Diewalda14ecd62015-02-26 21:00:20 +000028 KrillIndex ki;
Eliza Margarethaafe98122015-01-23 17:37:57 +000029
Akronc12567c2016-06-03 00:40:52 +020030
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000031 public TestSubSpanIndex () throws IOException {
Nils Diewalda14ecd62015-02-26 21:00:20 +000032 ki = new KrillIndex();
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
Eliza Margarethaafe98122015-01-23 17:37:57 +000034 ki.commit();
35 }
36
Akronc12567c2016-06-03 00:40:52 +020037
Eliza Margaretha9d1ebeb2014-08-12 11:42:58 +000038 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000039 public void testCase1 () throws IOException {
Eliza Margaretha6f989202016-10-14 21:48:29 +020040 SpanDistanceQuery sdq = new SpanDistanceQuery(
41 new SpanTermQuery(new Term("tokens", "tt/p:NN")),
42 new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")),
43 new DistanceConstraint(5, 5, true, false), true);
Eliza Margarethaafe98122015-01-23 17:37:57 +000044
45 SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 2, true);
46 kr = ki.search(ssq, (short) 10);
47
48 assertEquals((long) 8, kr.getTotalResults());
49 assertEquals(35, kr.getMatch(0).getStartPos());
Eliza Margarethab21e9fb2014-09-30 17:46:27 +000050 assertEquals(37, kr.getMatch(0).getEndPos());
51 assertEquals(179, kr.getMatch(1).getStartPos());
52 assertEquals(181, kr.getMatch(1).getEndPos());
Eliza Margarethaafe98122015-01-23 17:37:57 +000053
54 ssq = new SpanSubspanQuery(sdq, -2, 2, true);
55 kr = ki.search(ssq, (short) 10);
56
57 assertEquals(39, kr.getMatch(0).getStartPos());
Eliza Margarethab21e9fb2014-09-30 17:46:27 +000058 assertEquals(41, kr.getMatch(0).getEndPos());
59 assertEquals(183, kr.getMatch(1).getStartPos());
60 assertEquals(185, kr.getMatch(1).getEndPos());
Eliza Margarethaafe98122015-01-23 17:37:57 +000061
62 /*
Nils Diewald392bcf32015-02-26 20:01:17 +000063 * for (Match km : kr.getMatches()){
Eliza Margarethaafe98122015-01-23 17:37:57 +000064 * System.out.println(km.getStartPos() +","+km.getEndPos()
65 * +km.getSnippetBrackets()); }
66 */
67 }
68
Nils Diewaldbb33da22015-03-04 16:24:25 +000069
Eliza Margarethaafe98122015-01-23 17:37:57 +000070 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000071 public void testCase2 () {
Eliza Margaretha6f989202016-10-14 21:48:29 +020072 SpanDistanceQuery sdq = new SpanDistanceQuery(
73 new SpanTermQuery(new Term("tokens", "tt/p:NN")),
74 new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")),
75 new DistanceConstraint(5, 5, true, false), true);
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000076
77 // the subspan length is longer than the span length
Eliza Margarethaafe98122015-01-23 17:37:57 +000078 SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 7, true);
79 kr = ki.search(ssq, (short) 10);
80
81 assertEquals(35, kr.getMatch(0).getStartPos());
82 assertEquals(41, kr.getMatch(0).getEndPos());
83 assertEquals(179, kr.getMatch(1).getStartPos());
84 assertEquals(185, kr.getMatch(1).getEndPos());
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000085
86 // the subspan start is before the span start
Eliza Margarethaafe98122015-01-23 17:37:57 +000087 ssq = new SpanSubspanQuery(sdq, -7, 4, true);
88 kr = ki.search(ssq, (short) 10);
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000089
Eliza Margarethaafe98122015-01-23 17:37:57 +000090 assertEquals((long) 8, kr.getTotalResults());
91 assertEquals(35, kr.getMatch(0).getStartPos());
92 assertEquals(39, kr.getMatch(0).getEndPos());
93 assertEquals(179, kr.getMatch(1).getStartPos());
94 assertEquals(183, kr.getMatch(1).getEndPos());
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000095
96 }
97
Nils Diewaldbb33da22015-03-04 16:24:25 +000098
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +000099 // Length 0
100 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101 public void testCase3 () {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200102 SpanDistanceQuery sdq = new SpanDistanceQuery(
103 new SpanTermQuery(new Term("tokens", "tt/p:NN")),
104 new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")),
105 new DistanceConstraint(5, 5, true, false), true);
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +0000106
107 SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 3, 0, true);
108 kr = ki.search(ssq, (short) 10);
109
110 assertEquals(38, kr.getMatch(0).getStartPos());
111 assertEquals(41, kr.getMatch(0).getEndPos());
112 assertEquals(182, kr.getMatch(1).getStartPos());
113 assertEquals(185, kr.getMatch(1).getEndPos());
114
115 ssq = new SpanSubspanQuery(sdq, -2, 0, true);
116 kr = ki.search(ssq, (short) 10);
117
118 assertEquals(39, kr.getMatch(0).getStartPos());
119 assertEquals(41, kr.getMatch(0).getEndPos());
120 assertEquals(183, kr.getMatch(1).getStartPos());
121 assertEquals(185, kr.getMatch(1).getEndPos());
122
Nils Diewald392bcf32015-02-26 20:01:17 +0000123 // for (Match km : kr.getMatches()) {
Eliza Margaretha58ee0bf2015-01-26 16:37:31 +0000124 // System.out.println(km.getStartPos() + "," + km.getEndPos()
125 // + km.getSnippetBrackets());
126 // }
Eliza Margarethaafe98122015-01-23 17:37:57 +0000127 }
128
Akronc12567c2016-06-03 00:40:52 +0200129
Akronf796b862016-04-29 18:51:25 +0200130 // Negative SubSpanQuery
131 @Test
132 public void testCaseNegativeSubSpan () throws IOException {
133 KrillIndex ki = new KrillIndex();
134 FieldDocument fd = new FieldDocument();
135
Akronc12567c2016-06-03 00:40:52 +0200136 fd.addTV("base",
Akronf796b862016-04-29 18:51:25 +0200137 // <x>a <x>b </x>c </x>
138 "a b c ",
Akronc12567c2016-06-03 00:40:52 +0200139 "[(0-1)s:a|i:a|_0$<i>0<i>2|<>:x$<b>64<i>0<i>6<i>3<b>0]"
140 + "[(1-2)s:b|i:b|_1$<i>2<i>4|<>:x$<b>64<i>2<i>4<i>2<b>1]"
141 + "[(3-4)s:c|i:c|_2$<i>4<i>6]");
Akronf796b862016-04-29 18:51:25 +0200142 ki.addDoc(fd);
143 ki.commit();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200144 SpanSubspanQuery ssq = new SpanSubspanQuery(
145 new SpanElementQuery("base", "x"), -1, 1, true);
Akronf796b862016-04-29 18:51:25 +0200146 kr = ki.search(ssq, (short) 10);
Akron9c04ce22016-05-02 16:03:21 +0200147
148 /*
margaretha3865e522016-05-02 13:24:51 +0200149 for (Match km : kr.getMatches()) {
150 System.out.println(km.getStartPos() + "," + km.getEndPos()
151 + km.getSnippetBrackets());
152 }
Akron9c04ce22016-05-02 16:03:21 +0200153 */
154
Akronf796b862016-04-29 18:51:25 +0200155 assertEquals(2, kr.getTotalResults());
Akron9c04ce22016-05-02 16:03:21 +0200156 assertEquals(1, kr.getMatch(0).getStartPos());
157 assertEquals(2, kr.getMatch(0).getEndPos());
Akronf05fde62016-08-03 23:46:17 +0200158 assertEquals("a [[b ]]c ", kr.getMatch(0).getSnippetBrackets());
Akron9c04ce22016-05-02 16:03:21 +0200159
160 assertEquals(2, kr.getMatch(1).getStartPos());
161 assertEquals(3, kr.getMatch(1).getEndPos());
Akronf05fde62016-08-03 23:46:17 +0200162 assertEquals("a b [[c ]]", kr.getMatch(1).getSnippetBrackets());
Akronf796b862016-04-29 18:51:25 +0200163 };
164};