blob: 1bd1dcdbb46fbc9b3ceab43a3631874718e72c1f [file] [log] [blame]
Nils Diewaldcc7c0b32014-07-31 19:58:22 +00001package de.ids_mannheim.korap.index;
2
3import static org.junit.Assert.assertEquals;
4
Nils Diewaldcc7c0b32014-07-31 19:58:22 +00005import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +00006import org.junit.Test;
7import org.junit.runner.RunWith;
8import org.junit.runners.JUnit4;
9import org.slf4j.Logger;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000010
Akron352dae82016-08-05 17:57:51 +020011import de.ids_mannheim.korap.Krill;
Nils Diewalda14ecd62015-02-26 21:00:20 +000012import de.ids_mannheim.korap.KrillIndex;
Akron352dae82016-08-05 17:57:51 +020013import de.ids_mannheim.korap.KrillQuery;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000014import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
15import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
Akron352dae82016-08-05 17:57:51 +020016import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
margaretha4f995582015-12-14 14:14:34 +010017import de.ids_mannheim.korap.response.Result;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000018
19@RunWith(JUnit4.class)
20public class TestSegmentNegationIndex {
21 private SpanQuery sq;
Nils Diewalda14ecd62015-02-26 21:00:20 +000022 private KrillIndex ki;
Nils Diewald884dbcf2015-02-27 17:02:28 +000023 private Result kr;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000024 private FieldDocument fd;
Nils Diewaldbb33da22015-03-04 16:24:25 +000025 private Logger log;
26
27
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000028 @Test
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 public void testcaseNegation () throws Exception {
30 ki = new KrillIndex();
31 ki.addDoc(createFieldDoc0());
32 ki.addDoc(createFieldDoc1());
33 ki.addDoc(createFieldDoc2());
34 ki.addDoc(createFieldDoc3());
35 ki.commit();
Akron352dae82016-08-05 17:57:51 +020036 SpanSegmentQueryWrapper ssqw = new SpanSegmentQueryWrapper("tokens",
Nils Diewaldbb33da22015-03-04 16:24:25 +000037 "s:b");
38 ssqw.with("s:c");
Akron352dae82016-08-05 17:57:51 +020039 SpanSequenceQueryWrapper sqw = new SpanSequenceQueryWrapper("tokens",
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 ssqw).append("s:d");
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000041
Nils Diewaldbb33da22015-03-04 16:24:25 +000042 kr = ki.search(sqw.toQuery(), (short) 10);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000043
Nils Diewaldbb33da22015-03-04 16:24:25 +000044 assertEquals("totalResults", kr.getTotalResults(), 2);
45 // Match #0
46 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
47 assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos);
48 assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000049
Nils Diewaldbb33da22015-03-04 16:24:25 +000050 // Match #1 in the other atomic index
51 assertEquals("doc-number", 3, kr.getMatch(1).getLocalDocID());
52 assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos);
53 assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000054
Akron352dae82016-08-05 17:57:51 +020055 ssqw = new SpanSegmentQueryWrapper("tokens", "s:b");
Nils Diewaldbb33da22015-03-04 16:24:25 +000056 ssqw.without("s:c");
Akron352dae82016-08-05 17:57:51 +020057 sqw = new SpanSequenceQueryWrapper("tokens", ssqw).append("s:a");
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000058
Nils Diewaldbb33da22015-03-04 16:24:25 +000059 kr = ki.search(sqw.toQuery(), (short) 10);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000060
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
62 assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos);
63 assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000064
Nils Diewaldbb33da22015-03-04 16:24:25 +000065 assertEquals("doc-number", 1, kr.getMatch(1).getLocalDocID());
66 assertEquals("StartPos (1)", 1, kr.getMatch(1).startPos);
67 assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);
68
69 assertEquals("doc-number", 1, kr.getMatch(2).getLocalDocID());
70 assertEquals("StartPos (2)", 2, kr.getMatch(2).startPos);
71 assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);
72
73 assertEquals("doc-number", 2, kr.getMatch(3).getLocalDocID());
74 assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
75 assertEquals("EndPos (3)", 3, kr.getMatch(3).endPos);
Nils Diewaldcc7c0b32014-07-31 19:58:22 +000076 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000077
78
Akron352dae82016-08-05 17:57:51 +020079 @Test
80 public void testcaseWarnings () throws Exception {
81 ki = new KrillIndex();
82 ki.addDoc(createFieldDoc0());
83 ki.addDoc(createFieldDoc1());
84 ki.addDoc(createFieldDoc2());
85 ki.addDoc(createFieldDoc3());
86 ki.commit();
87
88 kr = ki.search(new Krill(
89 "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [\"caseInsensitive\"], \"layer\" : \"orth\", \"match\" : \"match:eq\" }}}"));
90 assertEquals("totalResults", kr.getTotalResults(), 6);
91 assertEquals("Warning", kr.hasWarnings(), true);
92 assertEquals("Warning text", kr.getWarning(0).getMessage(),
93 "Flag is unknown");
Akron62ca2cf2016-08-05 19:55:52 +020094 assertEquals("Warning text", kr.getWarning(0).toJsonString(),
95 "[748,\"Flag is unknown\",\"caseInsensitive\"]");
Akron352dae82016-08-05 17:57:51 +020096
97 // Negation of segment
98 kr = ki.search(new Krill(
99 "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [\"flags:caseInsensitive\"], \"layer\" : \"orth\", \"match\" : \"match:ne\" }}}"));
100
101 assertEquals("totalResults", kr.getTotalResults(), 4);
102 assertEquals("Warning", kr.hasWarnings(), true);
103 assertEquals("Warning text", kr.getWarning(0).getMessage(),
104 "Exclusivity of query is ignored");
Akron62ca2cf2016-08-05 19:55:52 +0200105
Akron9ca35892016-08-06 12:50:27 +0200106 // Flag parameter injection
Akron62ca2cf2016-08-05 19:55:52 +0200107 kr = ki.search(new Krill(
108 "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [{ \"injection\" : true }], \"layer\" : \"orth\", \"match\" : \"match:ne\" }}}"));
109
110 assertEquals("totalResults", kr.getTotalResults(), 6);
111 assertEquals("Warning", kr.hasWarnings(), true);
112 assertEquals("Warning text", kr.getWarning(0).getMessage(),
113 "Flag is unknown");
114 assertEquals("Warning text", kr.getWarning(0).toJsonString(),
115 "[748,\"Flag is unknown\"]");
Akron352dae82016-08-05 17:57:51 +0200116 };
117
118
Nils Diewaldbb33da22015-03-04 16:24:25 +0000119 private FieldDocument createFieldDoc0 () {
120 fd = new FieldDocument();
121 fd.addString("ID", "doc-0");
Akron352dae82016-08-05 17:57:51 +0200122 fd.addTV("tokens", "bcbabd", "[(0-1)s:b|i:b|_1$<i>0<i>1]"
margaretha4f995582015-12-14 14:14:34 +0100123 + "[(1-2)s:c|i:c|s:b|_2$<i>1<i>2]"
124 + "[(2-3)s:b|i:b|_3$<i>2<i>3|<>:e$<b>64<i>2<i>4<i>4<b>0]"
125 + "[(3-4)s:a|i:a|_4$<i>3<i>4|<>:e$<b>64<i>3<i>5<i>5<b>0|"
126 + "<>:e2$<b>64<i>3<i>5<i>5<b>0]"
127 + "[(4-5)s:b|i:b|s:c|_5$<i>4<i>5]"
128 + "[(5-6)s:d|i:d|_6$<i>5<i>6|<>:e2$<b>64<i>5<i>6<i>6<b>0]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000129 return fd;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000130 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000131
132
133 private FieldDocument createFieldDoc1 () {
134 fd = new FieldDocument();
135 fd.addString("ID", "doc-1");
Akron352dae82016-08-05 17:57:51 +0200136 fd.addTV("tokens", "babaa", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]"
margaretha4f995582015-12-14 14:14:34 +0100137 + "[(1-2)s:a|i:a|s:b|_2$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
138 + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]"
139 + "[(3-4)s:a|i:a|_4$<i>3<i>4]" + "[(4-5)s:a|i:a|_5$<i>4<i>5]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000140 return fd;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000141 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142
143
144 private FieldDocument createFieldDoc2 () {
145 fd = new FieldDocument();
146 fd.addString("ID", "doc-2");
Eliza Margaretha6f989202016-10-14 21:48:29 +0200147 fd.addTV("tokens", "bdb",
148 "[(0-1)s:b|i:b|_1$<i>0<i>1]" + "[(1-2)s:d|i:d|s:b|_2$<i>1<i>2]"
149 + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 return fd;
151 }
152
153
154 private FieldDocument createFieldDoc3 () {
155 fd = new FieldDocument();
156 fd.addString("ID", "doc-3");
Akron352dae82016-08-05 17:57:51 +0200157 fd.addTV("tokens", "bdb", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]"
margaretha4f995582015-12-14 14:14:34 +0100158 + "[(1-2)s:d|_2$<i>1<i>2]" + "[(2-3)s:d|i:d|_3$<i>2<i>3]");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 return fd;
Nils Diewaldcc7c0b32014-07-31 19:58:22 +0000160 }
161}