| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| 3 | import static org.junit.Assert.assertEquals; |
| 4 | |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 5 | import org.apache.lucene.search.spans.SpanQuery; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 6 | import org.junit.Test; |
| 7 | import org.junit.runner.RunWith; |
| 8 | import org.junit.runners.JUnit4; |
| 9 | import org.slf4j.Logger; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 10 | |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 11 | import de.ids_mannheim.korap.Krill; |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 12 | import de.ids_mannheim.korap.KrillIndex; |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 13 | import de.ids_mannheim.korap.KrillQuery; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper; |
| 15 | import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper; |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper; |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 17 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 18 | |
| 19 | @RunWith(JUnit4.class) |
| 20 | public class TestSegmentNegationIndex { |
| 21 | private SpanQuery sq; |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 22 | private KrillIndex ki; |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 23 | private Result kr; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 24 | private FieldDocument fd; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 25 | private Logger log; |
| 26 | |
| 27 | |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 28 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 29 | public void testcaseNegation () throws Exception { |
| 30 | ki = new KrillIndex(); |
| 31 | ki.addDoc(createFieldDoc0()); |
| 32 | ki.addDoc(createFieldDoc1()); |
| 33 | ki.addDoc(createFieldDoc2()); |
| 34 | ki.addDoc(createFieldDoc3()); |
| 35 | ki.commit(); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 36 | SpanSegmentQueryWrapper ssqw = new SpanSegmentQueryWrapper("tokens", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 37 | "s:b"); |
| 38 | ssqw.with("s:c"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 39 | SpanSequenceQueryWrapper sqw = new SpanSequenceQueryWrapper("tokens", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 40 | ssqw).append("s:d"); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 41 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 42 | kr = ki.search(sqw.toQuery(), (short) 10); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 43 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 44 | assertEquals("totalResults", kr.getTotalResults(), 2); |
| 45 | // Match #0 |
| 46 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 47 | assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos); |
| 48 | assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 49 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 50 | // Match #1 in the other atomic index |
| 51 | assertEquals("doc-number", 3, kr.getMatch(1).getLocalDocID()); |
| 52 | assertEquals("StartPos (0)", 0, kr.getMatch(1).startPos); |
| 53 | assertEquals("EndPos (0)", 2, kr.getMatch(1).endPos); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 54 | |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 55 | ssqw = new SpanSegmentQueryWrapper("tokens", "s:b"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 56 | ssqw.without("s:c"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 57 | sqw = new SpanSequenceQueryWrapper("tokens", ssqw).append("s:a"); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 58 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 59 | kr = ki.search(sqw.toQuery(), (short) 10); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 60 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 61 | assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID()); |
| 62 | assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos); |
| 63 | assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 64 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 65 | assertEquals("doc-number", 1, kr.getMatch(1).getLocalDocID()); |
| 66 | assertEquals("StartPos (1)", 1, kr.getMatch(1).startPos); |
| 67 | assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos); |
| 68 | |
| 69 | assertEquals("doc-number", 1, kr.getMatch(2).getLocalDocID()); |
| 70 | assertEquals("StartPos (2)", 2, kr.getMatch(2).startPos); |
| 71 | assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos); |
| 72 | |
| 73 | assertEquals("doc-number", 2, kr.getMatch(3).getLocalDocID()); |
| 74 | assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos); |
| 75 | assertEquals("EndPos (3)", 3, kr.getMatch(3).endPos); |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 76 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 77 | |
| 78 | |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 79 | @Test |
| 80 | public void testcaseWarnings () throws Exception { |
| 81 | ki = new KrillIndex(); |
| 82 | ki.addDoc(createFieldDoc0()); |
| 83 | ki.addDoc(createFieldDoc1()); |
| 84 | ki.addDoc(createFieldDoc2()); |
| 85 | ki.addDoc(createFieldDoc3()); |
| 86 | ki.commit(); |
| 87 | |
| 88 | kr = ki.search(new Krill( |
| 89 | "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [\"caseInsensitive\"], \"layer\" : \"orth\", \"match\" : \"match:eq\" }}}")); |
| 90 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| 91 | assertEquals("Warning", kr.hasWarnings(), true); |
| 92 | assertEquals("Warning text", kr.getWarning(0).getMessage(), |
| 93 | "Flag is unknown"); |
| Akron | 62ca2cf | 2016-08-05 19:55:52 +0200 | [diff] [blame] | 94 | assertEquals("Warning text", kr.getWarning(0).toJsonString(), |
| 95 | "[748,\"Flag is unknown\",\"caseInsensitive\"]"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 96 | |
| 97 | // Negation of segment |
| 98 | kr = ki.search(new Krill( |
| 99 | "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [\"flags:caseInsensitive\"], \"layer\" : \"orth\", \"match\" : \"match:ne\" }}}")); |
| 100 | |
| 101 | assertEquals("totalResults", kr.getTotalResults(), 4); |
| 102 | assertEquals("Warning", kr.hasWarnings(), true); |
| 103 | assertEquals("Warning text", kr.getWarning(0).getMessage(), |
| 104 | "Exclusivity of query is ignored"); |
| Akron | 62ca2cf | 2016-08-05 19:55:52 +0200 | [diff] [blame] | 105 | |
| Akron | 9ca3589 | 2016-08-06 12:50:27 +0200 | [diff] [blame] | 106 | // Flag parameter injection |
| Akron | 62ca2cf | 2016-08-05 19:55:52 +0200 | [diff] [blame] | 107 | kr = ki.search(new Krill( |
| 108 | "{\"query\" : { \"@type\" : \"koral:token\", \"wrap\" : { \"@type\" : \"koral:term\", \"key\" : \"a\", \"flags\" : [{ \"injection\" : true }], \"layer\" : \"orth\", \"match\" : \"match:ne\" }}}")); |
| 109 | |
| 110 | assertEquals("totalResults", kr.getTotalResults(), 6); |
| 111 | assertEquals("Warning", kr.hasWarnings(), true); |
| 112 | assertEquals("Warning text", kr.getWarning(0).getMessage(), |
| 113 | "Flag is unknown"); |
| 114 | assertEquals("Warning text", kr.getWarning(0).toJsonString(), |
| 115 | "[748,\"Flag is unknown\"]"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 116 | }; |
| 117 | |
| 118 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 119 | private FieldDocument createFieldDoc0 () { |
| 120 | fd = new FieldDocument(); |
| 121 | fd.addString("ID", "doc-0"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 122 | fd.addTV("tokens", "bcbabd", "[(0-1)s:b|i:b|_1$<i>0<i>1]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 123 | + "[(1-2)s:c|i:c|s:b|_2$<i>1<i>2]" |
| 124 | + "[(2-3)s:b|i:b|_3$<i>2<i>3|<>:e$<b>64<i>2<i>4<i>4<b>0]" |
| 125 | + "[(3-4)s:a|i:a|_4$<i>3<i>4|<>:e$<b>64<i>3<i>5<i>5<b>0|" |
| 126 | + "<>:e2$<b>64<i>3<i>5<i>5<b>0]" |
| 127 | + "[(4-5)s:b|i:b|s:c|_5$<i>4<i>5]" |
| 128 | + "[(5-6)s:d|i:d|_6$<i>5<i>6|<>:e2$<b>64<i>5<i>6<i>6<b>0]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 129 | return fd; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 130 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 131 | |
| 132 | |
| 133 | private FieldDocument createFieldDoc1 () { |
| 134 | fd = new FieldDocument(); |
| 135 | fd.addString("ID", "doc-1"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 136 | fd.addTV("tokens", "babaa", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 137 | + "[(1-2)s:a|i:a|s:b|_2$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]" |
| 138 | + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]" |
| 139 | + "[(3-4)s:a|i:a|_4$<i>3<i>4]" + "[(4-5)s:a|i:a|_5$<i>4<i>5]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 140 | return fd; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 141 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 142 | |
| 143 | |
| 144 | private FieldDocument createFieldDoc2 () { |
| 145 | fd = new FieldDocument(); |
| 146 | fd.addString("ID", "doc-2"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 147 | fd.addTV("tokens", "bdb", |
| 148 | "[(0-1)s:b|i:b|_1$<i>0<i>1]" + "[(1-2)s:d|i:d|s:b|_2$<i>1<i>2]" |
| 149 | + "[(2-3)s:b|i:b|s:a|_3$<i>2<i>3]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 150 | return fd; |
| 151 | } |
| 152 | |
| 153 | |
| 154 | private FieldDocument createFieldDoc3 () { |
| 155 | fd = new FieldDocument(); |
| 156 | fd.addString("ID", "doc-3"); |
| Akron | 352dae8 | 2016-08-05 17:57:51 +0200 | [diff] [blame] | 157 | fd.addTV("tokens", "bdb", "[(0-1)s:b|i:b|s:c|_1$<i>0<i>1]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 158 | + "[(1-2)s:d|_2$<i>1<i>2]" + "[(2-3)s:d|i:d|_3$<i>2<i>3]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 159 | return fd; |
| Nils Diewald | cc7c0b3 | 2014-07-31 19:58:22 +0000 | [diff] [blame] | 160 | } |
| 161 | } |