| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| Eliza Margaretha | 427149f | 2014-10-29 15:53:30 +0000 | [diff] [blame] | 3 | import java.io.IOException; |
| 4 | import java.nio.ByteBuffer; |
| 5 | import java.util.ArrayList; |
| 6 | import java.util.Collection; |
| 7 | import java.util.List; |
| 8 | import java.util.Map; |
| 9 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 10 | import org.apache.lucene.index.LeafReaderContext; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 11 | import org.apache.lucene.index.Term; |
| 12 | import org.apache.lucene.index.TermContext; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 13 | import org.apache.lucene.index.TermState; |
| Eliza Margaretha | 427149f | 2014-10-29 15:53:30 +0000 | [diff] [blame] | 14 | import org.apache.lucene.search.spans.SpanQuery; |
| 15 | import org.apache.lucene.search.spans.Spans; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 16 | import org.apache.lucene.util.Bits; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 17 | import org.slf4j.Logger; |
| 18 | import org.slf4j.LoggerFactory; |
| 19 | |
| Nils Diewald | 1455e1e | 2014-08-01 16:12:43 +0000 | [diff] [blame] | 20 | /** |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 21 | * Add a payload to the span with an identification number (a class) |
| 22 | * And the start and end position of the span, so this information |
| 23 | * can bubble up for later processing (similar to captures in regular |
| 24 | * expression). |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 25 | * |
| Nils Diewald | 1455e1e | 2014-08-01 16:12:43 +0000 | [diff] [blame] | 26 | * @author diewald |
| 27 | */ |
| 28 | |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 29 | public class ClassSpans extends SimpleSpans { |
| 30 | protected List<byte[]> classedPayload; |
| 31 | protected Spans spans; |
| 32 | protected byte number; |
| 33 | protected SpanQuery operand; |
| 34 | protected Boolean hasmorespans = false; |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 35 | |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 36 | private final Logger log = LoggerFactory.getLogger(ClassSpans.class); |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 37 | |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 38 | // This advices the java compiler to ignore all loggings |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 39 | public static final boolean DEBUG = false; |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 40 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 41 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 42 | /** |
| 43 | * Construct a new ClassSpans object. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 44 | * |
| 45 | * @param operand |
| 46 | * An arbitrary nested {@link SpanQuery}. |
| 47 | * @param context |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 48 | * The {@link LeafReaderContext}. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 49 | * @param acceptDocs |
| 50 | * Bit vector representing the documents |
| 51 | * to be searched in. |
| 52 | * @param termContexts |
| 53 | * A map managing {@link TermState TermStates}. |
| 54 | * @param number |
| 55 | * The identifying class number. |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 56 | */ |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 57 | public ClassSpans (SpanQuery operand, LeafReaderContext context, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 58 | Bits acceptDocs, Map<Term, TermContext> termContexts, |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 59 | byte number) |
| 60 | throws IOException { |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 61 | spans = operand.getSpans(context, acceptDocs, termContexts); |
| 62 | |
| 63 | // The number of the class |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 64 | this.number = number; |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 65 | |
| 66 | // The current operand |
| 67 | this.operand = operand; |
| 68 | |
| 69 | // The highlighted payload |
| 70 | this.classedPayload = new ArrayList<byte[]>(3); |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 71 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 72 | }; |
| 73 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 74 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 75 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 76 | public Collection<byte[]> getPayload () throws IOException { |
| 77 | return classedPayload; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 78 | }; |
| 79 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 80 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 81 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 82 | public boolean isPayloadAvailable () { |
| 83 | // We set payloads here - so it's always true |
| 84 | return true; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 85 | }; |
| 86 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 87 | |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 88 | public byte getNumber () { |
| 89 | return number; |
| 90 | } |
| 91 | |
| 92 | |
| 93 | public void setNumber (byte number) { |
| 94 | this.number = number; |
| 95 | } |
| 96 | |
| 97 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 98 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 99 | public int doc () { |
| 100 | return spans.doc(); |
| 101 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 102 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 103 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 104 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 105 | public int start () { |
| 106 | return spans.start(); |
| 107 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 108 | |
| 109 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 110 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 111 | public int end () { |
| 112 | return spans.end(); |
| 113 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 114 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 115 | |
| 116 | @Override |
| 117 | public boolean next () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 118 | if (DEBUG) |
| 119 | log.trace("Forward next"); |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 120 | |
| 121 | if (spans.next()) |
| 122 | return this.addClassPayload(); |
| 123 | |
| 124 | hasmorespans = false; |
| 125 | return false; |
| Eliza Margaretha | 67a8857 | 2014-11-04 14:38:56 +0000 | [diff] [blame] | 126 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 129 | protected boolean addClassPayload () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | hasmorespans = true; |
| 131 | |
| 132 | classedPayload.clear(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 133 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 134 | // Subquery has payloads |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | if (spans.isPayloadAvailable()) { |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 136 | classedPayload.addAll(spans.getPayload()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | if (DEBUG) |
| 138 | log.trace("Found payload in nested SpanQuery"); |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 139 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 140 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 141 | if (DEBUG) { |
| 142 | log.trace("Wrap class {} around span {} - {}", number, |
| 143 | spans.start(), spans.end()); |
| 144 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | // Todo: Better allocate using a Factory! |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 147 | ByteBuffer bb = ByteBuffer.allocate(10); |
| 148 | bb.put((byte) 0); |
| 149 | bb.putInt(spans.start()); |
| 150 | bb.putInt(spans.end()); |
| 151 | bb.put(number); |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 152 | /* |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 153 | System.err.println( |
| 154 | "####################### " + |
| 155 | spans.start() + |
| 156 | "|" + |
| 157 | spans.end() + |
| 158 | ":" + |
| 159 | number |
| 160 | ); |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 161 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 162 | |
| 163 | // Add highlight information as byte array |
| 164 | classedPayload.add(bb.array()); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 165 | |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 166 | if (spans instanceof SimpleSpans) { |
| 167 | SimpleSpans ss = (SimpleSpans) spans; |
| 168 | this.hasSpanId = ss.hasSpanId; |
| 169 | this.spanId = ss.spanId; |
| 170 | } |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 171 | return true; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | }; |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 173 | |
| 174 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 175 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 176 | public boolean skipTo (int target) throws IOException { |
| 177 | classedPayload.clear(); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 178 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 179 | if (DEBUG) |
| 180 | log.trace("Skip ClassSpans {} -> {}", spans.doc(), target); |
| Nils Diewald | cd22686 | 2015-02-11 22:27:45 +0000 | [diff] [blame] | 181 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 182 | if (hasmorespans && spans.doc() < target && spans.skipTo(target)) |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 183 | return this.addClassPayload(); |
| 184 | return false; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 185 | }; |
| 186 | |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 187 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 188 | @Override |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 189 | public String toString () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 190 | return getClass().getName() + "(" + this.operand.toString() + ")@" |
| 191 | + (doc() + ":" + start() + "-" + end()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 192 | }; |
| 193 | |
| 194 | |
| 195 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 196 | public long cost () { |
| Nils Diewald | 41750bf | 2015-02-06 17:45:20 +0000 | [diff] [blame] | 197 | return spans.cost(); |
| 198 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 199 | }; |