| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| 3 | import java.io.IOException; |
| Eliza Margaretha | 0170b88 | 2014-10-29 15:49:31 +0000 | [diff] [blame] | 4 | import java.nio.ByteBuffer; |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 5 | import java.util.List; |
| 6 | import java.util.Map; |
| 7 | |
| 8 | import org.apache.lucene.index.AtomicReaderContext; |
| 9 | import org.apache.lucene.index.Term; |
| 10 | import org.apache.lucene.index.TermContext; |
| 11 | import org.apache.lucene.search.spans.TermSpans; |
| 12 | import org.apache.lucene.util.Bits; |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 13 | |
| 14 | import de.ids_mannheim.korap.query.SpanTermWithIdQuery; |
| 15 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 16 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 17 | * Enumeration of termSpans having an id. This class just wraps the |
| 18 | * usual Lucene |
| 19 | * TermSpans, and adds spanid property. It reads the term-id from a |
| 20 | * term span |
| 21 | * payload. The term-id is encoded in a short, starting from (offset) |
| 22 | * 0 in the |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 23 | * payload. |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 24 | * |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 25 | * @author margaretha |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 26 | * */ |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 27 | public class TermSpansWithId extends SimpleSpans { |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 28 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 29 | private TermSpans termSpans; |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 30 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 31 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 32 | /** |
| 33 | * Creates TermSpansWithId from the given spanTermWithIdQuery. |
| 34 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 35 | * @param spanTermWithIdQuery |
| 36 | * a spanTermWithIdQuery |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 37 | * @param context |
| 38 | * @param acceptDocs |
| 39 | * @param termContexts |
| 40 | * @throws IOException |
| 41 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 42 | public TermSpansWithId (SpanTermWithIdQuery spanTermWithIdQuery, |
| 43 | AtomicReaderContext context, Bits acceptDocs, |
| 44 | Map<Term, TermContext> termContexts) |
| 45 | throws IOException { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 46 | super(spanTermWithIdQuery, context, acceptDocs, termContexts); |
| 47 | termSpans = (TermSpans) firstSpans; |
| 48 | hasMoreSpans = termSpans.next(); |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 49 | hasSpanId = true; |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 50 | } |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 51 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 52 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 53 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 54 | public boolean next () throws IOException { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 55 | isStartEnumeration = false; |
| 56 | return advance(); |
| 57 | } |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 58 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 59 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 60 | /** |
| 61 | * Advances to the next match and set it as the current match. |
| 62 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | * @return <code>true</code> if a match is found, |
| 64 | * <code>false</code> |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 65 | * otherwise. |
| 66 | * @throws IOException |
| 67 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 68 | private boolean advance () throws IOException { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 69 | while (hasMoreSpans) { |
| 70 | readPayload(); |
| 71 | matchDocNumber = firstSpans.doc(); |
| 72 | matchStartPosition = firstSpans.start(); |
| 73 | matchEndPosition = firstSpans.end(); |
| 74 | hasMoreSpans = firstSpans.next(); |
| 75 | return true; |
| 76 | } |
| 77 | return false; |
| 78 | } |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 79 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 80 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 81 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 82 | * Read the payloads of the current firstspan and set the term id |
| 83 | * info from |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 84 | * the payloads. |
| 85 | * |
| 86 | * @throws IOException |
| 87 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 88 | private void readPayload () throws IOException { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 89 | List<byte[]> payload = (List<byte[]>) firstSpans.getPayload(); |
| 90 | ByteBuffer bb = ByteBuffer.allocate(payload.get(0).length); |
| 91 | bb.put(payload.get(0)); |
| 92 | setSpanId(bb.getShort(0)); //term id |
| 93 | } |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 94 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 95 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 96 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 97 | public boolean skipTo (int target) throws IOException { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 98 | if (hasMoreSpans && (firstSpans.doc() < target)) { |
| 99 | if (!firstSpans.skipTo(target)) { |
| 100 | return false; |
| 101 | } |
| 102 | } |
| 103 | matchPayload.clear(); |
| 104 | isStartEnumeration = false; |
| 105 | return advance(); |
| 106 | } |
| 107 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 108 | |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 109 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 110 | public long cost () { |
| Eliza Margaretha | 493bfa9 | 2015-01-13 16:16:38 +0000 | [diff] [blame] | 111 | return firstSpans.cost(); // plus cost from reading payload |
| 112 | } |
| Eliza Margaretha | d12cabb | 2014-10-27 17:45:34 +0000 | [diff] [blame] | 113 | |
| 114 | } |