| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| 3 | import java.io.IOException; |
| 4 | import java.util.ArrayList; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 5 | import java.util.List; |
| 6 | import java.util.Map; |
| 7 | |
| 8 | import org.apache.lucene.index.AtomicReaderContext; |
| 9 | import org.apache.lucene.index.Term; |
| 10 | import org.apache.lucene.index.TermContext; |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 11 | import org.apache.lucene.search.spans.Spans; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 12 | import org.apache.lucene.util.Bits; |
| 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| 15 | |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.SimpleSpanQuery; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 18 | |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 19 | /** DistanceSpan is a base class for enumeration of span matches, |
| 20 | * whose two child spans have a specific range of distance (within |
| Eliza Margaretha | 9738c39 | 2014-02-03 17:04:53 +0000 | [diff] [blame] | 21 | * a min and a max distance) and must be in order (a firstspan is |
| 22 | * followed by a secondspan). |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 23 | * |
| 24 | * @author margaretha |
| 25 | * */ |
| Eliza Margaretha | 795937c | 2014-02-06 13:08:28 +0000 | [diff] [blame] | 26 | public abstract class DistanceSpans extends SimpleSpans{ |
| Eliza Margaretha | 83b9537 | 2014-01-23 09:18:07 +0000 | [diff] [blame] | 27 | |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 28 | protected CandidateSpan matchFirstSpan,matchSecondSpan; |
| 29 | protected Logger log = LoggerFactory.getLogger(DistanceSpans.class); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 30 | |
| Eliza Margaretha | 795937c | 2014-02-06 13:08:28 +0000 | [diff] [blame] | 31 | public DistanceSpans(SpanDistanceQuery query, |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 32 | AtomicReaderContext context, Bits acceptDocs, |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 33 | Map<Term, TermContext> termContexts) throws IOException { |
| Eliza Margaretha | 83b9537 | 2014-01-23 09:18:07 +0000 | [diff] [blame] | 34 | super(query, context, acceptDocs, termContexts); |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 35 | } |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 36 | |
| 37 | @Override |
| 38 | public boolean next() throws IOException { |
| 39 | isStartEnumeration=false; |
| Eliza Margaretha | 83b9537 | 2014-01-23 09:18:07 +0000 | [diff] [blame] | 40 | matchPayload.clear(); |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 41 | return advance(); |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 42 | } |
| 43 | |
| 44 | |
| 45 | /** Find the next span match. |
| 46 | * @return true iff a span match is available. |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 47 | * */ |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 48 | protected abstract boolean advance() throws IOException; |
| 49 | |
| 50 | /** Find the same doc shared by element, firstspan and secondspan. |
| 51 | * @return true iff such a doc is found. |
| 52 | * */ |
| 53 | protected boolean findSameDoc(Spans x, |
| 54 | Spans y, Spans e) throws IOException{ |
| 55 | |
| 56 | while (hasMoreSpans) { |
| 57 | if (ensureSameDoc(x, y) && |
| 58 | e.doc() == x.doc()){ |
| 59 | return true; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 60 | } |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 61 | if (!ensureSameDoc(e,y)){ |
| 62 | return false; |
| 63 | }; |
| 64 | } |
| 65 | return false; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 66 | } |
| 67 | |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 68 | public CandidateSpan getMatchFirstSpan() { |
| 69 | return matchFirstSpan; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 70 | } |
| 71 | |
| Eliza Margaretha | 198e4ef | 2014-02-10 13:50:50 +0000 | [diff] [blame] | 72 | public void setMatchFirstSpan(CandidateSpan matchFirstSpan) { |
| 73 | this.matchFirstSpan = matchFirstSpan; |
| 74 | } |
| 75 | |
| 76 | public CandidateSpan getMatchSecondSpan() { |
| 77 | return matchSecondSpan; |
| 78 | } |
| 79 | |
| 80 | public void setMatchSecondSpan(CandidateSpan matchSecondSpan) { |
| 81 | this.matchSecondSpan = matchSecondSpan; |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 82 | } |
| 83 | |
| Eliza Margaretha | a2603fa | 2014-01-22 10:59:25 +0000 | [diff] [blame] | 84 | } |