| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| 3 | import java.io.IOException; |
| 4 | import java.util.ArrayList; |
| 5 | import java.util.List; |
| 6 | import java.util.Map; |
| 7 | |
| 8 | import org.apache.lucene.index.AtomicReaderContext; |
| 9 | import org.apache.lucene.index.Term; |
| 10 | import org.apache.lucene.index.TermContext; |
| 11 | import org.apache.lucene.util.Bits; |
| 12 | |
| 13 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 14 | |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 15 | /** |
| 16 | * Enumeration of token-based distance span matches consisting of two child |
| 17 | * spans having an actual distance in the range of the minimum and maximum |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame^] | 18 | * distance parameters specified in the corresponding query. A TokenDistanceSpan |
| 19 | * starts from the minimum start positions of its child spans and ends at the |
| 20 | * maximum end positions of the child spans. |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 21 | * |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 22 | * @author margaretha |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 23 | * */ |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 24 | public class TokenDistanceSpans extends OrderedDistanceSpans { |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 25 | |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 26 | /** |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 27 | * Constructs TokenDistanceSpans from the given query. |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 28 | * |
| 29 | * @param query a SpanDistanceQuery |
| 30 | * @param context |
| 31 | * @param acceptDocs |
| 32 | * @param termContexts |
| 33 | * @throws IOException |
| 34 | */ |
| 35 | public TokenDistanceSpans(SpanDistanceQuery query, |
| 36 | AtomicReaderContext context, Bits acceptDocs, |
| 37 | Map<Term, TermContext> termContexts) throws IOException { |
| 38 | super(query, context, acceptDocs, termContexts); |
| 39 | hasMoreSpans = hasMoreFirstSpans; |
| 40 | } |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 41 | |
| Eliza Margaretha | 609a5be | 2014-12-18 16:52:20 +0000 | [diff] [blame] | 42 | @Override |
| 43 | protected void setCandidateList() throws IOException { |
| 44 | if (candidateListDocNum == secondSpans.doc()) { |
| 45 | copyPossibleCandidates(); |
| 46 | addNewCandidates(); |
| 47 | candidateListIndex = -1; |
| 48 | } else { |
| 49 | candidateList.clear(); |
| 50 | if (hasMoreFirstSpans && ensureSameDoc(firstSpans, secondSpans)) { |
| 51 | candidateListDocNum = firstSpans.doc(); |
| 52 | addNewCandidates(); |
| 53 | candidateListIndex = -1; |
| 54 | } |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | /** |
| 59 | * Restructures the candidateList to contain only candidate (first) spans |
| 60 | * which are still possible to create a match, from the candidate list |
| 61 | * prepared for the previous second spans. |
| 62 | * |
| 63 | * */ |
| 64 | private void copyPossibleCandidates() { |
| 65 | List<CandidateSpan> temp = new ArrayList<>(); |
| 66 | for (CandidateSpan cs : candidateList) { |
| 67 | if (cs.getEnd() + maxDistance > secondSpans.start()) |
| 68 | temp.add(cs); |
| 69 | } |
| 70 | candidateList = temp; |
| 71 | } |
| 72 | |
| 73 | /** |
| 74 | * Add new possible firstspan candidates for the current secondspan. |
| 75 | * */ |
| 76 | private void addNewCandidates() throws IOException { |
| 77 | while (hasMoreFirstSpans && firstSpans.doc() == candidateListDocNum |
| 78 | && firstSpans.start() < secondSpans.end()) { |
| 79 | |
| 80 | if (firstSpans.end() + maxDistance > secondSpans.start()) |
| 81 | candidateList.add(new CandidateSpan(firstSpans)); |
| 82 | |
| 83 | hasMoreFirstSpans = firstSpans.next(); |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | @Override |
| 88 | protected boolean findMatch() throws IOException { |
| 89 | CandidateSpan candidateSpan = candidateList.get(candidateListIndex); |
| 90 | if (minDistance == 0 |
| 91 | && |
| 92 | // intersection |
| 93 | candidateSpan.getStart() < secondSpans.end() |
| 94 | && secondSpans.start() < candidateSpan.getEnd()) { |
| 95 | |
| 96 | setMatchProperties(candidateSpan, true); |
| 97 | return true; |
| 98 | } |
| 99 | |
| 100 | int actualDistance = secondSpans.start() - candidateSpan.getEnd() + 1; |
| 101 | if (candidateSpan.getStart() < secondSpans.start() |
| 102 | && minDistance <= actualDistance |
| 103 | && actualDistance <= maxDistance) { |
| 104 | |
| 105 | setMatchProperties(candidateSpan, false); |
| 106 | return true; |
| 107 | } |
| 108 | return false; |
| 109 | } |
| 110 | |
| 111 | @Override |
| 112 | public long cost() { |
| 113 | CandidateSpan candidateSpan = candidateList.get(candidateListIndex); |
| 114 | return candidateSpan.getCost() + secondSpans.cost(); |
| 115 | } |
| 116 | |
| 117 | @Override |
| 118 | protected boolean isSecondSpanValid() throws IOException { |
| 119 | return true; |
| 120 | } |
| Eliza Margaretha | 8e274e3 | 2014-01-28 15:09:30 +0000 | [diff] [blame] | 121 | } |