blob: 691f978e383b02637e4b232049130838996dbead [file] [log] [blame]
Eliza Margaretha1413e0f2014-02-06 13:01:29 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.util.ArrayList;
Eliza Margaretha1413e0f2014-02-06 13:01:29 +00005import java.util.List;
6import java.util.Map;
7
Akron700c1eb2015-09-25 16:57:30 +02008import org.apache.lucene.index.LeafReaderContext;
Eliza Margaretha1413e0f2014-02-06 13:01:29 +00009import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
11import org.apache.lucene.search.spans.Spans;
12import org.apache.lucene.util.Bits;
13
14import de.ids_mannheim.korap.query.SpanDistanceQuery;
15
Eliza Margaretha609a5be2014-12-18 16:52:20 +000016/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000017 * Enumeration of span matches, whose two child spans have a specific
18 * range of
19 * distance (within a min and a max distance) and can be in any order.
20 * The unit
Eliza Margaretha609a5be2014-12-18 16:52:20 +000021 * distance is a token position.
Eliza Margaretha1413e0f2014-02-06 13:01:29 +000022 *
23 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +020024 */
Eliza Margaretha609a5be2014-12-18 16:52:20 +000025public class UnorderedTokenDistanceSpans extends UnorderedDistanceSpans {
Eliza Margaretha1413e0f2014-02-06 13:01:29 +000026
Eliza Margaretha609a5be2014-12-18 16:52:20 +000027 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000028 * Constructs UnorderedTokenDistanceSpans for the given
29 * SpanDistanceQuery.
Eliza Margaretha609a5be2014-12-18 16:52:20 +000030 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 * @param query
32 * a SpanDistanceQuery
Eliza Margaretha609a5be2014-12-18 16:52:20 +000033 * @param context
34 * @param acceptDocs
35 * @param termContexts
36 * @throws IOException
37 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000038 public UnorderedTokenDistanceSpans (SpanDistanceQuery query,
Akron700c1eb2015-09-25 16:57:30 +020039 LeafReaderContext context,
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 Bits acceptDocs,
41 Map<Term, TermContext> termContexts)
42 throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000043 super(query, context, acceptDocs, termContexts);
44 }
Eliza Margaretha1413e0f2014-02-06 13:01:29 +000045
Nils Diewaldbb33da22015-03-04 16:24:25 +000046
Eliza Margaretha609a5be2014-12-18 16:52:20 +000047 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000048 protected boolean prepareLists () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000049
50 if (firstSpanList.isEmpty() && secondSpanList.isEmpty()) {
51 if (hasMoreFirstSpans && hasMoreSecondSpans
52 && ensureSameDoc(firstSpans, secondSpans)) {
53 firstSpanList.add(new CandidateSpan(firstSpans));
54 secondSpanList.add(new CandidateSpan(secondSpans));
55 currentDocNum = firstSpans.doc();
56 hasMoreFirstSpans = firstSpans.next();
57 hasMoreSecondSpans = secondSpans.next();
Nils Diewaldbb33da22015-03-04 16:24:25 +000058 }
59 else {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000060 hasMoreSpans = false;
61 return false;
62 }
Nils Diewaldbb33da22015-03-04 16:24:25 +000063 }
64 else if (firstSpanList.isEmpty() && hasMoreFirstSpans
Eliza Margaretha609a5be2014-12-18 16:52:20 +000065 && firstSpans.doc() == currentDocNum) {
66 firstSpanList.add(new CandidateSpan(firstSpans));
67 hasMoreFirstSpans = firstSpans.next();
Nils Diewaldbb33da22015-03-04 16:24:25 +000068 }
69 else if (secondSpanList.isEmpty() && hasMoreSecondSpans
Eliza Margaretha609a5be2014-12-18 16:52:20 +000070 && secondSpans.doc() == currentDocNum) {
71 secondSpanList.add(new CandidateSpan(secondSpans));
72 hasMoreSecondSpans = secondSpans.next();
73 }
74 return true;
75 }
76
Nils Diewaldbb33da22015-03-04 16:24:25 +000077
Eliza Margaretha609a5be2014-12-18 16:52:20 +000078 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000079 protected boolean setCandidateList (List<CandidateSpan> candidateList,
Eliza Margaretha609a5be2014-12-18 16:52:20 +000080 Spans candidate, boolean hasMoreCandidates,
81 List<CandidateSpan> targetList) throws IOException {
82
83 if (!targetList.isEmpty()) {
84 CandidateSpan target = targetList.get(0);
85 while (hasMoreCandidates && candidate.doc() == target.getDoc()
86 && isWithinMaxDistance(target, candidate)) {
87 candidateList.add(new CandidateSpan(candidate));
88 hasMoreCandidates = candidate.next();
89 }
90 }
91 return hasMoreCandidates;
92 }
93
Nils Diewaldbb33da22015-03-04 16:24:25 +000094
Eliza Margaretha609a5be2014-12-18 16:52:20 +000095 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 * Tells if the target and candidate spans are not too far from
97 * each other
Eliza Margaretha609a5be2014-12-18 16:52:20 +000098 * (within the maximum distance).
99 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100 * @param target
101 * a target span
102 * @param candidate
103 * a candidate span
104 * @return <code>true</code> if the target and candidate spans are
105 * within
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000106 * the maximum distance, <code>false</code> otherwise.
107 */
Eliza Margaretha6f989202016-10-14 21:48:29 +0200108 protected boolean isWithinMaxDistance (CandidateSpan target,
109 Spans candidate) {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000110 // left candidate
111 if (candidate.end() < target.getStart()
112 && candidate.end() + maxDistance <= target.getStart()) {
113 return false;
114 }
115 // right candidate
116 if (candidate.start() > target.getEnd()
117 && target.getEnd() + maxDistance <= candidate.start()) {
118 return false;
119 }
120 return true;
121 }
122
Nils Diewaldbb33da22015-03-04 16:24:25 +0000123
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000124 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000125 protected List<CandidateSpan> findMatches (CandidateSpan target,
margarethac66265c2016-12-14 13:48:45 +0100126 List<CandidateSpan> candidateList, boolean isTargetFirstSpan) {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000127
128 List<CandidateSpan> matches = new ArrayList<>();
129 int actualDistance;
margaretha35120872016-12-19 18:24:22 +0100130 CandidateSpan match;
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000131 for (CandidateSpan cs : candidateList) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200132 if (minDistance == 0 &&
133 // intersection
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000134 target.getStart() < cs.getEnd()
135 && cs.getStart() < target.getEnd()) {
margaretha35120872016-12-19 18:24:22 +0100136 match = createMatchCandidate(target, cs, true, isTargetFirstSpan);
137 matches.add(match);
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000138 continue;
139 }
140
141 // left candidate
margaretha50110f32015-05-12 18:21:29 +0200142 if (cs.getEnd() < target.getStart()) {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000143 actualDistance = target.getStart() - cs.getEnd() + 1;
margaretha50110f32015-05-12 18:21:29 +0200144 }
145 else {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000146 // right candidate
147 actualDistance = cs.getStart() - target.getEnd() + 1;
margaretha50110f32015-05-12 18:21:29 +0200148 }
Eliza Margaretha6f989202016-10-14 21:48:29 +0200149 if (minDistance <= actualDistance
150 && actualDistance <= maxDistance) {
margaretha35120872016-12-19 18:24:22 +0100151 match = createMatchCandidate(target, cs, false, isTargetFirstSpan);
152 matches.add(match);
margaretha50110f32015-05-12 18:21:29 +0200153 }
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000154 }
155 return matches;
156 }
157
Nils Diewaldbb33da22015-03-04 16:24:25 +0000158
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000159 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000160 protected void updateList (List<CandidateSpan> candidateList) {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000161 candidateList.remove(0);
162 }
Eliza Margaretha1413e0f2014-02-06 13:01:29 +0000163
164}