blob: dbdf1302e0636c21b9a56d4eb8d333a9c91ef76a [file] [log] [blame]
Eliza Margaretha8e274e32014-01-28 15:09:30 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
4import java.util.ArrayList;
5import java.util.List;
6import java.util.Map;
7
Akron700c1eb2015-09-25 16:57:30 +02008import org.apache.lucene.index.LeafReaderContext;
Eliza Margaretha8e274e32014-01-28 15:09:30 +00009import org.apache.lucene.index.Term;
10import org.apache.lucene.index.TermContext;
11import org.apache.lucene.util.Bits;
12
13import de.ids_mannheim.korap.query.SpanDistanceQuery;
14
Eliza Margaretha609a5be2014-12-18 16:52:20 +000015/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000016 * Enumeration of token-based distance span matches consisting of two
17 * child
18 * spans having an actual distance in the range of the minimum and
19 * maximum
20 * distance parameters specified in the corresponding query. A
21 * TokenDistanceSpan
22 * starts from the minimum start positions of its child spans and ends
23 * at the
Eliza Margarethaafe98122015-01-23 17:37:57 +000024 * maximum end positions of the child spans.
Eliza Margaretha8e274e32014-01-28 15:09:30 +000025 *
Eliza Margaretha609a5be2014-12-18 16:52:20 +000026 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +020027 */
Eliza Margaretha609a5be2014-12-18 16:52:20 +000028public class TokenDistanceSpans extends OrderedDistanceSpans {
Eliza Margaretha8e274e32014-01-28 15:09:30 +000029
Eliza Margaretha609a5be2014-12-18 16:52:20 +000030 /**
Eliza Margaretha7612bde2015-01-14 10:28:42 +000031 * Constructs TokenDistanceSpans from the given query.
Eliza Margaretha609a5be2014-12-18 16:52:20 +000032 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 * @param query
34 * a SpanDistanceQuery
Eliza Margaretha609a5be2014-12-18 16:52:20 +000035 * @param context
36 * @param acceptDocs
37 * @param termContexts
38 * @throws IOException
39 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 public TokenDistanceSpans (SpanDistanceQuery query,
Akron700c1eb2015-09-25 16:57:30 +020041 LeafReaderContext context, Bits acceptDocs,
Nils Diewaldbb33da22015-03-04 16:24:25 +000042 Map<Term, TermContext> termContexts)
43 throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000044 super(query, context, acceptDocs, termContexts);
45 hasMoreSpans = hasMoreFirstSpans;
46 }
Eliza Margaretha8e274e32014-01-28 15:09:30 +000047
Nils Diewaldbb33da22015-03-04 16:24:25 +000048
Eliza Margaretha609a5be2014-12-18 16:52:20 +000049 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000050 protected void setCandidateList () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000051 if (candidateListDocNum == secondSpans.doc()) {
52 copyPossibleCandidates();
53 addNewCandidates();
54 candidateListIndex = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +000055 }
56 else {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000057 candidateList.clear();
58 if (hasMoreFirstSpans && ensureSameDoc(firstSpans, secondSpans)) {
59 candidateListDocNum = firstSpans.doc();
60 addNewCandidates();
61 candidateListIndex = -1;
62 }
63 }
64 }
65
Nils Diewaldbb33da22015-03-04 16:24:25 +000066
Eliza Margaretha609a5be2014-12-18 16:52:20 +000067 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000068 * Restructures the candidateList to contain only candidate
69 * (first) spans
70 * which are still possible to create a match, from the candidate
71 * list
Eliza Margaretha609a5be2014-12-18 16:52:20 +000072 * prepared for the previous second spans.
73 *
Eliza Margaretha6f989202016-10-14 21:48:29 +020074 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000075 private void copyPossibleCandidates () {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000076 List<CandidateSpan> temp = new ArrayList<>();
77 for (CandidateSpan cs : candidateList) {
78 if (cs.getEnd() + maxDistance > secondSpans.start())
79 temp.add(cs);
80 }
81 candidateList = temp;
82 }
83
Nils Diewaldbb33da22015-03-04 16:24:25 +000084
Eliza Margaretha609a5be2014-12-18 16:52:20 +000085 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000086 * Add new possible firstspan candidates for the current
87 * secondspan.
Eliza Margaretha6f989202016-10-14 21:48:29 +020088 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000089 private void addNewCandidates () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +000090 while (hasMoreFirstSpans && firstSpans.doc() == candidateListDocNum
91 && firstSpans.start() < secondSpans.end()) {
92
93 if (firstSpans.end() + maxDistance > secondSpans.start())
94 candidateList.add(new CandidateSpan(firstSpans));
95
96 hasMoreFirstSpans = firstSpans.next();
97 }
98 }
99
Nils Diewaldbb33da22015-03-04 16:24:25 +0000100
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000101 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000102 protected boolean findMatch () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000103 CandidateSpan candidateSpan = candidateList.get(candidateListIndex);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200104 if (minDistance == 0 &&
105 // intersection
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000106 candidateSpan.getStart() < secondSpans.end()
107 && secondSpans.start() < candidateSpan.getEnd()) {
108
109 setMatchProperties(candidateSpan, true);
110 return true;
111 }
112
113 int actualDistance = secondSpans.start() - candidateSpan.getEnd() + 1;
114 if (candidateSpan.getStart() < secondSpans.start()
115 && minDistance <= actualDistance
116 && actualDistance <= maxDistance) {
117
118 setMatchProperties(candidateSpan, false);
119 return true;
120 }
121 return false;
122 }
123
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000125 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000126 public long cost () {
Akron42993552016-02-04 13:24:24 +0100127 if (candidateList.size() > 0) {
128 long cost = 0;
129 for (CandidateSpan candidateSpan : candidateList) {
130 cost += candidateSpan.getCost();
131 }
132 return cost + secondSpans.cost();
133 }
134 else {
135 return firstSpans.cost() + secondSpans.cost();
136 }
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000137 }
138
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000140 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141 protected boolean isSecondSpanValid () throws IOException {
Eliza Margaretha609a5be2014-12-18 16:52:20 +0000142 return true;
143 }
Eliza Margaretha8e274e32014-01-28 15:09:30 +0000144}