blob: 9d40d31c95766c5e771e1ec91048b94c785cc5fa [file] [log] [blame]
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
margaretha50c76332015-03-19 10:10:39 +01004import java.util.Collection;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +00005import java.util.Map;
6
7import org.apache.lucene.index.AtomicReaderContext;
8import org.apache.lucene.index.Term;
9import org.apache.lucene.index.TermContext;
10import org.apache.lucene.util.Bits;
11
Eliza Margaretha5b69bdd2014-01-21 17:45:57 +000012import de.ids_mannheim.korap.query.SpanSegmentQuery;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000013
Eliza Margaretha7612bde2015-01-14 10:28:42 +000014/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000015 * SegmentSpans is an enumeration of Span matches in which that two
16 * child spans
Eliza Margaretha7612bde2015-01-14 10:28:42 +000017 * have exactly the same start and end positions.
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000018 *
Eliza Margaretha7612bde2015-01-14 10:28:42 +000019 * @author margaretha
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000020 * */
margaretha50c76332015-03-19 10:10:39 +010021public class SegmentSpans extends SimpleSpans {
Eliza Margaretha7612bde2015-01-14 10:28:42 +000022
Nils Diewaldbb33da22015-03-04 16:24:25 +000023 private boolean isRelation;
24
25
Eliza Margaretha7612bde2015-01-14 10:28:42 +000026 /**
27 * Creates SegmentSpans from the given {@link SpanSegmentQuery}.
28 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 * @param spanSegmentQuery
30 * a spanSegmentQuery.
Eliza Margaretha7612bde2015-01-14 10:28:42 +000031 * @param context
32 * @param acceptDocs
33 * @param termContexts
34 * @throws IOException
35 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000036 public SegmentSpans (SpanSegmentQuery spanSegmentQuery,
37 AtomicReaderContext context, Bits acceptDocs,
38 Map<Term, TermContext> termContexts)
39 throws IOException {
40 super(spanSegmentQuery, context, acceptDocs, termContexts);
41 if (spanSegmentQuery.isRelation()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +000042 isRelation = true;
43 }
margaretha50c76332015-03-19 10:10:39 +010044
45 collectPayloads = true;
46 hasMoreSpans = secondSpans.next();
47 }
48
49
50 @Override
51 public boolean next () throws IOException {
52 // Warning: this does not work for overlapping spans
53 // e.g. get multiple second spans in a firstspan
54 hasMoreSpans &= firstSpans.next();
55 isStartEnumeration = false;
56 matchPayload.clear();
57 return advance();
58 }
59
60
61 /**
62 * Advances to the next match.
63 *
64 * @return <code>true</code> if a match is found,
65 * <code>false</code> otherwise.
66 * @throws IOException
67 */
68 protected boolean advance () throws IOException {
69 // The complexity is linear for searching in a document.
70 // It's better if we can skip to >= position in a document.
71 while (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) {
72 int matchCase = findMatch();
73 if (matchCase == 0) {
74 doCollectPayloads();
75 return true;
76 }
77 else if (matchCase == 1) {
78 hasMoreSpans = secondSpans.next();
79 }
80 else {
81 hasMoreSpans = firstSpans.next();
82 }
83 }
84 return false;
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000085 }
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +000086
Nils Diewaldbb33da22015-03-04 16:24:25 +000087
Eliza Margaretha7612bde2015-01-14 10:28:42 +000088 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000089 * Check weather the start and end positions of the current
90 * firstspan and
Eliza Margaretha7612bde2015-01-14 10:28:42 +000091 * secondspan are identical.
92 *
93 * */
Nils Diewaldbb33da22015-03-04 16:24:25 +000094 protected int findMatch () {
95 RelationSpans s1;
margaretha50c76332015-03-19 10:10:39 +010096 SimpleSpans s2;
margarethaf70addb2015-04-27 13:17:18 +020097
Eliza Margaretha7612bde2015-01-14 10:28:42 +000098 if (firstSpans.start() == secondSpans.start()
99 && firstSpans.end() == secondSpans.end()) {
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000100
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101 if (isRelation) {
102 s1 = (RelationSpans) firstSpans;
margaretha50c76332015-03-19 10:10:39 +0100103 s2 = (SimpleSpans) secondSpans;
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000104
margarethaf70addb2015-04-27 13:17:18 +0200105 if (s2.hasSpanId) {
106 if (s1.getLeftId() == s2.getSpanId()) {
107 setSpanId(s2.getSpanId());
108 setMatch();
109 return 0;
110 }
111 }
112 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 setMatch();
114 return 0;
115 }
margarethaf70addb2015-04-27 13:17:18 +0200116
Nils Diewaldbb33da22015-03-04 16:24:25 +0000117 }
118 else {
119 setMatch();
120 return 0;
121 }
122 }
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000123
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124 if (firstSpans.start() < secondSpans.start()
Eliza Margaretha7612bde2015-01-14 10:28:42 +0000125 || firstSpans.end() < secondSpans.end())
126 return -1;
127
128 return 1;
129 }
Eliza Margaretha2db5e232015-03-04 10:20:01 +0000130
Nils Diewaldbb33da22015-03-04 16:24:25 +0000131
132 private void setMatch () {
133 matchDocNumber = firstSpans.doc();
134 matchStartPosition = firstSpans.start();
135 matchEndPosition = firstSpans.end();
136 }
margaretha50c76332015-03-19 10:10:39 +0100137
138
139 /**
140 * Collects available payloads from the current first and second
141 * spans.
142 *
143 * @throws IOException
144 */
145 private void doCollectPayloads () throws IOException {
146 Collection<byte[]> payload;
147 if (collectPayloads) {
148 if (firstSpans.isPayloadAvailable()) {
149 payload = firstSpans.getPayload();
150 matchPayload.addAll(payload);
151 }
152 if (secondSpans.isPayloadAvailable()) {
153 payload = secondSpans.getPayload();
154 matchPayload.addAll(payload);
155 }
156 }
157 }
158
159
160 @Override
161 public boolean skipTo (int target) throws IOException {
162 if (hasMoreSpans && (firstSpans.doc() < target)) {
163 if (!firstSpans.skipTo(target)) {
164 hasMoreSpans = false;
165 return false;
166 }
167 }
168 matchPayload.clear();
169 return advance();
170 }
171
172
173 @Override
174 public long cost () {
175 return firstSpans.cost() + secondSpans.cost();
176 }
Eliza Margarethaed3bb3b2014-01-14 10:53:56 +0000177}