// blob: a9aa08b836b7b63c5c083e9e32ec543484318057
package de.ids_mannheim.korap.query.spans;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.ids_mannheim.korap.query.SpanElementQuery;

Eliza Margaretha1c3bf272014-06-11 11:50:39 +000022/**
23 * @author Nils Diewald, margaretha
Nils Diewald6802acd2014-03-18 18:29:30 +000024 *
Nils Diewald20607ab2014-03-20 23:28:36 +000025 * Use copyFrom instead of clone
Nils Diewald82a4b862014-02-20 21:17:41 +000026 */
Eliza Margarethac7fb7312014-07-25 14:11:36 +000027public class ElementSpans extends SimpleSpans {
Nils Diewald82a4b862014-02-20 21:17:41 +000028
Eliza Margarethac7fb7312014-07-25 14:11:36 +000029 private List<CandidateElementSpans> candidateList;
30 private int currentDoc, currentPosition;
31 private short elementRef;
32 private TermSpans termSpans;
Nils Diewald20607ab2014-03-20 23:28:36 +000033
Eliza Margarethac7fb7312014-07-25 14:11:36 +000034 public boolean isElementRef = false; // A dummy flag
Nils Diewald20607ab2014-03-20 23:28:36 +000035
Eliza Margarethac7fb7312014-07-25 14:11:36 +000036 protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
Nils Diewald20607ab2014-03-20 23:28:36 +000037
Eliza Margarethac7fb7312014-07-25 14:11:36 +000038 public ElementSpans(SpanElementQuery spanElementQuery,
39 AtomicReaderContext context, Bits acceptDocs,
40 Map<Term, TermContext> termContexts) throws IOException {
41 super(spanElementQuery, context, acceptDocs, termContexts);
42 candidateList = new ArrayList<>();
43 termSpans = (TermSpans) firstSpans;
44 hasMoreSpans = termSpans.next();
45 if (hasMoreSpans) {
46 currentDoc = termSpans.doc();
47 currentPosition = termSpans.start();
Eliza Margarethafb25cef2014-06-06 14:19:07 +000048 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +000049 }
Nils Diewaldf399a672013-11-18 17:55:22 +000050
Eliza Margarethac7fb7312014-07-25 14:11:36 +000051 @Override
52 public boolean next() throws IOException {
53 isStartEnumeration=false;
54 return advance();
55 }
56
57 /** Get the next match by first checking the candidate match list
58 * and setting the list when it is empty.
59 * */
60 private boolean advance() throws IOException {
61 while(hasMoreSpans || !candidateList.isEmpty()){
62 if (!candidateList.isEmpty()){
63 CandidateElementSpans cs = candidateList.get(0);
64 this.matchDocNumber = cs.getDoc();
65 this.matchStartPosition = cs.getStart();
66 this.matchEndPosition = cs.getEnd();
67 this.matchPayload = cs.getPayloads();
68 this.setElementRef(cs.getElementRef());
69 candidateList.remove(0);
70 return true;
71 }
72 else{
73 logger.info("Setting candidate list");
74 setCandidateList();
75 currentDoc = termSpans.doc();
76 currentPosition = termSpans.start();
77 }
78 }
79 return false;
80 }
81
82 /** Collect all the elements in the same start position and sort them by
83 * end position (smallest first).
84 * */
85 private void setCandidateList() throws IOException {
86 while (hasMoreSpans && termSpans.doc() == currentDoc &&
87 termSpans.start() == currentPosition){
88 CandidateElementSpans cs = new CandidateElementSpans(termSpans,
89 elementRef);
90 readPayload(cs);
91 candidateList.add(cs);
92 hasMoreSpans = termSpans.next();
93 }
94 Collections.sort(candidateList);
95 }
96
97
98 /** This method reads the payload of the termSpan and assigns the end
99 * position and element ref to the candidate match. The character offset
100 * payload is set as the candidate match payload.
101 * <br/><br/>
102 * <em>Note</em>: payloadbuffer should actually collects all other payload
103 * beside end position and element ref, but KorapIndex identify element's
104 * payload by its length (8), which is only the character offsets. So
105 * these offsets are directly set as the candidate match payload.
106 *
107 * @author margaretha
108 * */
109 private void readPayload(CandidateElementSpans cs) throws IOException {
110 BytesRef payload = termSpans.getPostings().getPayload();
111 //ByteBuffer payloadBuffer = ByteBuffer.allocate(128);
112
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000113 if (payload != null) {
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000114 // Copy some payloads like start character and end character
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000115 //payloadBuffer.put(payload.bytes, payload.offset, 8);
116
117 cs.setEnd(readEndPostion(payload));
Eliza Margaretha1c3bf272014-06-11 11:50:39 +0000118
119 if (isElementRef ){
120 // Copy rest of payloads after the end position and elementref
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000121 //payloadBuffer.put(payload.bytes, payload.offset + 14, payload.length - 14);
122 cs.setElementRef(readElementRef(payload));
Eliza Margaretha1c3bf272014-06-11 11:50:39 +0000123 }
124 else{
125 // Copy rest of payloads after the end position
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000126 //payloadBuffer.put(payload.bytes, payload.offset + 12, payload.length - 12);
127 cs.setElementRef((short) -1);
Eliza Margaretha1c3bf272014-06-11 11:50:39 +0000128 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000129
130 //byte[] offsetCharacters = new byte[8];
131 //System.arraycopy(payloadBuffer.array(), 0, offsetCharacters, 0, 8);
132
133 cs.setPayloads(Collections.singletonList(readOffset(payload)));
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000134 }
135 else {
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000136 cs.setEnd(cs.getStart());
137 cs.setElementRef((short) -1);
138 cs.setPayloads(null);
139 }
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000140 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000141
142
143 /** Get the offset bytes from the payload.
144 * */
145 private byte[] readOffset(BytesRef payload){
146 byte[] b = new byte[8];
147 System.arraycopy(payload.bytes, payload.offset, b, 0, 8);
148 return b;
149 }
150
151 /** Get the end position bytes from the payload and cast it to int.
152 * */
153 private int readEndPostion(BytesRef payload) {
154 byte[] b = new byte[4];
155 System.arraycopy(payload.bytes, payload.offset + 8, b, 0, 4);
156 return ByteBuffer.wrap(b).getInt();
157 }
158
159 /** Get the elementRef bytes from the payload and cast it into short.
160 * */
161 private short readElementRef(BytesRef payload) {
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000162 byte[] b = new byte[2];
163 System.arraycopy(payload.bytes, payload.offset + 12, b, 0, 2);
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000164 return ByteBuffer.wrap(b).getShort();
Eliza Margarethafb25cef2014-06-06 14:19:07 +0000165 }
166
167 @Override
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000168 public boolean skipTo(int target) throws IOException {
169 if (hasMoreSpans && (firstSpans.doc() < target)){
170 if (!firstSpans.skipTo(target)){
171 candidateList.clear();
172 return false;
173 }
174 }
175 setCandidateList();
176 matchPayload.clear();
177 isStartEnumeration=false;
178 return advance();
179 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000180
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000181 @Override
182 public long cost() {
183 return termSpans.cost();
184 }
Nils Diewald20607ab2014-03-20 23:28:36 +0000185
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000186 public short getElementRef() {
187 return elementRef;
188 }
Nils Diewald20607ab2014-03-20 23:28:36 +0000189
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000190 public void setElementRef(short elementRef) {
191 this.elementRef = elementRef;
192 }
Nils Diewald20607ab2014-03-20 23:28:36 +0000193
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000194 /** Match candidate for element spans.
195 * */
Eliza Margarethae7938d32014-07-29 12:12:15 +0000196 class CandidateElementSpans extends CandidateSpan {
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000197
198 private short elementRef;
199
200 public CandidateElementSpans(Spans span, short elementRef)
201 throws IOException {
202 super(span);
203 setElementRef(elementRef);
204 }
205
206 public void setElementRef(short elementRef) {
207 this.elementRef = elementRef;
208 }
209 public short getElementRef() {
210 return elementRef;
Eliza Margarethae7938d32014-07-29 12:12:15 +0000211 }
Eliza Margarethac7fb7312014-07-25 14:11:36 +0000212 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000213};