blob: 72d59c8671a79497b70b102a2c1de6652e6cb2f3 [file] [log] [blame]
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +00001package de.ids_mannheim.korap.query.spans;
2
3import java.io.IOException;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +00004import java.util.ArrayList;
5import java.util.Collections;
6import java.util.List;
7import java.util.Map;
8
9import org.apache.lucene.index.AtomicReaderContext;
10import org.apache.lucene.index.Term;
11import org.apache.lucene.index.TermContext;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000012import org.apache.lucene.search.spans.Spans;
13import org.apache.lucene.search.spans.TermSpans;
14import org.apache.lucene.util.Bits;
15import org.apache.lucene.util.BytesRef;
16import org.slf4j.Logger;
17import org.slf4j.LoggerFactory;
18
19import de.ids_mannheim.korap.query.SpanRelationQuery;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000020
21/** Enumeration of spans denoting relations between two tokens/elements. The start and end of
22 * a RelationSpan always denote the start and end of the source token/element.
23 *
24 * There are 4 types of relations, which is differentiated by the payload length in bytes.
25 * 1. Token to token relation (1 int & 1 short, length: 6)
26 * 2. Token to span (2 int & 1 short, length: 10)
27 * 3. Span to token (int, byte, int, short, length: 11)
Eliza Margaretha51fd5c22014-10-14 13:12:33 +000028 * 4. Span to Span (3 int & 1 short, length: 14)
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000029 *
30 * Every integer value denotes the start/end position of the start/target of a relation,
31 * in this format: (sourceEndPos?, startTargetPos, endTargetPos?). The end position of a token is
32 * identical to its start position, and therefore not is saved in a payload.
33 *
34 * A short value denote the relation id, used for matching relation-attributes.
35 * The byte in relation #3 is just a dummy to create a different length from the relation #2.
36 *
37 * NOTE: Sorting of the candidate spans can alternatively be done in indexing, instead of here.
38 *
39 * @author margaretha
40 * */
Eliza Margarethaf4611272014-10-16 08:45:33 +000041public class RelationSpans extends SpansWithId{
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000042
Eliza Margaretha98c200e2014-10-15 13:59:58 +000043 //short relationId;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000044 int targetStart, targetEnd;
45 int currentDoc, currentPosition;
46
47 private TermSpans relationTermSpan;
48
49 protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
50 private List<CandidateRelationSpan> candidateList;
51
52 public RelationSpans(SpanRelationQuery relationSpanQuery,
53 AtomicReaderContext context, Bits acceptDocs,
54 Map<Term, TermContext> termContexts) throws IOException {
55 super(relationSpanQuery, context, acceptDocs, termContexts);
56 candidateList = new ArrayList<>();
57 relationTermSpan = (TermSpans) firstSpans;
58 hasMoreSpans = relationTermSpan.next();
59 }
60
61 @Override
62 public boolean next() throws IOException {
Eliza Margaretha51fd5c22014-10-14 13:12:33 +000063 isStartEnumeration=false;
64 return advance();
65 }
66
67 private boolean advance() throws IOException{
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000068 while(hasMoreSpans || !candidateList.isEmpty()){
69 if (!candidateList.isEmpty()){
70 CandidateRelationSpan cs = candidateList.get(0);
71 this.matchDocNumber = cs.getDoc();
72 this.matchStartPosition = cs.getStart();
73 this.matchEndPosition = cs.getEnd();
Eliza Margaretha98c200e2014-10-15 13:59:58 +000074 this.matchPayload = cs.getPayloads();
75 this.spanId = cs.getSpanId(); // relation id
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +000076 candidateList.remove(0);
77 return true;
78 }
79 else{
80 setCandidateList();
81 currentDoc = relationTermSpan.doc();
82 currentPosition = relationTermSpan.start();
83 }
84 }
85 return false;
86 }
87
88 private void setCandidateList() throws IOException {
89 while (hasMoreSpans && relationTermSpan.doc() == currentDoc &&
90 relationTermSpan.start() == currentPosition){
91 CandidateRelationSpan cs = new CandidateRelationSpan(relationTermSpan);
92 readPayload(cs);
93
94 candidateList.add(cs);
95 hasMoreSpans = relationTermSpan.next();
96 }
97 Collections.sort(candidateList);
98
99 /*for (CandidateRelationSpan cs:candidateList){
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000100 System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000101 +" target "+cs.getTargetStart()+","+cs.getTargetEnd() +" id:"+cs.getRelationId());
102 }*/
103 }
104
105 private void readPayload(CandidateRelationSpan cs) {
106 List<byte[]> payload = (List<byte[]>) cs.getPayloads();
107 int length = payload.get(0).length;
108 BytesRef payloadBytesRef = new BytesRef(payload.get(0));
109
110 int i;
111
112 switch (length) {
113 case 6: // Token to token
114 i = PayloadReader.readInteger(payloadBytesRef,0);
115 cs.setTargetStart(i);
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000116 cs.setTargetEnd(i);
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000117 break;
118
119 case 10: // Token to span
120 cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,0));
121 cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,4));
122 break;
123
124 case 11: // Span to token
125 cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
126 i = PayloadReader.readInteger(payloadBytesRef,5);
127 cs.setTargetStart(i);
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000128 cs.setTargetEnd(i);
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000129 break;
130
Eliza Margaretha51fd5c22014-10-14 13:12:33 +0000131 case 14: // Span to span
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000132 cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
133 cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,4));
134 cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,8));
135 break;
136 }
137
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000138 cs.setSpanId(PayloadReader.readShort(payloadBytesRef, length-2)); //relation id
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000139 }
140
141 @Override
Eliza Margaretha51fd5c22014-10-14 13:12:33 +0000142 public boolean skipTo(int target) throws IOException {
143 if (hasMoreSpans && (firstSpans.doc() < target)){
144 if (!firstSpans.skipTo(target)){
145 candidateList.clear();
146 return false;
147 }
148 }
149 setCandidateList();
150 matchPayload.clear();
151 isStartEnumeration=false;
152 return advance();
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000153 }
154
155 @Override
156 public long cost() {
Eliza Margaretha51fd5c22014-10-14 13:12:33 +0000157 return firstSpans.cost();
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000158 }
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000159/*
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000160 public short getRelationId() {
161 return relationId;
162 }
163
164 public void setRelationId(short relationId) {
165 this.relationId = relationId;
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000166 }*/
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000167
168 public int getTargetStart() {
169 return targetStart;
170 }
171
172 public void setTargetStart(int targetStart) {
173 this.targetStart = targetStart;
174 }
175
176 public int getTargetEnd() {
177 return targetEnd;
178 }
179
180 public void setTargetEnd(int targetEnd) {
181 this.targetEnd = targetEnd;
182 }
183
184
185 class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
186
Eliza Margaretha98c200e2014-10-15 13:59:58 +0000187 private int targetStart, targetEnd;
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000188
189 public CandidateRelationSpan(Spans span) throws IOException{
190 super(span);
191 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000192
193 @Override
194 public int compareTo(CandidateSpan o) {
195
196 int sourcePositionComparison = super.compareTo(o);
197
198 CandidateRelationSpan cs = (CandidateRelationSpan) o;
199 if (sourcePositionComparison == 0){
200 if (this.getTargetStart() == cs.getTargetStart()){
201 if (this.getTargetEnd() == cs.getTargetEnd())
202 return 0;
203 if (this.getTargetEnd() > cs.getTargetEnd() )
204 return 1;
205 else return -1;
206 }
207 else if (this.getTargetStart() < cs.getTargetStart())
208 return -1;
209 else return 1;
210 }
211
212 return sourcePositionComparison;
213 }
214
215 public int getTargetEnd() {
216 return targetEnd;
217 }
218
219 public void setTargetEnd(int targetEnd) {
220 this.targetEnd = targetEnd;
221 }
222
223 public int getTargetStart() {
224 return targetStart;
225 }
226
227 public void setTargetStart(int targetStart) {
228 this.targetStart = targetStart;
229 }
Eliza Margarethaf13b8ad2014-10-13 16:36:28 +0000230 }
231
232}