Fixed repetition spans; CandidateSpan now implements Comparable
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 8746bf8..22ef490 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -20,19 +20,22 @@
/** Matches spans wrapped by an element. */
public class SpanElementQuery extends SimpleSpanQuery {
- //private static Term element;
+ // Term matched by this query: "<>:" followed by the element name.
+ // Held per instance: a static field would be overwritten by every newly
+ // constructed query, so extractTerms() could report another query's term.
+ private Term elementTerm;
private String elementStr;
/** Constructor. */
public SpanElementQuery (String field, String term) {
- super(new SpanTermQuery(new Term(field,"<>:"+term)
- //(element = new Term(field,"<>:"+term))
- ),
+ super(new SpanTermQuery(new Term(field,"<>:"+term)),
true
);
- this.elementStr = term;
+ this.elementStr = term;
+ // An instance field cannot be assigned inside the super(...) call,
+ // so the term is rebuilt here from the same field and element name.
+ this.elementTerm = new Term(field,"<>:"+term);
};
-
+
@Override
public Spans getSpans(final AtomicReaderContext context,
Bits acceptDocs,
@@ -60,7 +63,7 @@
@Override
public void extractTerms(Set<Term> terms) {
- terms.add(new Term(getField(),"<>:"+elementStr));
+ // Adds elementTerm, the term assigned in the constructor, to the given set.
+ terms.add(elementTerm);
};
@Override
@@ -91,6 +94,8 @@
else if (!elementStr.equals(other.elementStr))
return false;
+ if (!getField().equals(other.getField()))
+ return false;
return true;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index bdd084d..b64d253 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -154,7 +154,7 @@
/** Match candidate for attribute spans.
* */
class CandidateAttributeSpan extends CandidateSpan
- implements Comparable<CandidateAttributeSpan>{
+ implements Comparable<CandidateSpan>{
private short elementRef;
@@ -172,10 +172,11 @@
}
@Override
- public int compareTo(CandidateAttributeSpan o) {
- if (this.elementRef == o.elementRef)
+ // Orders attribute candidates by elementRef only (ascending). The argument
+ // is cast without a type check, so comparing with a CandidateSpan that is
+ // not a CandidateAttributeSpan throws ClassCastException.
+ public int compareTo(CandidateSpan o) {
+ CandidateAttributeSpan cs = (CandidateAttributeSpan) o;
+ if (this.elementRef == cs.elementRef)
return 0;
- else if (this.elementRef > o.elementRef )
+ else if (this.elementRef > cs.elementRef )
return 1;
return -1;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index 41ba103..e13e54e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -9,12 +9,14 @@
/** A span kept as a candidate for matching with another Span
* @author margaretha
* */
-public class CandidateSpan {
+public class CandidateSpan implements Comparable<CandidateSpan>{
private int doc,start,end;
private long cost;
private Collection<byte[]> payloads = new ArrayList<>();
private int position;
private CandidateSpan childSpan; // used for multiple distance with unordered constraint
+ private short elementRef;
+
public CandidateSpan(Spans span) throws IOException {
this.doc = span.doc();
@@ -23,6 +25,15 @@
this.cost = span.cost();
if (span.isPayloadAvailable())
setPayloads(span.getPayload());
+
+ /*if (span instanceof ElementSpans ){
+ ElementSpans s = (ElementSpans) span;
+ this.elementRef = s.getElementRef();
+ }
+ else if (span instanceof AttributeSpans){
+ AttributeSpans s = (AttributeSpans) span;
+ this.elementRef = s.getElementRef();
+ } */
}
public CandidateSpan(Spans span, int position) throws IOException {
@@ -91,4 +102,27 @@
public void setChildSpan(CandidateSpan childSpan) {
this.childSpan = childSpan;
}
+
+ /** Returns the elementRef value stored on this candidate span.
+ * @return the current elementRef
+ * */
+ public short getElementRef() {
+ return elementRef;
+ }
+
+ /** Stores the given elementRef value on this candidate span.
+ * @param elementRef the value to store
+ * */
+ public void setElementRef(short elementRef) {
+ this.elementRef = elementRef;
+ }
+
+ /** Orders candidate spans by document number, then by start position,
+ * then by end position (all ascending).
+ * @param o the candidate span to compare with
+ * @return -1, 0 or 1 when this span sorts before, equal to or after o
+ * */
+ @Override
+ public int compareTo(CandidateSpan o) {
+ // Spans of different documents must never compare as equal, even when
+ // their start and end positions coincide.
+ if (this.getDoc() != o.getDoc())
+ return (this.getDoc() < o.getDoc()) ? -1 : 1;
+ if (this.getStart() != o.getStart())
+ return (this.getStart() < o.getStart()) ? -1 : 1;
+ if (this.getEnd() == o.getEnd())
+ return 0;
+ return (this.getEnd() < o.getEnd()) ? -1 : 1;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
index 5f8b553..fb5ce66 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
@@ -16,8 +16,7 @@
/** DistanceSpan is a base class for enumeration of span matches,
* whose two child spans have a specific range of distance (within
- * a min and a max distance) and must be in order (a firstspan is
- * followed by a secondspan).
+ * a min and a max distance).
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index b5a2f97..a9aa08b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -193,8 +193,7 @@
/** Match candidate for element spans.
* */
- class CandidateElementSpans extends CandidateSpan
- implements Comparable<CandidateElementSpans>{
+ class CandidateElementSpans extends CandidateSpan {
private short elementRef;
@@ -209,15 +208,6 @@
}
public short getElementRef() {
return elementRef;
- }
-
- @Override
- public int compareTo(CandidateElementSpans o) {
- if (this.getEnd() == o.getEnd())
- return 0;
- else if (this.getEnd() > o.getEnd() )
- return 1;
- return -1;
- }
+ }
}
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index abd33e9..8ece0f8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -15,14 +16,19 @@
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
+/** Enumeration of spans occurring multiple times in a sequence.
+ * The minimum and maximum number of repetitions can be set.
+ *
+ * @author margaretha
+ * */
public class RepetitionSpans extends SimpleSpans{
private int min,max;
private long matchCost;
private List<CandidateSpan> matchList;
private Logger log = LoggerFactory.getLogger(RepetitionSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
public RepetitionSpans(SpanRepetitionQuery query,
@@ -42,6 +48,9 @@
return advance();
}
+ /** Get the next span from the candidate match list, or set it first when
+ * it is empty.
+ * */
private boolean advance() throws IOException {
while (hasMoreSpans || !matchList.isEmpty()){
@@ -52,51 +61,99 @@
}
matchPayload.clear();
matchCost = 0;
- setMatchList();
+
+ List<CandidateSpan> adjacentSpans = collectAdjacentSpans();
+ setMatchList(adjacentSpans);
}
-
return false;
}
- private void setMatchList() throws IOException {
+ /** Collects the current span of firstSpans and every following span of the
+ * same document whose start position is not greater than the end position
+ * of the previously collected span. When this method returns, firstSpans
+ * is either exhausted (hasMoreSpans is false) or positioned on the first
+ * span that was not collected.
+ * @return a list of the adjacent spans; it always contains at least the
+ * span firstSpans was positioned on when the method was called
+ * @throws IOException propagated from firstSpans
+ * */
+ private List<CandidateSpan> collectAdjacentSpans() throws IOException {
CandidateSpan startSpan = new CandidateSpan(firstSpans);
- if (min == 1 ) matchList.add(startSpan);
- if (max == 1) {
- hasMoreSpans = firstSpans.next();
- }
- else {
- CandidateSpan prevSpan = startSpan;
- Collection<byte[]> payload;
- int n = 2;
- while (n <= max &&
- (hasMoreSpans = firstSpans.next()) &&
- startSpan.getDoc() == firstSpans.doc() ){
- if (firstSpans.start() > prevSpan.getEnd()){
- break;
- }
- else if (min <= n){
- if (firstSpans.isPayloadAvailable()){
- payload = firstSpans.getPayload();
- } else {payload = null;}
-
- matchCost += firstSpans.cost();
- matchList.add(new CandidateSpan(
- startSpan.getStart(),
- firstSpans.end(),
- firstSpans.doc(),
- matchCost,
- payload)
- );
- }
- prevSpan = new CandidateSpan(firstSpans);
- n++;
+ List<CandidateSpan> adjacentSpans = new ArrayList<CandidateSpan>();
+ adjacentSpans.add(startSpan);
+
+ CandidateSpan prevSpan = startSpan;
+
+ while ((hasMoreSpans = firstSpans.next()) &&
+ startSpan.getDoc() == firstSpans.doc() ){
+
+ // A gap between the previous span's end and this span's start ends
+ // the run of adjacent spans.
+ if (firstSpans.start() > prevSpan.getEnd()){
+ break;
}
- }
+ else {
+ prevSpan = new CandidateSpan(firstSpans);
+ adjacentSpans.add(prevSpan);
+ }
+ }
+ return adjacentSpans;
}
+ /** Generates every repetition candidate from the adjacent spans, adds the
+ * candidates to the match list and then sorts the match list. For each
+ * repetition count from min to max, every window of that many consecutive
+ * adjacent spans yields one candidate reaching from the start of the
+ * window's first span to the end of its last span.
+ * @param adjacentSpans the adjacent spans, in the order they were collected
+ * */
+ private void setMatchList(List<CandidateSpan> adjacentSpans){
+ CandidateSpan startSpan, endSpan, matchSpan;
+ for (int i=min; i<max+1; i++){
+ int j=0;
+ int endIndex;
+ // j and endIndex are the first and last index of the current window.
+ while ((endIndex = j+i-1) < adjacentSpans.size()){
+ startSpan = adjacentSpans.get(j);
+ if (i == 1){
+ matchList.add(startSpan);
+ }
+ else {
+ endSpan = adjacentSpans.get(endIndex);
+ // Cost and payloads are gathered over the window j..endIndex.
+ // Passing the repetition count i as the first index would take
+ // them from spans outside the match, or from none at all.
+ matchSpan = new CandidateSpan(
+ startSpan.getStart(),
+ endSpan.getEnd(),
+ startSpan.getDoc(),
+ computeMatchCost(adjacentSpans, j, endIndex),
+ computeMatchPayload(adjacentSpans, j, endIndex));
+
+ matchList.add(matchSpan);
+ }
+ j++;
+ }
+ }
+
+ Collections.sort(matchList);
+ }
+
+ /** Gathers the payloads of the adjacent spans at positions start to end
+ * (both inclusive) into one new collection.
+ * */
+ private Collection<byte[]> computeMatchPayload(
+ List<CandidateSpan> adjacentSpans, int start, int end) {
+ Collection<byte[]> collected = new ArrayList<byte[]>();
+ int index = start;
+ while (index <= end){
+ CandidateSpan span = adjacentSpans.get(index);
+ collected.addAll(span.getPayloads());
+ index++;
+ }
+ return collected;
+ }
+ /** Sums the costs of the adjacent spans at positions start to end
+ * (both inclusive).
+ * */
+ private long computeMatchCost(List<CandidateSpan> adjacentSpans,
+ int start, int end){
+ long total = 0;
+ int index = start;
+ while (index <= end){
+ total += adjacentSpans.get(index).getCost();
+ index++;
+ }
+ return total;
+ }
+
+
+ /** Setting match properties from the candidate span
+ * */
private void setMatchProperties(CandidateSpan candidateSpan)
throws IOException {
matchDocNumber = candidateSpan.getDoc();
@@ -105,6 +162,7 @@
if (collectPayloads && candidateSpan.getPayloads() != null) {
matchPayload.addAll(candidateSpan.getPayloads());
}
+
if (DEBUG)
log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
matchEndPosition);
@@ -125,6 +183,5 @@
@Override
public long cost() {
+ // NOTE(review): in the visible hunks the matchCost field is only ever
+ // reset to 0 in advance(); confirm it is assigned from the current
+ // candidate span elsewhere, otherwise this always returns 0.
return matchCost;
- }
-
+ }
}