Add flag to enable or disable reading elementRef in ElementSpans
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index d2360ac..dc23d7b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -29,7 +29,8 @@
super(simpleSpanQuery, context, acceptDocs, termContexts);
elements = (ElementSpans) firstSpans;
attributes = (AttributeSpans) secondSpans;
- hasMoreSpans = firstSpans.next() & secondSpans.next();
+ elements.isElementRef = true; // dummy setting enabling reading elementRef
+ hasMoreSpans = elements.next() & attributes.next();
}
@Override
@@ -40,7 +41,8 @@
private boolean advance() throws IOException {
- while (hasMoreSpans && ensureSamePosition(elements,attributes)){
+ while (hasMoreSpans && ensureSamePosition(elements,attributes)){
+
logger.info("element: " + elements.start() + ","+ elements.end() +" ref:"+elements.getElementRef());
logger.info("attribute {} ref:{}", attributes.start(), attributes.getElementRef());
@@ -55,7 +57,10 @@
if (elements.getElementRef() < attributes.getElementRef())
hasMoreSpans = attributes.next();
- else hasMoreSpans = elements.next();
+ else {
+ elements.isElementRef = true; // dummy setting enabling reading elementRef
+ hasMoreSpans = elements.next();
+ }
}
return false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index deca135..7ef8928 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -23,8 +23,8 @@
// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
// Todo: Use copyFrom() instead of clone()
-/**
- * @author Nils Diewald
+/**
+ * @author Nils Diewald, margaretha
*
* Use copyFrom instead of clone
*/
@@ -40,6 +40,8 @@
private LinkedList<KorapTermSpan> memory;
private KorapTermSpan overflow, current, temp;
+ public boolean isElementRef = false; // A dummy flag for enabling reading elementRef from the payload
+
public static final ElementSpans EMPTY_ELEMENT_SPANS
= new EmptyElementSpans();
@@ -308,11 +310,19 @@
// Copy some payloads like start character and end character
this.current.payload.put(payload.bytes, payload.offset, 8);
- // Copy rest of payloads after the end position and elementref
- this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
- this.current.end = readEndPostion(payload);
- this.current.elementRef = readElementRef(payload);
+ this.current.end = readEndPostion(payload);
+
+ if (isElementRef ){
+ // Copy rest of payloads after the end position and elementref
+ this.current.payload.put(payload.bytes, payload.offset + 14, payload.length - 14);
+ this.current.elementRef = readElementRef(payload);
+ }
+ else{
+ // Copy rest of payloads after the end position
+ this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
+ this.current.elementRef = 0;
+ }
}
else {
this.current.end = this.current.start;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index fff2cdf..e435d50 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -14,112 +14,98 @@
import de.ids_mannheim.korap.query.SimpleSpanQuery;
/** An abstract class for Span enumeration whose two child spans are matched by
- * their positions and do not have a partial overlap.
+ * their positions and do not have a partial overlap.
*
- * @author margaretha
+ * @author margaretha
* */
public abstract class NonPartialOverlappingSpans extends SimpleSpans{
- private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
- public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
- super(simpleSpanQuery, context, acceptDocs, termContexts);
+ private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
+
+ public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
+ super(simpleSpanQuery, context, acceptDocs, termContexts);
- // Warning: not implemented, results in errors for SpanNextQuery
- // This.collectPayloads = simpleSpanQuery.isCollectPayloads()
- collectPayloads = true;
- hasMoreSpans = secondSpans.next();
- };
+ // Warning: not implemented, results in errors for SpanNextQuery
+ // This.collectPayloads = simpleSpanQuery.isCollectPayloads()
+ collectPayloads = true;
+ hasMoreSpans = secondSpans.next();
+
+ }
@Override
- public boolean next() throws IOException {
+ public boolean next() throws IOException {
// Warning: this does not work for overlapping spans
// e.g. get multiple second spans in a firstspan
- hasMoreSpans &= firstSpans.next();
- isStartEnumeration=false;
- matchPayload.clear();
- return advance();
- };
+ hasMoreSpans &= firstSpans.next();
+ isStartEnumeration=false;
+ matchPayload.clear();
+ return advance();
+ }
- /** Advance is a lucene terminology to search for the next match.
- * */
+ /** "Advance" is Lucene terminology for searching for the next match.
+ * */
protected boolean advance() throws IOException {
- // The complexity is linear for searching in a document.
- // It's better if we can skip to >= position in a document.
- while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)) {
- int matchCase = findMatch();
- if (matchCase == 0){
- if (DEBUG) {
- log.trace("Match doc#: {}",
- matchDocNumber);
- log.trace("Match positions: {}-{}",
- matchStartPosition,
- matchEndPosition);
- };
- doCollectPayloads();
- return true;
- }
- else if (matchCase == 1)
- hasMoreSpans = secondSpans.next();
- else
- hasMoreSpans = firstSpans.next();
- };
- return false;
- };
+ // The complexity is linear for searching in a document.
+ // It's better if we can skip to >= position in a document.
+ while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)){
+ int matchCase = findMatch();
+ if (matchCase == 0){
+ log.trace("Match doc#: {}",matchDocNumber);
+ log.trace("Match positions: {}-{}", matchStartPosition,
+ matchEndPosition);
+ doCollectPayloads();
+ return true;
+ }
+ else if (matchCase == 1) {
+ hasMoreSpans = secondSpans.next();
+ }
+ else{
+ hasMoreSpans = firstSpans.next();
+ }
+ }
+ return false;
+ }
/** Specify the condition for a match
* @return 0 iff match is found,
* -1 to advance the firstspan,
* 1 to advance the secondspan
- **/
- protected abstract int findMatch();
+ * */
+ protected abstract int findMatch();
- /** Collecting available payloads from the current first and second spans */
- private void doCollectPayloads() throws IOException {
- if (collectPayloads){
-
- if (DEBUG)
- log.trace("Collect payloads");
-
- if (firstSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = firstSpans.getPayload();
-
- if (DEBUG)
- log.trace("Found {} payloads in firstSpans", payload.size());
-
- matchPayload.addAll(payload);
- };
-
- if (secondSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = secondSpans.getPayload();
-
- if (DEBUG)
- log.trace("Found {} payloads in secondSpans", payload.size());
-
- matchPayload.addAll(payload);
- };
- };
- };
+ /** Collects the available payloads from the current first and second spans. */
+ private void doCollectPayloads() throws IOException {
+ if (collectPayloads){
+ log.trace("Collect payloads");
+ if (firstSpans.isPayloadAvailable()) {
+ Collection<byte[]> payload = firstSpans.getPayload();
+ log.trace("Found {} payloads in firstSpans", payload.size());
+ matchPayload.addAll(payload);
+ }
+ if (secondSpans.isPayloadAvailable()) {
+ Collection<byte[]> payload = secondSpans.getPayload();
+ log.trace("Found {} payloads in secondSpans", payload.size());
+ matchPayload.addAll(payload);
+ }
+ }
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- hasMoreSpans = false;
- return false;
- };
- };
- matchPayload.clear();
- return advance();
- };
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)){
+ if (!firstSpans.skipTo(target)){
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchPayload.clear();
+ return advance();
+ }
- @Override
- public long cost() {
- return firstSpans.cost() + secondSpans.cost();
- };
-};
+ @Override
+ public long cost() {
+ return firstSpans.cost() + secondSpans.cost();
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 1f713bd..4400f6d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
@@ -43,8 +44,7 @@
matchDocNumber= -1;
matchStartPosition= -1;
matchEndPosition= -1;
- matchPayload = new LinkedList<byte[]>(); // why linkedlist?
- // -> Can be an array as well, don't know if it comes from me or from you (ndiewald)
+ matchPayload = new ArrayList<byte[]>();
// Get the enumeration of the two spans to match
firstSpans = simpleSpanQuery.getFirstClause().
@@ -107,7 +107,7 @@
}
@Override
- public String toString() { // who does call this?
+ public String toString() {
return getClass().getName() + "("+query.toString()+")@"+
(isStartEnumeration?"START":(hasMoreSpans?(doc()+":"+
start()+"-"+end()):"END"));