Performance improvements
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 88e2fd3..f9a758a 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -131,6 +131,10 @@
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapIndex.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
+
public KorapIndex () throws IOException {
this((Directory) new RAMDirectory());
};
@@ -545,10 +549,12 @@
regex.append("(.){1,}|_[0-9]+");
- log.trace("The final regexString is {}", regex.toString());
+ if (DEBUG)
+ log.trace("The final regexString is {}", regex.toString());
RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
fst = new CompiledAutomaton(regexObj.toAutomaton());
- log.trace("The final regexObj is {}", regexObj.toString());
+ if (DEBUG)
+ log.trace("The final regexObj is {}", regexObj.toString());
};
@@ -578,7 +584,8 @@
continue;
// We've found the correct document! Hurray!
- log.trace("We've found a matching document");
+ if (DEBUG)
+ log.trace("We've found a matching document");
HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
fieldsToLoadLocal.add(field);
@@ -594,7 +601,8 @@
match.setLocalDocID(localDocID);
match.populateDocument(doc, field, fieldsToLoadLocal);
- log.trace("The document has the id '{}'", match.getDocID());
+ if (DEBUG)
+ log.trace("The document has the id '{}'", match.getDocID());
if (!info) break;
@@ -606,7 +614,8 @@
(Bits) bitset,
new HashMap<Term, TermContext>());
- log.trace("Now search for {}", sentence.toString());
+ if (DEBUG)
+ log.trace("Now search for {}", sentence.toString());
int newStart = -1, newEnd = -1;
@@ -633,7 +642,10 @@
// We have a new match surrounding
if (newStart > -1 && newEnd > -1) {
- log.trace("New match spans from {}-{}", newStart, newEnd);
+ if (DEBUG)
+ log.trace("New match spans from {}-{}",
+ newStart,
+ newEnd);
match.setStartPos(newStart);
match.setEndPos(newEnd);
};
@@ -688,13 +700,14 @@
if (pos >= match.getStartPos() && pos < match.getEndPos()) {
- log.trace(
- ">> {}: {}-{}-{}",
- termString,
- docs.freq(),
- pos,
- docs.getPayload()
- );
+ if (DEBUG)
+ log.trace(
+ ">> {}: {}-{}-{}",
+ termString,
+ docs.freq(),
+ pos,
+ docs.getPayload()
+ );
BytesRef payload = docs.getPayload();
@@ -709,7 +722,8 @@
};
TermInfo ti = new TermInfo(termString, pos, bbTerm).analyze();
if (ti.getEndPos() < match.getEndPos()) {
- log.trace("Add {}", ti.toString());
+ if (DEBUG)
+ log.trace("Add {}", ti.toString());
termList.add(ti);
};
};
@@ -718,7 +732,8 @@
// Add annotations based on the retrieved infos
for (TermInfo t : termList.getTerms()) {
- log.trace("Add term {}/{}:{} to {}({})-{}({})",
+ if (DEBUG)
+ log.trace("Add term {}/{}:{} to {}({})-{}({})",
t.getFoundry(),
t.getLayer(),
t.getValue(),
@@ -800,7 +815,8 @@
public KorapResult search (KorapCollection collection, KorapSearch ks) {
- log.trace("Start search");
+ if (DEBUG)
+ log.trace("Start search");
this.termContexts = new HashMap<Term, TermContext>();
@@ -869,7 +885,8 @@
for (; i < hits; i++) {
- log.trace("Match Nr {}/{}", i, count);
+ if (DEBUG)
+ log.trace("Match Nr {}/{}", i, count);
// There are no more spans to find
if (spans.next() != true)
@@ -910,7 +927,8 @@
ByteBuffer bb = ByteBuffer.allocate(10);
for (byte[] b : spans.getPayload()) {
- log.trace("Found a payload!!! with length {}", b.length);
+ if (DEBUG)
+ log.trace("Found a payload!!! with length {}", b.length);
// Todo element searches!
@@ -923,7 +941,10 @@
int end = bb.getInt() -1;
byte number = bb.get();
- log.trace("Have a payload: {}-{}", start, end);
+ if (DEBUG)
+ log.trace("Have a payload: {}-{}",
+ start,
+ end);
match.addHighlight(start, end, number);
}
@@ -944,9 +965,10 @@
if (bb.getInt(4) > match.potentialEndPosChar)
match.potentialEndPosChar = bb.getInt(4);
- log.trace("Element payload from {} to {}",
- match.potentialStartPosChar,
- match.potentialEndPosChar);
+ if (DEBUG)
+ log.trace("Element payload from {} to {}",
+ match.potentialStartPosChar,
+ match.potentialEndPosChar);
}
else if (b.length == 4) {
@@ -970,7 +992,9 @@
match.internalDocID = docID;
match.populateDocument(doc, field, fieldsToLoadLocal);
- log.trace("I've got a match in {} of {}", match.getDocID(), count);
+ if (DEBUG)
+ log.trace("I've got a match in {} of {}",
+ match.getDocID(), count);
atomicMatches.add(match);
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index e8f062e..a99813b 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -37,6 +37,7 @@
*/
@JsonInclude(Include.NON_NULL)
public class KorapMatch extends KorapDocument {
+
ObjectMapper mapper = new ObjectMapper();
// Snippet information
@@ -92,6 +93,9 @@
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
/**
* Constructs a new KorapMatch object.
* TODo: Maybe that's not necessary!
@@ -203,7 +207,9 @@
if (this.highlight == null)
this.highlight = new ArrayList<Highlight>(16);
- log.trace("Add highlight {} from {} to {}", hl.number, hl.start, hl.end);
+
+ if (DEBUG)
+ log.trace("Add highlight {} from {} to {}", hl.number, hl.start, hl.end);
this._reset();
@@ -376,13 +382,15 @@
return false;
};
- log.trace("Start highlight processing ...");
+ if (DEBUG)
+ log.trace("Start highlight processing ...");
PositionsToOffset pto = this.positionsToOffset;
pto.add(this.localDocID, this.getStartPos());
pto.add(this.localDocID, this.getEndPos() - 1);
- log.trace("PTO now has start and end positions {}-{}", this.getStartPos(), this.getEndPos());
+ if (DEBUG)
+ log.trace("PTO now has start and end positions {}-{}", this.getStartPos(), this.getEndPos());
if (this.highlight != null) {
for (Highlight hl : this.highlight) {
@@ -391,7 +399,8 @@
};
};
- log.trace("All highlights are added");
+ if (DEBUG)
+ log.trace("All highlights are added");
// Get the list of spans for matches and highlighting
if (this.span == null || this.span.size() == 0) {
@@ -665,7 +674,8 @@
for (int s : this.balanceStack) {
sb.append('[').append(s).append(']');
};
- log.trace(sb.toString());
+ if (DEBUG)
+ log.trace(sb.toString());
// class number of the last element
int eold = this.balanceStack.removeLast();
@@ -676,9 +686,14 @@
// Retrieve last combinator on stack
lastComb = this.combine.peekLast();
- log.trace("Closing element is unbalanced - {} " +
- "!= {} with lastComb {}|{}|{}",
- eold, number, lastComb.type, lastComb.number, lastComb.characters);
+ if (DEBUG)
+ log.trace("Closing element is unbalanced - {} " +
+ "!= {} with lastComb {}|{}|{}",
+ eold,
+ number,
+ lastComb.type,
+ lastComb.number,
+ lastComb.characters);
// combinator is opening and the number is not equal to the last
// element on the balanceStack
@@ -691,7 +706,8 @@
// combinator is either closing (??) or another opener
else {
- log.trace("close element a) {}", eold);
+ if (DEBUG)
+ log.trace("close element a) {}", eold);
// Add a closer for the old element (this has following elements)
this.combine.add(new HighlightCombinatorElement((byte) 2, eold, false));
@@ -707,8 +723,10 @@
// Get last combinator on the stack
lastComb = this.combine.peekLast();
- log.trace("LastComb: " + lastComb.type + '|' + lastComb.number + '|' + lastComb.characters + " for " + number);
- log.trace("Stack for checking 2: {}|{}|{}|{}", lastComb.type, lastComb.number, lastComb.characters, number);
+ if (DEBUG) {
+ log.trace("LastComb: " + lastComb.type + '|' + lastComb.number + '|' + lastComb.characters + " for " + number);
+ log.trace("Stack for checking 2: {}|{}|{}|{}", lastComb.type, lastComb.number, lastComb.characters, number);
+ };
if (lastComb.type == 1 && lastComb.number == number) {
while (lastComb.type == 1 && lastComb.number == number) {
@@ -718,7 +736,8 @@
};
}
else {
- log.trace("close element b) {}", number);
+ if (DEBUG)
+ log.trace("close element b) {}", number);
// Add a closer
this.combine.add(new HighlightCombinatorElement((byte) 2, number));
@@ -727,7 +746,8 @@
// Fetch everything from the tempstack and reopen it
for (int e : tempStack) {
- log.trace("Reopen element {}", e);
+ if (DEBUG)
+ log.trace("Reopen element {}", e);
combine.add(new HighlightCombinatorElement((byte) 1, e));
balanceStack.add(e);
};
@@ -747,7 +767,8 @@
int pos = 0;
int oldPos = 0;
- log.trace("Create Snippet");
+ if (DEBUG)
+ log.trace("Create Snippet");
this.snippetStack = new HighlightCombinator();
@@ -792,7 +813,8 @@
if (this.processed && this.snippetHTML != null)
return this.snippetHTML;
- log.trace("Create HTML Snippet");
+ if (DEBUG)
+ log.trace("Create HTML Snippet");
StringBuilder sb = new StringBuilder();
@@ -873,7 +895,8 @@
// TODO: Not very fast - improve!
private ArrayList<int[]> _processHighlightStack () {
- log.trace("Create Stack");
+ if (DEBUG)
+ log.trace("Create Stack");
LinkedList<int[]> openList = new LinkedList<int[]>();
LinkedList<int[]> closeList = new LinkedList<int[]>();
@@ -914,7 +937,8 @@
startPosChar,
endPosChar;
- log.trace("Create Spans");
+ if (DEBUG)
+ log.trace("Create Spans");
int ldid = this.localDocID;
@@ -929,14 +953,17 @@
startPosChar = potentialStartPosChar;
endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1);
- log.trace("startPosChar for PTO is {}({})", startPosChar, this.startPos);
- log.trace("endPosChar for PTO is {}({})", endPosChar, this.endPos);
+ if (DEBUG) {
+ log.trace("startPosChar for PTO is {}({})", startPosChar, this.startPos);
+ log.trace("endPosChar for PTO is {}({})", endPosChar, this.endPos);
+ };
if (endPosChar < potentialEndPosChar)
endPosChar = potentialEndPosChar;
- log.trace("Matchposition: {}-{}", startPosChar, endPosChar);
+ if (DEBUG)
+ log.trace("Matchposition: {}-{}", startPosChar, endPosChar);
// left context
if (leftTokenContext) {
@@ -952,7 +979,12 @@
ldid,
this.endPos + this.rightContextOffset - 1
);
- log.trace("For endOffset {} ({}+{}-1) pto returns {}", (this.endPos + this.rightContextOffset - 1), this.endPos, this.rightContextOffset, endOffsetChar);
+ if (DEBUG)
+ log.trace("For endOffset {} ({}+{}-1) pto returns {}",
+ (this.endPos + this.rightContextOffset - 1),
+ this.endPos,
+ this.rightContextOffset,
+ endOffsetChar);
}
else {
if (endPosChar == -1) {
@@ -979,7 +1011,12 @@
if (endOffsetChar != -1 && endOffsetChar < endPosChar)
endOffsetChar = endPosChar;
- log.trace("Offsetposition {} till {} with contexts {} and {}", startOffsetChar, endOffsetChar, leftContextOffset, rightContextOffset);
+ if (DEBUG)
+ log.trace("Offsetposition {} till {} with contexts {} and {}",
+ startOffsetChar,
+ endOffsetChar,
+ leftContextOffset,
+ rightContextOffset);
if (endOffsetChar > -1 && endOffsetChar < this.getPrimaryDataLength()) {
this.tempSnippet = this.getPrimaryData(startOffsetChar, endOffsetChar);
@@ -996,7 +1033,8 @@
// Todo: Simplify
int[] intArray = new int[]{ startPosChar - startOffsetChar, endPosChar - startOffsetChar, -1, 0};
- log.trace("IntArray: {}", intArray);
+ if (DEBUG)
+ log.trace("IntArray: {}", intArray);
this.span.add(intArray);
// highlights
@@ -1016,9 +1054,11 @@
0 // Dummy value for later
};
- log.trace("IntArray: {}", intArray);
- log.trace("PTO-start: {}", start + startOffsetChar);
- log.trace("PTO-end: {}", end + startOffsetChar);
+ if (DEBUG) {
+ log.trace("IntArray: {}", intArray);
+ log.trace("PTO-start: {}", start + startOffsetChar);
+ log.trace("PTO-end: {}", end + startOffsetChar);
+ };
this.span.add(intArray);
};
diff --git a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
index f0a155a..66462d2 100644
--- a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
@@ -48,6 +48,9 @@
private final Logger log = LoggerFactory.getLogger(MultiTermTokenStream.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
public MultiTermTokenStream () {
// this.offsetAttr = this.addAttribute(OffsetAttribute.class);
this.charTermAttr = this.addAttribute(CharTermAttribute.class);
@@ -173,7 +176,8 @@
BytesRef payload = new BytesRef();
if (mt.start != mt.end) {
- log.trace("MultiTerm with payload offset: {}-{}", mt.start, mt.end);
+ if (DEBUG)
+ log.trace("MultiTerm with payload offset: {}-{}", mt.start, mt.end);
payload.append(new BytesRef(int2byte(mt.start)));
payload.append(new BytesRef(int2byte(mt.end)));
/*
@@ -187,11 +191,13 @@
// Payload
if (mt.payload != null) {
payload.append(mt.payload());
- log.trace("Create payload[1] {}", payload.toString());
+ if (DEBUG)
+ log.trace("Create payload[1] {}", payload.toString());
};
if (payload.length > 0) {
- log.trace("Set payload[2] {}", payload.toString());
+ if (DEBUG)
+ log.trace("Set payload[2] {}", payload.toString());
payloadAttr.setPayload(payload);
};
@@ -202,7 +208,8 @@
sb.append('$').append(payload.toString());
sb.append(']');
sb.append(" with increment ").append(mt.posIncr);
- log.trace(sb.toString());
+ if (DEBUG)
+ log.trace(sb.toString());
};
this.mtIndex++;
diff --git a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
index 1fa1b35..cab5410 100644
--- a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
+++ b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
@@ -27,6 +27,10 @@
private final static Logger log = LoggerFactory.getLogger(PositionsToOffset.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
+
private class PositionsToOffsetArray {
public int docID;
public int pos;
@@ -70,10 +74,14 @@
};
public void add (PositionsToOffsetArray ptoa) {
- log.trace("Add positionsToOffsetArray {}/{}", ptoa.docID, ptoa.pos);
+ if (DEBUG)
+ log.trace("Add positionsToOffsetArray {}/{}", ptoa.docID, ptoa.pos);
if (this.processed && this.exists(ptoa))
return;
- log.trace("Reopen processing");
+
+ if (DEBUG)
+ log.trace("Reopen processing");
+
this.positions.add(ptoa);
this.processed = false;
};
@@ -147,7 +155,8 @@
if (processed)
return offsets;
- log.trace("Process offsets");
+ if (DEBUG)
+ log.trace("Process offsets");
StringBuilder sb = new StringBuilder().append('_');
@@ -179,10 +188,11 @@
if (termsEnum.seekExact(term.bytes(), true)) {
- log.trace("Search for {} in doc {} with pos {}",
- term.toString(),
- posDoc.docID,
- posDoc.pos);
+ if (DEBUG)
+ log.trace("Search for {} in doc {} with pos {}",
+ term.toString(),
+ posDoc.docID,
+ posDoc.pos);
// Start an iterator to fetch all payloads of the term
DocsAndPositionsEnum docs = termsEnum.docsAndPositions(
@@ -205,7 +215,11 @@
offsetArray[1] = bbOffset.getInt();
offsets.put(posDoc, offsetArray);
- log.trace("Found {}-{} for {}", offsetArray[0], offsetArray[1], term.toString());
+ if (DEBUG)
+ log.trace("Found {}-{} for {}",
+ offsetArray[0],
+ offsetArray[1],
+ term.toString());
}
else {
diff --git a/src/main/java/de/ids_mannheim/korap/index/SpanInfo.java b/src/main/java/de/ids_mannheim/korap/index/SpanInfo.java
index ffd795f..0314dc1 100644
--- a/src/main/java/de/ids_mannheim/korap/index/SpanInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/SpanInfo.java
@@ -17,6 +17,9 @@
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
public SpanInfo (PositionsToOffset pto, int localDocID) {
this.terms = new ArrayList<TermInfo>(64);
@@ -46,7 +49,8 @@
// Add character offset information to terms that are
// missing this information
for (TermInfo t : this.terms) {
- log.trace("Check offsets for {} and {}", t.getStartPos(), t.getEndPos());
+ if (DEBUG)
+ log.trace("Check offsets for {} and {}", t.getStartPos(), t.getEndPos());
found = true;
if (t.getStartChar() == -1) {
if (this.startChar.containsKey(t.getStartPos()))
@@ -70,7 +74,8 @@
t.getEndChar()
);
else {
- log.trace("{} can't be found!", t.getAnnotation());
+ if (DEBUG)
+ log.trace("{} can't be found!", t.getAnnotation());
this.pto.add(this.localDocID, t.getStartPos());
this.pto.add(this.localDocID, t.getStartPos());
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index b9a894a..cc3d12a 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -13,6 +13,9 @@
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
private String foundry, layer, value, term, type, annotation;
// type can be "term", "pos", "span", "rel-src", "rel-target"
@@ -83,7 +86,8 @@
// Analyze term value
if (ttype != 1) {
- log.trace("Check {} for {}", tterm, prefixRegex.toString());
+ if (DEBUG)
+ log.trace("Check {} for {}", tterm, prefixRegex.toString());
matcher = prefixRegex.matcher(tterm);
if (matcher.matches() && matcher.groupCount() == 3) {
this.annotation = tterm;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index e21910e..ad97152 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -27,7 +27,7 @@
private String field;
/** Constructor. */
- public SpanElementQuery(String field, String term) {
+ public SpanElementQuery (String field, String term) {
StringBuilder sb = new StringBuilder("<>:");
this.field = field;
this.elementStr = term;
@@ -94,43 +94,41 @@
final Terms terms = fields.terms(element.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(element.bytes(), true)) {
+ if (termsEnum.seekExact(element.bytes(), true))
state = termsEnum.termState();
- } else {
+ else
state = null;
- }
- } else {
- state = null;
}
- } else {
- state = null;
+ else
+ state = null;
}
- } else {
+ else
+ state = null;
+ }
+ else
state = termContext.get(context.ord);
- };
-
- if (state == null) { // term is not present in that reader
+
+
+ if (state == null) // term is not present in that reader
return ElementSpans.EMPTY_ELEMENT_SPANS;
- };
final TermsEnum termsEnum = context.reader().terms(element.field()).iterator(null);
termsEnum.seekExact(element.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
- if (postings != null) {
+ if (postings != null)
return new ElementSpans(postings, element);
- };
// element does exist, but has no positions
throw new IllegalStateException("field \"" + element.field() + "\" was indexed without position data; cannot run SpanElementQuery (element=" + element.text() + ")");
- }
+ };
- public String getElementStr() {
- return elementStr;
- }
+ public String getElementStr () {
+ return elementStr;
+ };
- public void setElementStr(String elementStr) {
- this.elementStr = elementStr;
- };
+ public void setElementStr (String elementStr) {
+ this.elementStr = elementStr;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index e52ee73..1838f2e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -26,6 +26,8 @@
private byte number;
private ByteBuffer bb;
private SpanQuery highlight;
+ private Boolean hasmorespans = false;
+
private final Logger log = LoggerFactory.getLogger(ClassSpans.class);
public ClassSpans (SpanQuery highlight, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, byte number) throws IOException {
@@ -69,6 +71,7 @@
log.trace("Forward next");
if (spans.next()) {
+ hasmorespans = true;
highlightedPayload.clear();
@@ -92,13 +95,16 @@
highlightedPayload.add(bb.array());
return true;
};
+ hasmorespans = false;
return false;
};
// inherit javadocs
@Override
public boolean skipTo(int target) throws IOException {
- return spans.skipTo(target);
+ if (hasmorespans && spans.doc() < target)
+ return spans.skipTo(target);
+ return false;
};
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 902a33b..f10809c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap.query.spans;
+import de.ids_mannheim.korap.query.spans.KorapTermSpan;
+
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@@ -20,9 +22,15 @@
// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
-import de.ids_mannheim.korap.query.spans.KorapTermSpan;
-
+/**
+ * @author ndiewald
+ */
public class ElementSpans extends Spans {
+
+ // This advises the Java compiler to omit all logging guarded by DEBUG
+ public static final boolean DEBUG = false;
+
+
private byte[] payloadByte = new byte[4];
private ByteBuffer bb = ByteBuffer.allocate(4);
@@ -63,42 +71,53 @@
end = -1;
if (memory.size() > 0) {
- log.trace("There is a memory entry");
+ if (DEBUG)
+ log.trace("There is a memory entry");
_setToCurrent(memory.removeFirst());
- log.trace("Current1: [{}-{}]", position, end);
+ if (DEBUG)
+ log.trace("Current1: [{}-{}]", position, end);
return true;
};
- log.trace("There is no memory entry");
+ if (DEBUG)
+ log.trace("There is no memory entry");
if (count == freq) {
- log.trace("last position in document");
+
+ if (DEBUG)
+ log.trace("last position in document");
// Check for overflow on document boundary
if (overflow.start != -1) {
- log.trace(" but there is an overflow");
+
+ if (DEBUG)
+ log.trace(" but there is an overflow");
_setToCurrent(overflow).clear();
- log.trace("Current2: [{}-{}]", position, end);
+ if (DEBUG)
+ log.trace("Current2: [{}-{}]", position, end);
return true;
};
if (postings == null) {
- log.trace("no more postings");
+ if (DEBUG)
+ log.trace("no more postings");
return false;
};
- log.trace("Go to next doc");
+ if (DEBUG)
+ log.trace("Go to next doc");
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- log.trace("no more docs");
+ if (DEBUG)
+ log.trace("no more docs");
return false;
};
@@ -114,30 +133,28 @@
int pos = overflow.start;
while (true) {
- /*
- if (DEBUG)
- System.err.println(">> Reset end and payload");
- storedPayload.clear();
- end = -1;
- */
-
- log.trace("pos is {}", pos);
-
- _log_payloads(1);
+ if (DEBUG) {
+ log.trace("pos is {}", pos);
+ _log_payloads(1);
+ };
if (count == freq) {
- log.trace("last position in document");
+ if (DEBUG)
+ log.trace("last position in document");
if (postings == null) {
- log.trace("no more postings");
+ if (DEBUG)
+ log.trace("no more postings");
// Check for overflow on document boundary
if (overflow.start != -1) {
- log.trace(" but there is an overflow");
+ if (DEBUG)
+ log.trace(" but there is an overflow");
_setToCurrent(overflow).clear();
- log.trace("Current3: [{}-{}]", position, end);
+ if (DEBUG)
+ log.trace("Current3: [{}-{}]", position, end);
return true;
};
@@ -145,36 +162,50 @@
return false;
};
- log.trace("go to next doc");
- _log_payloads(2);
+ if (DEBUG) {
+ log.trace("go to next doc");
+ _log_payloads(2);
+ };
if (overflow.start != -1) {
- log.trace("Storing overflow {} ...", overflow.toString());
- log.trace("... in memory with {}-{}", overflow.startChar(), overflow.endChar());
+ if (DEBUG) {
+ log.trace("Storing overflow {} ...", overflow.toString());
+ log.trace("... in memory with {}-{}", overflow.startChar(), overflow.endChar());
+ };
memory.add((KorapTermSpan) overflow.clone());
overflow.clear();
};
- _log_payloads(3);
+ if (DEBUG)
+ _log_payloads(3);
if (memory.size() > 0) {
- log.trace("sort and return first");
- _log_payloads(4);
- Collections.sort(memory);
- _log_payloads(5);
- _setToCurrent(memory.removeFirst());
- _log_payloads(6);
+ if (DEBUG) {
+ log.trace("sort and return first");
+ _log_payloads(4);
+ };
- log.trace("Current4: [{}-{}]]", position, end);
+ Collections.sort(memory);
+
+ if (DEBUG)
+ _log_payloads(5);
+
+ _setToCurrent(memory.removeFirst());
+
+ if (DEBUG)
+ _log_payloads(6);
+
+ if (DEBUG)
+ log.trace("Current4: [{}-{}]]", position, end);
break;
};
doc = postings.nextDoc();
// New doc
- end = -1;
- pos = -1;
+ end = pos = -1;
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- log.trace("no more docs");
+ if (DEBUG)
+ log.trace("no more docs");
return false;
};
@@ -183,55 +214,75 @@
};
- log.trace("Forward postings");
+ if (DEBUG)
+ log.trace("Forward postings");
+
position = postings.nextPosition();
// New pos!
end = -1;
- _log_payloads(9);
- log.trace("CLEAR PAYLOAD");
+
+ if (DEBUG) {
+ _log_payloads(9);
+ log.trace("CLEAR PAYLOAD");
+ };
+
storedPayload.clear();
hasStoredPayload = false;
- _log_payloads(10);
+ if (DEBUG) {
+ _log_payloads(10);
+ log.trace("next position is {}", position);
+ };
+
count++;
- log.trace("next position is {}", position);
-
// There was no overflow
if (pos == -1 || pos == position) {
if (pos == position) {
- log.trace("Add overflow to memory");
+ if (DEBUG)
+ log.trace("Add overflow to memory");
+
memory.add((KorapTermSpan) overflow.clone());
}
else {
- log.trace("There was no overflow");
+ if (DEBUG)
+ log.trace("There was no overflow");
pos = position;
};
- _log_payloads(8);
- log.trace("*****************************");
+ if (DEBUG) {
+ _log_payloads(8);
+ log.trace("*****************************");
+ };
+
_setCurrentTo(overflow);
- log.trace("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
- log.trace("Set overflow and continue: {} ...", overflow.toString());
- log.trace("... with {}-{}", overflow.startChar(), overflow.endChar());
+ if (DEBUG) {
+ log.trace("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
+ log.trace("Set overflow and continue: {} ...", overflow.toString());
+ log.trace("... with {}-{}", overflow.startChar(), overflow.endChar());
+ };
+
continue;
}
// overflow was older
else if (pos != position) {
- log.trace("Overflow was older");
+ if (DEBUG)
+ log.trace("Overflow was older");
// Use memory
if (memory.size() > 0) {
- log.trace("Add overflow to memory");
+ if (DEBUG)
+ log.trace("Add overflow to memory");
memory.add((KorapTermSpan) overflow.clone());
- log.trace("Sort memory");
+ if (DEBUG)
+ log.trace("Sort memory");
// Sort by end position
Collections.sort(memory);
@@ -239,9 +290,10 @@
// Store current information in overflow
_setCurrentTo(overflow);
- log.trace("Set new overflow: {}", overflow.toString());
-
- log.trace("Get first element from sorted memory");
+ if (DEBUG) {
+ log.trace("Set new overflow: {}", overflow.toString());
+ log.trace("Get first element from sorted memory");
+ };
_setToCurrent(memory.removeFirst());
}
@@ -249,25 +301,30 @@
// Nothing in memory - use overflow!
else {
- log.trace("There is nothing in memory");
+ if (DEBUG)
+ log.trace("There is nothing in memory");
/* Make overflow active and store last position in overflow */
_setCurrentTo(tempSpan);
- log.trace("Temp is now {}", overflow.toString());
+ if (DEBUG)
+ log.trace("Temp is now {}", overflow.toString());
_setToCurrent(overflow);
// Store current information in overflow
overflow.copyFrom(tempSpan);
- log.trace("Overflow is now {}", overflow.toString());
+ if (DEBUG)
+ log.trace("Overflow is now {}", overflow.toString());
};
break;
};
};
- log.trace("Current4: [{}-{}]", position, end);
+
+ if (DEBUG)
+ log.trace("Current4: [{}-{}]", position, end);
readPayload = false;
return true;
@@ -276,7 +333,14 @@
private KorapTermSpan _setToCurrent (KorapTermSpan act) {
if (act.payload != null)
act.payload.rewind();
- log.trace("Set to current with {}, meaning {} - {}", act.toString(), act.payload.getInt(0), act.payload.getInt(4));
+
+ if (DEBUG)
+ log.trace("Set to current with {}, meaning {} - {}",
+ act.toString(),
+ act.payload.getInt(0),
+ act.payload.getInt(4)
+ );
+
if (act.payload != null)
act.payload.rewind();
@@ -286,58 +350,73 @@
hasStoredPayload = false;
if (act.payload != null) {
- log.trace("Payload is not null");
+ if (DEBUG)
+ log.trace("Payload is not null");
+
act.payload.rewind();
storedPayload.put(act.payload);
hasStoredPayload = true;
}
- else {
+ else if (DEBUG)
log.trace("Payload is null");
- };
return act;
};
private void _log_payloads (int nr) {
- if (hasStoredPayload) {
+ if (!DEBUG)
+ return;
+
+ if (hasStoredPayload)
log.trace(
"[{}] payload offsets are {}-{}",
nr,
storedPayload.getInt(0),
storedPayload.getInt(4)
);
- }
- else {
+ else
log.trace("[{}] payload is empty", nr);
- };
};
private void _setCurrentTo () {
overflow.start = position;
overflow.end = this.end();
overflow.payload.clear();
- if (hasStoredPayload) {
+
+ if (hasStoredPayload)
overflow.payload.put(storedPayload);
- };
- log.trace("Set current to Overflow {} with {}-{}", overflow.toString(), overflow.startChar(), overflow.endChar());
+
+ if (DEBUG)
+ log.trace("Set current to Overflow {} with {}-{}", overflow.toString(), overflow.startChar(), overflow.endChar());
};
private void _setCurrentTo (KorapTermSpan o) {
- _log_payloads(7);
+
+ if (DEBUG)
+ _log_payloads(7);
+
o.start = position;
o.end = this.end();
o.payload.clear();
+
if (hasStoredPayload) {
storedPayload.rewind();
o.payload.put(storedPayload);
- log.trace("Object now has offset {}-{}", o.payload.getInt(0), o.payload.getInt(4));
+
+ if (DEBUG)
+ log.trace("Object now has offset {}-{}", o.payload.getInt(0), o.payload.getInt(4));
// Import:
o.payload.rewind();
};
- log.trace("Set current to object {} ...", o.toString());
+
+ if (DEBUG)
+ log.trace("Set current to object {} ...", o.toString());
+
if (hasStoredPayload) {
- log.trace("with {}-{} from {}-{}", o.startChar(), o.endChar(), storedPayload.getInt(0), storedPayload.getInt(4));
+ if (DEBUG)
+ log.trace("with {}-{} from {}-{}", o.startChar(), o.endChar(), storedPayload.getInt(0), storedPayload.getInt(4));
+
storedPayload.rewind();
};
};
@@ -352,12 +431,12 @@
overflow.clear();
storedPayload.clear();
hasStoredPayload = false;
+
if (memory != null)
memory.clear();
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ if (doc == DocIdSetIterator.NO_MORE_DOCS)
return false;
- };
freq = postings.freq();
count = 0;
@@ -403,15 +482,17 @@
if (storedPayload.position() <= 0)
this.getPayloadEndPosition();
- if (hasStoredPayload) {
- log.trace("storedPayload: {} - {}", storedPayload.getInt(0), storedPayload.getInt(4));
- }
- else {
- log.trace("storedPayload is empty");
+ if (DEBUG) {
+ if (hasStoredPayload)
+ log.trace("storedPayload: {} - {}",
+ storedPayload.getInt(0),
+ storedPayload.getInt(4));
+ else
+ log.trace("storedPayload is empty");
};
+
System.arraycopy(storedPayload.array(), 0, offsetCharacters, 0, 8);
- // return Collections.singletonList(storedPayload.array());
return Collections.singletonList(offsetCharacters);
};
@@ -431,30 +512,39 @@
};
private int getPayloadEndPosition () {
- log.trace("getPayloadEndPosition of element ...");
+ if (DEBUG)
+ log.trace("getPayloadEndPosition of element ...");
try {
BytesRef payload = postings.getPayload();
- log.trace(" BytesRef: {}", payload.toString());
+ if (DEBUG)
+ log.trace(" BytesRef: {}", payload.toString());
+
readPayload = true;
storedPayload.clear();
hasStoredPayload = false;
+
if (payload != null) {
- log.trace("Do bit magic");
+ if (DEBUG)
+ log.trace("Do bit magic");
+
storedPayload.put(payload.bytes, payload.offset, 8);
storedPayload.put(payload.bytes, payload.offset + 12, payload.length - 12);
System.arraycopy(payload.bytes, payload.offset + 8, payloadByte, 0, 4);
hasStoredPayload = true;
- log.trace("~~ Bytes: {}-{}-{}",
- storedPayload.getInt(0),
- storedPayload.getInt(4),
- payloadByte);
+ if (DEBUG)
+ log.trace("~~ Bytes: {}-{}-{}",
+ storedPayload.getInt(0),
+ storedPayload.getInt(4),
+ payloadByte);
}
else {
- log.trace("There's no payload available");
+ if (DEBUG)
+ log.trace("There's no payload available");
+
payloadByte = null;
};
@@ -462,13 +552,16 @@
bb.clear();
int t = bb.wrap(payloadByte).getInt();
- log.trace(" |-> {}", t);
+ if (DEBUG)
+ log.trace(" |-> {}", t);
+
return t;
};
}
catch (IOException e) {
- log.trace("IOException {}", e);
+ if (DEBUG)
+ log.trace("IOException {}", e);
};
return -1;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
index c8a223e..66e51bc 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
@@ -56,7 +56,5 @@
.toString();
};
- /*
-equals und hashcode implementieren
- */
+ // TODO: implement equals and hashCode
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
index 7fc1e57..4d50e14 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
@@ -10,9 +10,12 @@
import org.slf4j.LoggerFactory;
public class KorapTermSpan extends KorapSpan {
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
public ByteBuffer payload;
- private final Logger log = LoggerFactory.getLogger(WithinSpans.class);
+ private final Logger log = LoggerFactory.getLogger(KorapTermSpan.class); // category named after this class
@Override
public Object clone() {
@@ -24,14 +27,16 @@
this.payload.rewind();
span.payload.put(this.payload);
- log.trace("Clone payload {} to payload {} ...",
- this.payload.toString(),
- span.payload.toString());
- log.trace("... from {}-{} to {}-{}",
- this.startChar(),
- this.endChar(),
- span.startChar(),
- span.endChar());
+ if (DEBUG) {
+ log.trace("Clone payload {} to payload {} ...",
+ this.payload.toString(),
+ span.payload.toString());
+ log.trace("... from {}-{} to {}-{}",
+ this.startChar(),
+ this.endChar(),
+ span.startChar(),
+ span.endChar());
+ };
return span;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 278f050..6d189a5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -16,31 +16,32 @@
* a span is immediately followed by another span.
*
* TODO: nextSpans needs collectPayloads to be explicitly set true. Why?
+ * ndiewald: They don't have to be set explicitly - just make them always use it
* @author margaretha
* */
public class NextSpans extends NonPartialOverlappingSpans {
public NextSpans (SimpleSpanQuery simpleSpanQuery,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
super(simpleSpanQuery, context, acceptDocs, termContexts);
- }
+ };
/** Check weather the end position of the current firstspan equals
* the start position of the secondspan.
- * */
+ **/
@Override
- protected int findMatch() {
- if (firstSpans.end() == secondSpans.start()) {
- matchDocNumber = firstSpans.doc();
- matchStartPosition = firstSpans.start();
- matchEndPosition = secondSpans.end();
- return 0;
- }
- else if (firstSpans.end() > secondSpans.start())
- return 1;
-
- return -1;
- }
-}
+ protected int findMatch() {
+ if (firstSpans.end() == secondSpans.start()) {
+ matchDocNumber = firstSpans.doc();
+ matchStartPosition = firstSpans.start();
+ matchEndPosition = secondSpans.end();
+ return 0;
+ }
+ else if (firstSpans.end() > secondSpans.start())
+ return 1;
+
+ return -1;
+ };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index e435d50..fff2cdf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -14,98 +14,112 @@
import de.ids_mannheim.korap.query.SimpleSpanQuery;
/** An abstract class for Span enumeration whose two child spans are matched by
- * their positions and do not have a partial overlap.
+ * their positions and do not have a partial overlap.
*
- * @author margaretha
+ * @author margaretha
* */
public abstract class NonPartialOverlappingSpans extends SimpleSpans{
- private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
-
- public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
- super(simpleSpanQuery, context, acceptDocs, termContexts);
+ private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
+ public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
+ super(simpleSpanQuery, context, acceptDocs, termContexts);
- // Warning: not implemented, results in errors for SpanNextQuery
- // This.collectPayloads = simpleSpanQuery.isCollectPayloads()
- collectPayloads = true;
- hasMoreSpans = secondSpans.next();
-
- }
+ // Warning: not implemented, results in errors for SpanNextQuery
+ // This.collectPayloads = simpleSpanQuery.isCollectPayloads()
+ collectPayloads = true;
+ hasMoreSpans = secondSpans.next();
+ };
@Override
- public boolean next() throws IOException {
+ public boolean next() throws IOException {
// Warning: this does not work for overlapping spans
// e.g. get multiple second spans in a firstspan
- hasMoreSpans &= firstSpans.next();
- isStartEnumeration=false;
- matchPayload.clear();
- return advance();
- }
+ hasMoreSpans &= firstSpans.next();
+ isStartEnumeration=false;
+ matchPayload.clear();
+ return advance();
+ };
- /** Advance is a lucene terminology to search for the next match.
- * */
+ /** Advance is a lucene terminology to search for the next match.
+ * */
protected boolean advance() throws IOException {
- // The complexity is linear for searching in a document.
- // It's better if we can skip to >= position in a document.
- while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)){
- int matchCase = findMatch();
- if (matchCase == 0){
- log.trace("Match doc#: {}",matchDocNumber);
- log.trace("Match positions: {}-{}", matchStartPosition,
- matchEndPosition);
- doCollectPayloads();
- return true;
- }
- else if (matchCase == 1) {
- hasMoreSpans = secondSpans.next();
- }
- else{
- hasMoreSpans = firstSpans.next();
- }
- }
- return false;
- }
+ // The complexity is linear for searching in a document.
+ // It's better if we can skip to >= position in a document.
+ while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)) {
+ int matchCase = findMatch();
+ if (matchCase == 0){
+ if (DEBUG) {
+ log.trace("Match doc#: {}",
+ matchDocNumber);
+ log.trace("Match positions: {}-{}",
+ matchStartPosition,
+ matchEndPosition);
+ };
+ doCollectPayloads();
+ return true;
+ }
+ else if (matchCase == 1)
+ hasMoreSpans = secondSpans.next();
+ else
+ hasMoreSpans = firstSpans.next();
+ };
+ return false;
+ };
/** Specify the condition for a match
* @return 0 iff match is found,
* -1 to advance the firstspan,
* 1 to advance the secondspan
- * */
- protected abstract int findMatch();
+ **/
+ protected abstract int findMatch();
- /** Collecting available payloads from the current first and second spans */
- private void doCollectPayloads() throws IOException {
- if (collectPayloads){
- log.trace("Collect payloads");
- if (firstSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = firstSpans.getPayload();
- log.trace("Found {} payloads in firstSpans", payload.size());
- matchPayload.addAll(payload);
- }
- if (secondSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = secondSpans.getPayload();
- log.trace("Found {} payloads in secondSpans", payload.size());
- matchPayload.addAll(payload);
- }
- }
- }
+ /** Collecting available payloads from the current first and second spans */
+ private void doCollectPayloads() throws IOException {
+ if (collectPayloads){
+
+ if (DEBUG)
+ log.trace("Collect payloads");
+
+ if (firstSpans.isPayloadAvailable()) {
+ Collection<byte[]> payload = firstSpans.getPayload();
+
+ if (DEBUG)
+ log.trace("Found {} payloads in firstSpans", payload.size());
+
+ matchPayload.addAll(payload);
+ };
+
+ if (secondSpans.isPayloadAvailable()) {
+ Collection<byte[]> payload = secondSpans.getPayload();
+
+ if (DEBUG)
+ log.trace("Found {} payloads in secondSpans", payload.size());
+
+ matchPayload.addAll(payload);
+ };
+ };
+ };
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- hasMoreSpans = false;
- return false;
- }
- }
- matchPayload.clear();
- return advance();
- }
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)){
+ if (!firstSpans.skipTo(target)){
+ hasMoreSpans = false;
+ return false;
+ };
+ };
+ matchPayload.clear();
+ return advance();
+ };
- @Override
- public long cost() {
- return firstSpans.cost() + secondSpans.cost();
- }
-}
+ @Override
+ public long cost() {
+ return firstSpans.cost() + secondSpans.cost();
+ };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index a2e4b86..ee896f6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -65,7 +65,15 @@
die Wrapreihenfolge a,b,c rauskommt!
*/
+/**
+ * @author ndiewald
+ */
+
public class WithinSpans extends Spans {
+
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
private boolean firstTime = true;
private boolean more = false;
@@ -76,9 +84,15 @@
/** Indicates that the wrap and the embedded spans are in the same doc */
private boolean inSameDoc = false;
- private int wrapDoc;
- private int embeddedDoc;
- private int wrapStart, wrapEnd, embeddedStart, embeddedEnd;
+
+ private int
+ wrapDoc,
+ embeddedDoc,
+ wrapStart,
+ wrapEnd,
+ embeddedStart,
+ embeddedEnd;
+
private Collection<byte[]> embeddedPayload;
// Wrap span
@@ -106,7 +120,8 @@
Map<Term,TermContext> termContexts,
short flag) throws IOException {
- log.trace("Init WithinSpans");
+ if (DEBUG)
+ log.trace("Init WithinSpans");
// Init copies
this.matchPayload = new LinkedList<byte[]>();
@@ -130,17 +145,24 @@
/** Move to the next match, returning true iff any such exists. */
@Override
public boolean next () throws IOException {
- log.trace("Next with doc {}", matchDoc);
+ if (DEBUG)
+ log.trace("Next with doc {}", matchDoc);
// Check for init next
if (firstTime) {
firstTime = false;
if (!embeddedSpans.next() || !wrapSpans.next()) {
- log.trace("No next in firstSpan nor in secondSpan 1");
+
+ if (DEBUG)
+ log.trace("No next in firstSpan nor in secondSpan 1");
+
more = false;
return false;
};
- log.trace("Spans are initialized");
+
+ if (DEBUG)
+ log.trace("Spans are initialized");
+
more = true;
wrapStart = wrapSpans.start();
wrapEnd = wrapSpans.end();
@@ -156,16 +178,20 @@
embeddedPayload.addAll(payload);
};
- log.trace("Init spans: {}", _actPos());
+ if (DEBUG)
+ log.trace("Init spans: {}", _actPos());
if (embeddedDoc == matchDoc) {
inSameDoc = true;
- log.trace("Is now inSameDoc");
+ if (DEBUG)
+ log.trace("Is now inSameDoc");
}
- else {
+ else if (DEBUG) {
log.trace("Is not inSameDoc");
};
- log.trace("Next with doc {} (wrap) and {} (embedded)", wrapDoc, embeddedDoc);
+
+ if (DEBUG)
+ log.trace("Next with doc {} (wrap) and {} (embedded)", wrapDoc, embeddedDoc);
};
matchPayload.clear();
@@ -177,25 +203,27 @@
* @return true iff there is such a match.
*/
private boolean advanceAfterCheck() throws IOException {
- log.trace("advanceAfterChecked inSameDoc: {} and more: {}", inSameDoc, more);
- log.trace("advanceAfterCheck with doc {} (wrap) and {} (embedded)", wrapDoc, embeddedDoc);
+ if (DEBUG) {
+ log.trace("advanceAfterChecked inSameDoc: {} and more: {}", inSameDoc, more);
+ log.trace("advanceAfterCheck with doc {} (wrap) and {} (embedded)", wrapDoc, embeddedDoc);
+ };
// There are more spans, and both spans are either in the
// same doc or can be forwarded to the same doc.
while (more && (inSameDoc || toSameDoc())) {
- log.trace("There are more spans in doc {}", embeddedDoc);
+ if (DEBUG)
+ log.trace("There are more spans in doc {}", embeddedDoc);
/* spans are in the same doc */
- if (within()) {
+ if (within())
return true;
- }
- else {
+ else if (DEBUG)
log.trace("No within");
- };
};
- log.trace("No more matches");
+ if (DEBUG)
+ log.trace("No more matches");
return false; // no more matches
};
@@ -203,7 +231,8 @@
/** Advance the subSpans to the same document */
private boolean toSameDoc () throws IOException {
- log.trace("toSameDoc");
+ if (DEBUG)
+ log.trace("toSameDoc");
/*
wrapDoc = wrapSpansByDoc.doc();
@@ -212,14 +241,17 @@
*/
if (wrapDoc != embeddedDoc) {
- log.trace("Docs not identical: {} vs {}", wrapDoc, embeddedDoc);
+ if (DEBUG)
+ log.trace("Docs not identical: {} vs {}", wrapDoc, embeddedDoc);
spanStore1.clear(); // = new LinkedList<KorapLongSpan>();
spanStore2.clear(); // = new LinkedList<KorapLongSpan>();
if (wrapDoc < embeddedDoc) {
- log.trace("Skip wrap from {} to {}", wrapDoc, embeddedDoc);
- if (!wrapSpansByDoc.skipTo(embeddedDoc)) {
+ if (DEBUG)
+ log.trace("Skip wrap from {} to {}", wrapDoc, embeddedDoc);
+
+ if (wrapSpansByDoc.doc() >= embeddedDoc || !wrapSpansByDoc.skipTo(embeddedDoc)) {
more = false;
inSameDoc = false;
return false;
@@ -227,29 +259,33 @@
wrapDoc = wrapSpans.doc();
}
else if (wrapDoc > embeddedDoc) {
- log.trace("Skip embedded from {} to {}", embeddedSpans.doc(), wrapDoc);
+ if (DEBUG)
+ log.trace("Skip embedded from {} to {}", embeddedSpans.doc(), wrapDoc);
+
// if (!embeddedSpansByDoc.skipTo( wrapDoc )) {
if (wrapDoc != embeddedSpans.doc()) {
- if (embeddedSpans.doc() == DocIdSetIterator.NO_MORE_DOCS || !embeddedSpans.skipTo( wrapDoc )) {
+ if (embeddedSpans.doc() == DocIdSetIterator.NO_MORE_DOCS ||
+ !(embeddedSpans.doc() < wrapDoc && embeddedSpans.skipTo( wrapDoc ))) {
more = false;
inSameDoc = false;
return false;
};
}
- else {
+ else
_add_current();
- // embeddedDoc = embeddedSpans.doc();
- };
+ // embeddedDoc = embeddedSpans.doc();
};
}
- else {
+ else if (DEBUG)
log.trace("Docs identical");
- };
+
embeddedStart = embeddedSpans.start();
embeddedEnd = embeddedSpans.end();
- log.trace("The new embedded start is {}-{}", embeddedStart, embeddedEnd);
- inSameDoc = true;
- return true;
+
+ if (DEBUG)
+ log.trace("The new embedded start is {}-{}", embeddedStart, embeddedEnd);
+
+ return inSameDoc = true;
};
@@ -267,20 +303,27 @@
* Most implementations are considerably more efficient than that.
*/
public boolean skipTo (int target) throws IOException {
- log.trace("skipTo {}", target);
+ if (DEBUG)
+ log.trace("skipTo {}", target);
// Check for init next
if (firstTime) {
firstTime = false;
if (!embeddedSpans.next() || !wrapSpans.next()) {
- log.trace("No next in firstSpan nor in secondSpan 2");
+ if (DEBUG)
+ log.trace("No next in firstSpan nor in secondSpan 2");
more = false;
return false;
};
more = true;
+
+ // WrapSpans
wrapStart = wrapSpans.start();
wrapEnd = wrapSpans.end();
- wrapDoc = embeddedSpans.doc();
+ // wrapDoc = embeddedSpans.doc();
+ wrapDoc = wrapSpans.doc();
+
+ // EmbeddedSpans
embeddedStart = embeddedSpans.start();
embeddedEnd = embeddedSpans.end();
embeddedDoc = embeddedSpans.doc();
@@ -297,10 +340,8 @@
}
// Can't be skipped to target
- else {
- more = false;
- return false;
- };
+ else
+ return more = false;
};
matchPayload.clear();
@@ -317,7 +358,8 @@
private boolean within () throws IOException {
- log.trace("within");
+ if (DEBUG)
+ log.trace("within");
while (more && inSameDoc) {
@@ -338,11 +380,11 @@
// |-|"
// |---|
if (wrapStart > embeddedStart) {
- log.trace("[Case] 1-5 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 1-5 with {}", _actPos());
- if (this.fetchNext()) {
+ if (this.fetchNext())
continue;
- };
// Forward wrapSpan
if (wrapSpans.next()) {
@@ -354,9 +396,7 @@
};
};
- this.more = false;
- this.inSameDoc = false;
- return false;
+ return this.more = this.inSameDoc = false;
};
// Get wrapEnd
@@ -366,11 +406,14 @@
embedded.start = embeddedStart;
embedded.end = embeddedEnd;
embedded.doc = embeddedDoc;
+
if (embeddedPayload != null)
embedded.payload = embeddedPayload;
this.spanStore1.add(embedded);
- log.trace("pushed to spanStore1: {}", embedded.toString());
+
+ if (DEBUG)
+ log.trace("pushed to spanStore1: {}", embedded.toString());
// Case 12
@@ -380,22 +423,31 @@
// |---|
// |-|
if (wrapEnd <= embeddedStart) {
- log.trace("[Case] 12-13 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 12-13 with {}", _actPos());
// Copy content of spanStores
if (!spanStore1.isEmpty()) {
- log.trace("First store is not empty - copy to second store!");
+ if (DEBUG)
+ log.trace("First store is not empty - copy to second store!");
+
spanStore2.addAll(0, (LinkedList<KorapLongSpan>) spanStore1.clone());
spanStore1.clear();
- log.trace("Second store is now: {}", spanStore2.toString());
+
+ if (DEBUG)
+ log.trace("Second store is now: {}", spanStore2.toString());
};
// Forward wrapSpan
- log.trace("Try to forward wrapspan");
+ if (DEBUG)
+ log.trace("Try to forward wrapspan");
if (wrapSpans.next()) {
wrapDoc = wrapSpans.doc();
- log.trace("wrapDoc is now {} while embeddedDoc is {}", wrapDoc, embeddedDoc);
+
+ if (DEBUG)
+ log.trace("wrapDoc is now {} while embeddedDoc is {}", wrapDoc, embeddedDoc);
+
if (this.toSameDoc()) {
wrapStart = wrapSpans.start();
wrapEnd = wrapSpans.end();
@@ -403,13 +455,11 @@
continue;
};
}
- else {
+ else if (DEBUG) {
log.trace("Unable to forward wrapspan");
};
- this.inSameDoc = false;
- this.more = false;
- return false;
+ return this.inSameDoc = this.more = false;
}
@@ -420,12 +470,17 @@
// |---|
// |-|
if (wrapEnd > embeddedEnd) {
- log.trace("[Case] 6 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 6 with {}", _actPos());
// neither match nor endWith
if (this.flag < (short) 2) {
- _setMatch(embedded);
- log.trace("MATCH1!! with {}", _actPos());
+
+ _setMatch(embedded.payload);
+
+ if (DEBUG)
+ log.trace("MATCH1!! with {}", _actPos());
+
fetchTwoNext();
return true;
};
@@ -438,10 +493,14 @@
// |---|
// |---|
else if (wrapEnd == embeddedEnd) {
- log.trace("[Case] 7 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 7 with {}", _actPos());
- _setMatch(embedded);
- log.trace("MATCH2!! with {}", _actPos());
+ _setMatch(embedded.payload);
+
+ if (DEBUG)
+ log.trace("MATCH2!! with {}", _actPos());
+
fetchTwoNext();
return true;
};
@@ -450,7 +509,9 @@
// |-|
// |---|
// wrapEnd < embeddedEnd
- log.trace("[Case] 8 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 8 with {}", _actPos());
+
fetchTwoNext();
continue;
};
@@ -462,12 +523,16 @@
// |---|
// |-|
if (wrapEnd > embeddedEnd) {
- log.trace("[Case] 9 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 9 with {}", _actPos());
// neither match nor endWith
if (this.flag == (short) 0) {
- _setMatch(embedded);
- log.trace("MATCH3!! with {}", _actPos());
+ _setMatch(embedded.payload);
+
+ if (DEBUG)
+ log.trace("MATCH3!! with {}", _actPos());
+
fetchTwoNext();
return true;
};
@@ -479,12 +544,16 @@
// |---|
// |-|
else if (wrapEnd == embeddedEnd) {
- log.trace("[Case] 10 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 10 with {}", _actPos());
// neither match nor endWith
if (this.flag == (short) 0 || this.flag == (short) 2) {
- _setMatch(embedded);
- log.trace("MATCH4!! with {}", _actPos());
+ _setMatch(embedded.payload);
+
+ if (DEBUG)
+ log.trace("MATCH4!! with {}", _actPos());
+
fetchTwoNext();
return true;
};
@@ -497,30 +566,35 @@
// |---|
// |---|
// wrapEnd < embeddedEnd
- log.trace("[Case] 11 with {}", _actPos());
+ if (DEBUG)
+ log.trace("[Case] 11 with {}", _actPos());
+
fetchTwoNext();
continue;
};
- this.more = false;
- return false;
+ return this.more = false;
};
private boolean fetchNext () throws IOException {
-
// Fetch span from first store
if (spanStore1.isEmpty()) {
- log.trace("First store is empty");
+
+ if (DEBUG)
+ log.trace("First store is empty");
+
return fetchTwoNext();
};
KorapLongSpan current = spanStore1.removeFirst();
- log.trace("Fetch from first store: {}", current.toString());
+ if (DEBUG)
+ log.trace("Fetch from first store: {}", current.toString());
embeddedStart = current.start;
embeddedEnd = current.end;
embeddedDoc = current.doc;
+
if (current.payload != null)
embeddedPayload = current.payload;
@@ -532,22 +606,28 @@
// Fetch span from second store
if (spanStore2.isEmpty()) {
- log.trace("Second store is empty");
+ if (DEBUG)
+ log.trace("Second store is empty");
// Forward spans
if (this.embeddedSpans.next()) {
- log.trace("Forwarded embeddedSpans");
+ if (DEBUG)
+ log.trace("Forwarded embeddedSpans");
if (this.embeddedSpans.doc() != wrapDoc && !spanStore1.isEmpty()) {
- log.trace("No docmatch and still stuff in store");
- log.trace("First store is not empty - copy to second store!");
+ if (DEBUG) {
+ log.trace("No docmatch and still stuff in store");
+ log.trace("First store is not empty - copy to second store!");
+ };
+
spanStore2.addAll(0, (LinkedList<KorapLongSpan>) spanStore1.clone());
spanStore1.clear();
_add_current();
- log.trace("Second store is now: {}", spanStore2.toString());
+ if (DEBUG)
+ log.trace("Second store is now: {}", spanStore2.toString());
}
else {
embeddedStart = embeddedSpans.start();
@@ -555,37 +635,44 @@
embeddedDoc = embeddedSpans.doc();
if (embeddedSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = embeddedSpans.getPayload();
- // Maybe just clear
- embeddedPayload = new ArrayList<byte[]>(payload.size());
- embeddedPayload.addAll(payload);
+ embeddedPayload.clear();
+ embeddedPayload.addAll(embeddedSpans.getPayload());
};
return this.toSameDoc();
};
}
- else {
+ else if (DEBUG)
log.trace("Forwarded embeddedSpans failed");
- };
- log.trace("EmbeddedDoc: " + embeddedDoc);
+
+ if (DEBUG)
+ log.trace("EmbeddedDoc: " + embeddedDoc);
// Forward wrapSpan
- log.trace("Try to forward wrapspan");
+ if (DEBUG)
+ log.trace("Try to forward wrapspan");
+
if (wrapSpans.next()) {
wrapDoc = wrapSpans.doc();
if (this.toSameDoc()) {
wrapStart = wrapSpans.start();
wrapEnd = wrapSpans.end();
- log.trace("WrapSpan forwarded");
+ if (DEBUG)
+ log.trace("WrapSpan forwarded");
// Copy content of spanStores
if (!spanStore1.isEmpty()) {
- log.trace("First store is not empty - copy to second store!");
+
+ if (DEBUG)
+ log.trace("First store is not empty - copy to second store!");
+
spanStore2.addAll(0, (LinkedList<KorapLongSpan>) spanStore1.clone());
spanStore1.clear();
- log.trace("Second store is now: {}", spanStore2.toString());
+
+ if (DEBUG)
+ log.trace("Second store is now: {}", spanStore2.toString());
};
return this.fetchTwoNext();
@@ -593,14 +680,16 @@
};
// Don't know.
- log.trace("No more fetchNext()");
+ if (DEBUG)
+ log.trace("No more fetchNext()");
- more = false;
- return false;
+ return more = false;
};
KorapLongSpan current = spanStore2.removeFirst();
- log.trace("Fetch from second store: {}", current.toString());
+
+ if (DEBUG)
+ log.trace("Fetch from second store: {}", current.toString());
embeddedStart = current.start;
embeddedEnd = current.end;
@@ -614,19 +703,17 @@
/*
TODO: Maybe ignore "embedded" parameter and use embeddedPayload directly
*/
- private void _setMatch (KorapLongSpan embedded) throws IOException {
+ private void _setMatch (Collection<byte[]> embeddedPayload) throws IOException {
matchStart = wrapStart;
matchEnd = wrapEnd;
matchDoc = embeddedDoc;
matchPayload.clear();
- if (embedded.payload != null)
- matchPayload.addAll(embedded.payload);
+ if (embeddedPayload != null)
+ matchPayload.addAll(embeddedPayload);
- if (wrapSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = wrapSpans.getPayload();
- matchPayload.addAll(payload);
- };
+ if (wrapSpans.isPayloadAvailable())
+ matchPayload.addAll(wrapSpans.getPayload());
};
@@ -637,13 +724,17 @@
embedded.doc = embeddedSpans.doc();
if (embeddedSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = embeddedSpans.getPayload();
- embedded.payload = new ArrayList<byte[]>(payload.size());
- embedded.payload.addAll(payload);
+ if (embedded.payload == null)
+ embedded.payload = new ArrayList<byte[]>(5);
+ else
+ embedded.payload.clear();
+ embedded.payload.addAll(embeddedSpans.getPayload());
};
this.spanStore2.add(embedded);
- log.trace("pushed to spanStore2: {}", embedded.toString());
+
+ if (DEBUG)
+ log.trace("pushed to spanStore2: {}", embedded.toString());
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 3e26bdd..e876668 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -2,18 +2,24 @@
# log4j.rootLogger = DEBUG, stdout
+# Spans:
#log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.NextSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
+
+# Collections:
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
+
+
+# Results:
+#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.index.TestSegmentIndex = TRACE, stdout