Fixed some WithinSpan-Bugs and fixed Tests to improve coverage for SpanPositionalQueries
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 411a50b..b7492e9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -142,7 +142,12 @@
@Override
public boolean skipTo (int target) throws IOException {
classedPayload.clear();
- if (hasmorespans && spans.doc() < target && spans.skipTo(target))
+
+ if (DEBUG) log.trace("Skip ClassSpans {} -> {}",
+ spans.doc(), target);
+
+ if (hasmorespans && spans.doc() < target &&
+ spans.skipTo(target))
return this.addClassPayload();
return false;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index cc9d064..36b569d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -15,6 +15,9 @@
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import de.ids_mannheim.korap.query.SpanElementQuery;
/**
@@ -28,6 +31,11 @@
private TermSpans termSpans;
private boolean lazyLoaded = false;
+ private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
+ // This advices the java compiler to ignore all loggings
+ public static final boolean DEBUG = false;
+
+
/**
* Constructs ElementSpans for the given {@link SpanElementQuery}.
*
@@ -149,6 +157,10 @@
@Override
public boolean skipTo(int target) throws IOException {
+
+ if (DEBUG) log.trace("Skip ElementSpans {} -> {}",
+ firstSpans.doc(), target);
+
if (hasMoreSpans &&
firstSpans.doc() < target &&
firstSpans.skipTo(target)) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index 6235342..f38f102 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -187,8 +187,13 @@
// Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
- if (DEBUG) log.trace("Skip MatchSpans");
- return spans.skipTo(target);
+ if (DEBUG) log.trace("Skip MatchSpans {} -> {}",
+ this.doc(), target);
+
+ if (this.doc() < target && spans.skipTo(target)) {
+
+ };
+ return false;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index dc68911..5240580 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -180,7 +180,7 @@
// Both spans match according to the flag
// Silently the next operations are prepared
if (this.tryMatch && this.doesMatch()) {
-
+
if (this.wrapEnd == -1)
this.wrapEnd = this.wrapSpans.end();
@@ -197,11 +197,11 @@
if (DEBUG)
log.trace(
- " ---- MATCH ---- {}-{} ({})",
- matchStart,
- matchEnd,
- matchDoc
- );
+ " ---- MATCH ---- {}-{} ({})",
+ matchStart,
+ matchEnd,
+ matchDoc
+ );
this.tryMatch = false;
return true;
@@ -241,7 +241,7 @@
log.trace("Fetch next embedded span");
};
- this.embeddedStart = -1;
+ this.embeddedStart = this.embeddedSpans.start();
this.embeddedEnd = -1;
this.embeddedPayload = null;
this.embeddedDoc = this.embeddedSpans.doc();
@@ -279,8 +279,6 @@
);
if (this.embeddedDoc != this.wrapDoc) {
- if (DEBUG)
- log.trace("Delete all span stores");
// Is this always a good idea?
/*
@@ -302,7 +300,7 @@
this.more = true;
this.inSameDoc = true;
this.tryMatch = true;
-
+
this.nextSpanB();
continue;
}
@@ -371,7 +369,7 @@
if (this.wrapSpans.next()) {
// Reset wrapping information
- this.wrapStart = -1;
+ this.wrapStart = this.wrapSpans.start();
this.wrapEnd = -1;
// Retrieve doc information
@@ -467,9 +465,11 @@
log.trace("Current position already is in the same doc");
log.trace("Embedded: {}", _currentEmbedded().toString());
};
+ this.matchDoc = this.embeddedDoc;
return true;
};
+
// Forward till match
while (this.wrapDoc != this.embeddedDoc) {
@@ -480,6 +480,7 @@
if (!wrapSpans.skipTo(this.embeddedDoc)) {
this.more = false;
this.inSameDoc = false;
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
@@ -495,13 +496,25 @@
this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
-
- this.wrapStart = -1;
- this.wrapEnd = -1;
-
- if (wrapDoc == embeddedDoc)
+
+ /*
+ Remove stored information
+ */
+ if (DEBUG)
+ log.trace("Delete all span stores");
+
+ this.spanStore1.clear();
+ this.spanStore2.clear();
+
+ if (wrapDoc == embeddedDoc) {
+ this.wrapStart = this.wrapSpans.start();
+ this.embeddedStart = this.embeddedSpans.start();
+ this.matchDoc = this.embeddedDoc;
return true;
+ };
+ this.wrapStart = -1;
+ this.embeddedStart = -1;
}
// Forward embedInfo
@@ -511,12 +524,10 @@
if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
this.more = false;
this.inSameDoc = false;
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
-
- if (DEBUG)
- log.trace("Skip embedded to doc {}", this.wrapDoc);
-
+
this.embeddedDoc = this.embeddedSpans.doc();
if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -526,19 +537,26 @@
this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
+
+ if (DEBUG)
+ log.trace("Skip embedded to doc {}", this.embeddedDoc);
- this.embeddedStart = -1;
+ this.embeddedStart = this.embeddedSpans.start();
this.embeddedEnd = -1;
this.embeddedPayload = null;
- if (this.wrapDoc == this.embeddedDoc)
+ if (this.wrapDoc == this.embeddedDoc) {
+ this.matchDoc = this.embeddedDoc;
return true;
+ };
}
else {
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
};
+ this.matchDoc = this.wrapDoc;
return true;
};
@@ -593,13 +611,18 @@
public boolean skipTo (int target) throws IOException {
if (DEBUG)
- log.trace("skipTo document {}", target);
+ log.trace(
+ "skipTo document {}/{} -> {}",
+ this.embeddedDoc,
+ this.wrapDoc,
+ target
+ );
// Initialize spans
if (!this.init())
return false;
- assert target > embeddedDoc;
+ assert target > this.embeddedDoc;
// Only forward embedded spans
if (this.more && (this.embeddedDoc < target)) {
@@ -640,164 +663,163 @@
// Check if the current span constellation does match
// Store backtracking relevant data and say, how to proceed
private boolean doesMatch () {
- if (DEBUG)
- log.trace("In the match test branch");
+ if (DEBUG)
+ log.trace("In the match test branch");
- if (this.wrapStart == -1)
- this.wrapStart = this.wrapSpans.start();
+ if (this.wrapStart == -1)
+ this.wrapStart = this.wrapSpans.start();
- if (this.embeddedStart == -1) {
- this.embeddedStart = this.embeddedSpans.start();
- this.embeddedEnd = this.embeddedSpans.end();
- };
+ if (this.embeddedStart == -1) {
+ this.embeddedStart = this.embeddedSpans.start();
+ this.embeddedEnd = this.embeddedSpans.end();
+ };
- this.wrapEnd = -1;
+ this.wrapEnd = -1;
+ // Shortcut to prevent lazyloading of .end()
+ if (this.wrapStart > this.embeddedStart) {
+ // Can't match for in, rin, ew, sw, and m
+ // and will always lead to next_b
+ if (flag >= WITHIN) {
+ this.nextSpanB();
+ if (DEBUG)
+ _logCurrentCase((byte) 16);
+ return false;
+ };
+ }
- // Shortcut to prevent lazyloading of .end()
- if (this.wrapStart > this.embeddedStart) {
- // Can't match for in, rin, ew, sw, and m
- // and will always lead to next_b
- if (flag >= WITHIN) {
- this.nextSpanB();
- if (DEBUG)
- _logCurrentCase((byte) 14);
- return false;
- };
- }
+ else if (this.wrapStart < this.embeddedStart) {
+ // Can't match for sw and m and will always
+ // lead to next_a
+ if (flag >= STARTSWITH) {
+ this.nextSpanA();
+ if (DEBUG)
+ _logCurrentCase((byte) 15);
+ return false;
+ };
+ };
- else if (this.wrapStart < this.embeddedStart) {
- // Can't match for sw and m and will always
- // lead to next_a
- if (flag >= STARTSWITH) {
- this.nextSpanA();
- if (DEBUG)
- _logCurrentCase((byte) 15);
- return false;
- };
- };
+ // Now check correctly
+ byte currentCase = this.withinCase();
- // Now check correctly
- byte currentCase = this.withinCase();
+ if (DEBUG)
+ _logCurrentCase(currentCase);
- if (DEBUG)
- _logCurrentCase(currentCase);
+ boolean match = false;
- boolean match = false;
+ // Test case
+ if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
+ switch (flag) {
- // Test case
- if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
- switch (flag) {
+ case WITHIN:
+ if (currentCase >= 6 && currentCase <= 10 && currentCase != 8)
+ match = true;
+ break;
- case WITHIN:
- if (currentCase >= 6 && currentCase <= 10 && currentCase != 8)
- match = true;
- break;
+ case REAL_WITHIN:
+ if (currentCase == 6 ||
+ currentCase == 9 ||
+ currentCase == 10)
+ match = true;
+ break;
+
+ case MATCH:
+ if (currentCase == 7)
+ match = true;
+ break;
- case REAL_WITHIN:
- if (currentCase == 6 ||
- currentCase == 9 ||
- currentCase == 10)
- match = true;
- break;
+ case STARTSWITH:
+ if (currentCase == 7 ||
+ currentCase == 6)
+ match = true;
+ break;
- case MATCH:
- if (currentCase == 7)
- match = true;
- break;
+ case ENDSWITH:
+ if (currentCase == 7 ||
+ currentCase == 10)
+ match = true;
+ break;
- case STARTSWITH:
- if (currentCase == 7 ||
- currentCase == 6)
- match = true;
- break;
+ case OVERLAP:
+ match = true;
+ break;
- case ENDSWITH:
- if (currentCase == 7 ||
- currentCase == 10)
- match = true;
- break;
+ case REAL_OVERLAP:
+ if (currentCase == 3 ||
+ currentCase == 11)
+ match = true;
+ break;
+ };
+ };
- case OVERLAP:
- match = true;
- break;
-
- case REAL_OVERLAP:
- if (currentCase == 3 ||
- currentCase == 11)
- match = true;
- break;
- };
- };
-
- try {
- this.todo(currentCase);
- }
- catch (IOException e) {
- return false;
- }
- return match;
+ try {
+ this.todo(currentCase);
+ }
+ catch (IOException e) {
+ return false;
+ }
+ return match;
};
private void _logCurrentCase (byte currentCase) {
- log.trace("Current Case is {}", currentCase);
+ log.trace("Current Case is {}", currentCase);
- String _e = _currentEmbedded().toString();
+ String _e = _currentEmbedded().toString();
- log.trace(" |---| {}", _currentWrap().toString());
+ log.trace(" |---| {}", _currentWrap().toString());
- switch (currentCase) {
- case 1:
- log.trace("|-| {}", _e);
+ switch (currentCase) {
+ case 1:
+ log.trace("|-| {}", _e);
+ break;
+ case 2:
+ log.trace("|---| {}", _e);
break;
- case 2:
- log.trace("|---| {}", _e);
- break;
- case 3:
- log.trace(" |---| {}", _e);
- break;
- case 4:
- log.trace(" |-----| {}", _e);
- break;
- case 5:
- log.trace(" |-------| {}", _e);
- break;
- case 6:
- log.trace(" |-| {}", _e);
- break;
- case 7:
- log.trace(" |---| {}", _e);
- break;
- case 8:
- log.trace(" |-----| {}", _e);
- break;
- case 9:
- log.trace(" |-| {}", _e);
- break;
- case 10:
- log.trace(" |-| {}", _e);
- break;
- case 11:
- log.trace(" |---| {}", _e);
- break;
- case 12:
- log.trace(" |-| {}", _e);
- break;
- case 13:
- log.trace(" |-| {}", _e);
- break;
+ case 3:
+ log.trace(" |---| {}", _e);
+ break;
+ case 4:
+ log.trace(" |-----| {}", _e);
+ break;
+ case 5:
+ log.trace(" |-------| {}", _e);
+ break;
+ case 6:
+ log.trace(" |-| {}", _e);
+ break;
+ case 7:
+ log.trace(" |---| {}", _e);
+ break;
+ case 8:
+ log.trace(" |-----| {}", _e);
+ break;
+ case 9:
+ log.trace(" |-| {}", _e);
+ break;
+ case 10:
+ log.trace(" |-| {}", _e);
+ break;
+ case 11:
+ log.trace(" |---| {}", _e);
+ break;
+ case 12:
+ log.trace(" |-| {}", _e);
+ break;
+ case 13:
+ log.trace(" |-| {}", _e);
+ break;
- case 15:
- // Fake case
- log.trace(" |---? {}", _e);
- break;
+ case 15:
+ // Fake case
+ log.trace(" |---? {}", _e);
+ break;
- case 16:
- // Fake case
- log.trace(" |---? {}", _e);
- break;
- };
+ case 16:
+ // Fake case
+ log.trace(" |---? {}", _e);
+ break;
+ };
};
@@ -924,123 +946,123 @@
// Return case number
private byte withinCase () {
- // case 1-5
- if (this.wrapStart > this.embeddedStart) {
+ // case 1-5
+ if (this.wrapStart > this.embeddedStart) {
- // Case 1
- // |-|
- // |-|
- if (this.wrapStart > this.embeddedEnd) {
- return (byte) 1;
- }
+ // Case 1
+ // |-|
+ // |-|
+ if (this.wrapStart > this.embeddedEnd) {
+ return (byte) 1;
+ }
+
+ // Case 2
+ // |-|
+ // |-|
+ else if (this.wrapStart == this.embeddedEnd) {
+ return (byte) 2;
+ };
+
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
+
+ // Case 3
+ // |---|
+ // |---|
+ if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 3;
+ }
- // Case 2
- // |-|
- // |-|
- else if (this.wrapStart == this.embeddedEnd) {
- return (byte) 2;
- };
+ // Case 4
+ // |-|
+ // |---|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 4;
+ };
+
+ // Case 5
+ // |-|
+ // |---|
+ return (byte) 5;
+ }
+
+ // case 6-8
+ else if (this.wrapStart == this.embeddedStart) {
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
- // Case 3
- // |---|
- // |---|
- if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 3;
- }
+ // Case 6
+ // |---|
+ // |-|
+ if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 6;
+ }
- // Case 4
- // |-|
- // |---|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 4;
- };
-
- // Case 5
- // |-|
- // |---|
- return (byte) 5;
- }
+ // Case 7
+ // |---|
+ // |---|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 7;
+ };
- // case 6-8
- else if (this.wrapStart == this.embeddedStart) {
+ // Case 8
+ // |-|
+ // |---|
+ return (byte) 8;
+ }
+
+ // wrapStart < embeddedStart
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
- // Case 6
- // |---|
- // |-|
- if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 6;
- }
-
- // Case 7
- // |---|
- // |---|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 7;
- };
-
- // Case 8
- // |-|
- // |---|
- return (byte) 8;
- }
-
- // wrapStart < embeddedStart
-
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
-
- // Case 13
- // |-|
- // |-|
- if (this.wrapEnd < this.embeddedStart) {
- return (byte) 13;
- }
-
- // Case 9
- // |---|
- // |-|
- else if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 9;
- }
-
- // Case 10
- // |---|
- // |-|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 10;
- }
-
- // Case 11
- // |---|
- // |---|
- else if (this.wrapEnd > this.embeddedStart) {
- return (byte) 11;
- }
+ // Case 13
+ // |-|
+ // |-|
+ if (this.wrapEnd < this.embeddedStart) {
+ return (byte) 13;
+ }
+
+ // Case 9
+ // |---|
+ // |-|
+ else if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 9;
+ }
+
+ // Case 10
+ // |---|
+ // |-|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 10;
+ }
+
+ // Case 11
+ // |---|
+ // |---|
+ else if (this.wrapEnd > this.embeddedStart) {
+ return (byte) 11;
+ }
- // case 12
- // |-|
- // |-|
- return (byte) 12;
+ // case 12
+ // |-|
+ // |-|
+ return (byte) 12;
};
/** Returns the document number of the current match. Initially invalid. */
@Override
public int doc () {
- return matchDoc;
+ return matchDoc;
};
/** Returns the start position of the embedding wrap. Initially invalid. */
@Override
public int start () {
- return matchStart;
+ return matchStart;
};