Fixed some WithinSpan-Bugs and fixed Tests to improve coverage for SpanPositionalQueries
diff --git a/Changes b/Changes
index cc49dee..ffe30ea 100644
--- a/Changes
+++ b/Changes
@@ -16,7 +16,10 @@
removed KorapSpan, KorapTermSpan and KorapLongSpan,
renamed /analysis to /model,
renamed shrink() to focus(),
- removed KorapPrimaryData (diewald)
+ removed KorapPrimaryData,
+ fixed a lot of wrong tests for WithinSpans (diewald)
+ - [feature] Improved deserialization of SpanSubSpanQueries
+ (margaretha)
0.49.3 2015-02-03
- [documentation] Improved documentation for API classes (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 87755cf..3636aa7 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -288,6 +288,33 @@
return json;
};
+ /**
+ * Stringifies the matches to give a brief overview on
+ * the result. Mainly used for testing.
+ *
+ * @return The stringified matches
+ */
+ public String getOverview () {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("Search for: ")
+ .append(this.query)
+ .append("\n");
+
+ int i = 1;
+
+ // Add matches as bracket strings
+ for (KorapMatch km : this.getMatches())
+ sb.append(i++)
+ .append(": ")
+ .append(km.getSnippetBrackets())
+ .append(" (Doc ")
+ .append(km.getLocalDocID())
+ .append(")\n");
+
+ return sb.toString();
+ };
+
// For Collocation Analysis API
@Deprecated
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 411a50b..b7492e9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -142,7 +142,12 @@
@Override
public boolean skipTo (int target) throws IOException {
classedPayload.clear();
- if (hasmorespans && spans.doc() < target && spans.skipTo(target))
+
+ if (DEBUG) log.trace("Skip ClassSpans {} -> {}",
+ spans.doc(), target);
+
+ if (hasmorespans && spans.doc() < target &&
+ spans.skipTo(target))
return this.addClassPayload();
return false;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index cc9d064..36b569d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -15,6 +15,9 @@
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import de.ids_mannheim.korap.query.SpanElementQuery;
/**
@@ -28,6 +31,11 @@
private TermSpans termSpans;
private boolean lazyLoaded = false;
+ private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
+ // This advices the java compiler to ignore all loggings
+ public static final boolean DEBUG = false;
+
+
/**
* Constructs ElementSpans for the given {@link SpanElementQuery}.
*
@@ -149,6 +157,10 @@
@Override
public boolean skipTo(int target) throws IOException {
+
+ if (DEBUG) log.trace("Skip ElementSpans {} -> {}",
+ firstSpans.doc(), target);
+
if (hasMoreSpans &&
firstSpans.doc() < target &&
firstSpans.skipTo(target)) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index 6235342..f38f102 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -187,8 +187,13 @@
// Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
- if (DEBUG) log.trace("Skip MatchSpans");
- return spans.skipTo(target);
+ if (DEBUG) log.trace("Skip MatchSpans {} -> {}",
+ this.doc(), target);
+
+ if (this.doc() < target && spans.skipTo(target)) {
+
+ };
+ return false;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index dc68911..5240580 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -180,7 +180,7 @@
// Both spans match according to the flag
// Silently the next operations are prepared
if (this.tryMatch && this.doesMatch()) {
-
+
if (this.wrapEnd == -1)
this.wrapEnd = this.wrapSpans.end();
@@ -197,11 +197,11 @@
if (DEBUG)
log.trace(
- " ---- MATCH ---- {}-{} ({})",
- matchStart,
- matchEnd,
- matchDoc
- );
+ " ---- MATCH ---- {}-{} ({})",
+ matchStart,
+ matchEnd,
+ matchDoc
+ );
this.tryMatch = false;
return true;
@@ -241,7 +241,7 @@
log.trace("Fetch next embedded span");
};
- this.embeddedStart = -1;
+ this.embeddedStart = this.embeddedSpans.start();
this.embeddedEnd = -1;
this.embeddedPayload = null;
this.embeddedDoc = this.embeddedSpans.doc();
@@ -279,8 +279,6 @@
);
if (this.embeddedDoc != this.wrapDoc) {
- if (DEBUG)
- log.trace("Delete all span stores");
// Is this always a good idea?
/*
@@ -302,7 +300,7 @@
this.more = true;
this.inSameDoc = true;
this.tryMatch = true;
-
+
this.nextSpanB();
continue;
}
@@ -371,7 +369,7 @@
if (this.wrapSpans.next()) {
// Reset wrapping information
- this.wrapStart = -1;
+ this.wrapStart = this.wrapSpans.start();
this.wrapEnd = -1;
// Retrieve doc information
@@ -467,9 +465,11 @@
log.trace("Current position already is in the same doc");
log.trace("Embedded: {}", _currentEmbedded().toString());
};
+ this.matchDoc = this.embeddedDoc;
return true;
};
+
// Forward till match
while (this.wrapDoc != this.embeddedDoc) {
@@ -480,6 +480,7 @@
if (!wrapSpans.skipTo(this.embeddedDoc)) {
this.more = false;
this.inSameDoc = false;
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
@@ -495,13 +496,25 @@
this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
-
- this.wrapStart = -1;
- this.wrapEnd = -1;
-
- if (wrapDoc == embeddedDoc)
+
+ /*
+ Remove stored information
+ */
+ if (DEBUG)
+ log.trace("Delete all span stores");
+
+ this.spanStore1.clear();
+ this.spanStore2.clear();
+
+ if (wrapDoc == embeddedDoc) {
+ this.wrapStart = this.wrapSpans.start();
+ this.embeddedStart = this.embeddedSpans.start();
+ this.matchDoc = this.embeddedDoc;
return true;
+ };
+ this.wrapStart = -1;
+ this.embeddedStart = -1;
}
// Forward embedInfo
@@ -511,12 +524,10 @@
if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
this.more = false;
this.inSameDoc = false;
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
-
- if (DEBUG)
- log.trace("Skip embedded to doc {}", this.wrapDoc);
-
+
this.embeddedDoc = this.embeddedSpans.doc();
if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -526,19 +537,26 @@
this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
+
+ if (DEBUG)
+ log.trace("Skip embedded to doc {}", this.embeddedDoc);
- this.embeddedStart = -1;
+ this.embeddedStart = this.embeddedSpans.start();
this.embeddedEnd = -1;
this.embeddedPayload = null;
- if (this.wrapDoc == this.embeddedDoc)
+ if (this.wrapDoc == this.embeddedDoc) {
+ this.matchDoc = this.embeddedDoc;
return true;
+ };
}
else {
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
};
+ this.matchDoc = this.wrapDoc;
return true;
};
@@ -593,13 +611,18 @@
public boolean skipTo (int target) throws IOException {
if (DEBUG)
- log.trace("skipTo document {}", target);
+ log.trace(
+ "skipTo document {}/{} -> {}",
+ this.embeddedDoc,
+ this.wrapDoc,
+ target
+ );
// Initialize spans
if (!this.init())
return false;
- assert target > embeddedDoc;
+ assert target > this.embeddedDoc;
// Only forward embedded spans
if (this.more && (this.embeddedDoc < target)) {
@@ -640,164 +663,163 @@
// Check if the current span constellation does match
// Store backtracking relevant data and say, how to proceed
private boolean doesMatch () {
- if (DEBUG)
- log.trace("In the match test branch");
+ if (DEBUG)
+ log.trace("In the match test branch");
- if (this.wrapStart == -1)
- this.wrapStart = this.wrapSpans.start();
+ if (this.wrapStart == -1)
+ this.wrapStart = this.wrapSpans.start();
- if (this.embeddedStart == -1) {
- this.embeddedStart = this.embeddedSpans.start();
- this.embeddedEnd = this.embeddedSpans.end();
- };
+ if (this.embeddedStart == -1) {
+ this.embeddedStart = this.embeddedSpans.start();
+ this.embeddedEnd = this.embeddedSpans.end();
+ };
- this.wrapEnd = -1;
+ this.wrapEnd = -1;
+ // Shortcut to prevent lazyloading of .end()
+ if (this.wrapStart > this.embeddedStart) {
+ // Can't match for in, rin, ew, sw, and m
+ // and will always lead to next_b
+ if (flag >= WITHIN) {
+ this.nextSpanB();
+ if (DEBUG)
+ _logCurrentCase((byte) 16);
+ return false;
+ };
+ }
- // Shortcut to prevent lazyloading of .end()
- if (this.wrapStart > this.embeddedStart) {
- // Can't match for in, rin, ew, sw, and m
- // and will always lead to next_b
- if (flag >= WITHIN) {
- this.nextSpanB();
- if (DEBUG)
- _logCurrentCase((byte) 14);
- return false;
- };
- }
+ else if (this.wrapStart < this.embeddedStart) {
+ // Can't match for sw and m and will always
+ // lead to next_a
+ if (flag >= STARTSWITH) {
+ this.nextSpanA();
+ if (DEBUG)
+ _logCurrentCase((byte) 15);
+ return false;
+ };
+ };
- else if (this.wrapStart < this.embeddedStart) {
- // Can't match for sw and m and will always
- // lead to next_a
- if (flag >= STARTSWITH) {
- this.nextSpanA();
- if (DEBUG)
- _logCurrentCase((byte) 15);
- return false;
- };
- };
+ // Now check correctly
+ byte currentCase = this.withinCase();
- // Now check correctly
- byte currentCase = this.withinCase();
+ if (DEBUG)
+ _logCurrentCase(currentCase);
- if (DEBUG)
- _logCurrentCase(currentCase);
+ boolean match = false;
- boolean match = false;
+ // Test case
+ if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
+ switch (flag) {
- // Test case
- if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
- switch (flag) {
+ case WITHIN:
+ if (currentCase >= 6 && currentCase <= 10 && currentCase != 8)
+ match = true;
+ break;
- case WITHIN:
- if (currentCase >= 6 && currentCase <= 10 && currentCase != 8)
- match = true;
- break;
+ case REAL_WITHIN:
+ if (currentCase == 6 ||
+ currentCase == 9 ||
+ currentCase == 10)
+ match = true;
+ break;
+
+ case MATCH:
+ if (currentCase == 7)
+ match = true;
+ break;
- case REAL_WITHIN:
- if (currentCase == 6 ||
- currentCase == 9 ||
- currentCase == 10)
- match = true;
- break;
+ case STARTSWITH:
+ if (currentCase == 7 ||
+ currentCase == 6)
+ match = true;
+ break;
- case MATCH:
- if (currentCase == 7)
- match = true;
- break;
+ case ENDSWITH:
+ if (currentCase == 7 ||
+ currentCase == 10)
+ match = true;
+ break;
- case STARTSWITH:
- if (currentCase == 7 ||
- currentCase == 6)
- match = true;
- break;
+ case OVERLAP:
+ match = true;
+ break;
- case ENDSWITH:
- if (currentCase == 7 ||
- currentCase == 10)
- match = true;
- break;
+ case REAL_OVERLAP:
+ if (currentCase == 3 ||
+ currentCase == 11)
+ match = true;
+ break;
+ };
+ };
- case OVERLAP:
- match = true;
- break;
-
- case REAL_OVERLAP:
- if (currentCase == 3 ||
- currentCase == 11)
- match = true;
- break;
- };
- };
-
- try {
- this.todo(currentCase);
- }
- catch (IOException e) {
- return false;
- }
- return match;
+ try {
+ this.todo(currentCase);
+ }
+ catch (IOException e) {
+ return false;
+ }
+ return match;
};
private void _logCurrentCase (byte currentCase) {
- log.trace("Current Case is {}", currentCase);
+ log.trace("Current Case is {}", currentCase);
- String _e = _currentEmbedded().toString();
+ String _e = _currentEmbedded().toString();
- log.trace(" |---| {}", _currentWrap().toString());
+ log.trace(" |---| {}", _currentWrap().toString());
- switch (currentCase) {
- case 1:
- log.trace("|-| {}", _e);
+ switch (currentCase) {
+ case 1:
+ log.trace("|-| {}", _e);
+ break;
+ case 2:
+ log.trace("|---| {}", _e);
break;
- case 2:
- log.trace("|---| {}", _e);
- break;
- case 3:
- log.trace(" |---| {}", _e);
- break;
- case 4:
- log.trace(" |-----| {}", _e);
- break;
- case 5:
- log.trace(" |-------| {}", _e);
- break;
- case 6:
- log.trace(" |-| {}", _e);
- break;
- case 7:
- log.trace(" |---| {}", _e);
- break;
- case 8:
- log.trace(" |-----| {}", _e);
- break;
- case 9:
- log.trace(" |-| {}", _e);
- break;
- case 10:
- log.trace(" |-| {}", _e);
- break;
- case 11:
- log.trace(" |---| {}", _e);
- break;
- case 12:
- log.trace(" |-| {}", _e);
- break;
- case 13:
- log.trace(" |-| {}", _e);
- break;
+ case 3:
+ log.trace(" |---| {}", _e);
+ break;
+ case 4:
+ log.trace(" |-----| {}", _e);
+ break;
+ case 5:
+ log.trace(" |-------| {}", _e);
+ break;
+ case 6:
+ log.trace(" |-| {}", _e);
+ break;
+ case 7:
+ log.trace(" |---| {}", _e);
+ break;
+ case 8:
+ log.trace(" |-----| {}", _e);
+ break;
+ case 9:
+ log.trace(" |-| {}", _e);
+ break;
+ case 10:
+ log.trace(" |-| {}", _e);
+ break;
+ case 11:
+ log.trace(" |---| {}", _e);
+ break;
+ case 12:
+ log.trace(" |-| {}", _e);
+ break;
+ case 13:
+ log.trace(" |-| {}", _e);
+ break;
- case 15:
- // Fake case
- log.trace(" |---? {}", _e);
- break;
+ case 15:
+ // Fake case
+ log.trace(" |---? {}", _e);
+ break;
- case 16:
- // Fake case
- log.trace(" |---? {}", _e);
- break;
- };
+ case 16:
+ // Fake case
+ log.trace(" |---? {}", _e);
+ break;
+ };
};
@@ -924,123 +946,123 @@
// Return case number
private byte withinCase () {
- // case 1-5
- if (this.wrapStart > this.embeddedStart) {
+ // case 1-5
+ if (this.wrapStart > this.embeddedStart) {
- // Case 1
- // |-|
- // |-|
- if (this.wrapStart > this.embeddedEnd) {
- return (byte) 1;
- }
+ // Case 1
+ // |-|
+ // |-|
+ if (this.wrapStart > this.embeddedEnd) {
+ return (byte) 1;
+ }
+
+ // Case 2
+ // |-|
+ // |-|
+ else if (this.wrapStart == this.embeddedEnd) {
+ return (byte) 2;
+ };
+
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
+
+ // Case 3
+ // |---|
+ // |---|
+ if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 3;
+ }
- // Case 2
- // |-|
- // |-|
- else if (this.wrapStart == this.embeddedEnd) {
- return (byte) 2;
- };
+ // Case 4
+ // |-|
+ // |---|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 4;
+ };
+
+ // Case 5
+ // |-|
+ // |---|
+ return (byte) 5;
+ }
+
+ // case 6-8
+ else if (this.wrapStart == this.embeddedStart) {
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
- // Case 3
- // |---|
- // |---|
- if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 3;
- }
+ // Case 6
+ // |---|
+ // |-|
+ if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 6;
+ }
- // Case 4
- // |-|
- // |---|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 4;
- };
-
- // Case 5
- // |-|
- // |---|
- return (byte) 5;
- }
+ // Case 7
+ // |---|
+ // |---|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 7;
+ };
- // case 6-8
- else if (this.wrapStart == this.embeddedStart) {
+ // Case 8
+ // |-|
+ // |---|
+ return (byte) 8;
+ }
+
+ // wrapStart < embeddedStart
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
+ // Load wrapEnd
+ this.wrapEnd = this.wrapSpans.end();
- // Case 6
- // |---|
- // |-|
- if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 6;
- }
-
- // Case 7
- // |---|
- // |---|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 7;
- };
-
- // Case 8
- // |-|
- // |---|
- return (byte) 8;
- }
-
- // wrapStart < embeddedStart
-
- // Load wrapEnd
- this.wrapEnd = this.wrapSpans.end();
-
- // Case 13
- // |-|
- // |-|
- if (this.wrapEnd < this.embeddedStart) {
- return (byte) 13;
- }
-
- // Case 9
- // |---|
- // |-|
- else if (this.wrapEnd > this.embeddedEnd) {
- return (byte) 9;
- }
-
- // Case 10
- // |---|
- // |-|
- else if (this.wrapEnd == this.embeddedEnd) {
- return (byte) 10;
- }
-
- // Case 11
- // |---|
- // |---|
- else if (this.wrapEnd > this.embeddedStart) {
- return (byte) 11;
- }
+ // Case 13
+ // |-|
+ // |-|
+ if (this.wrapEnd < this.embeddedStart) {
+ return (byte) 13;
+ }
+
+ // Case 9
+ // |---|
+ // |-|
+ else if (this.wrapEnd > this.embeddedEnd) {
+ return (byte) 9;
+ }
+
+ // Case 10
+ // |---|
+ // |-|
+ else if (this.wrapEnd == this.embeddedEnd) {
+ return (byte) 10;
+ }
+
+ // Case 11
+ // |---|
+ // |---|
+ else if (this.wrapEnd > this.embeddedStart) {
+ return (byte) 11;
+ }
- // case 12
- // |-|
- // |-|
- return (byte) 12;
+ // case 12
+ // |-|
+ // |-|
+ return (byte) 12;
};
/** Returns the document number of the current match. Initially invalid. */
@Override
public int doc () {
- return matchDoc;
+ return matchDoc;
};
/** Returns the start position of the embedding wrap. Initially invalid. */
@Override
public int start () {
- return matchStart;
+ return matchStart;
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index f4e4a8d..75c407f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -410,9 +410,9 @@
// abcabcabac
FieldDocument fd = new FieldDocument();
- // The payload should be ignored
fd.addTV("base",
"abcabcabac",
+ // The payload should be ignored
"[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" + // |<>:p#0-10<i>9]" +
"[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
"[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
@@ -423,8 +423,8 @@
"[(7-8)s:b|i:b|_7#7-8]" +
"[(8-9)s:a|i:a|_8#8-9]" +
"[(9-10)s:c|i:c|_9#9-10]");
-
ki.addDoc(fd);
+ fd = new FieldDocument();
fd.addTV("base",
"gbcgbcgbgc",
"[(0-1)s:g|i:g|_0#0-1|-:t$<i>10|<>:p#0-10$<i>9]" +
@@ -438,6 +438,7 @@
"[(8-9)s:g|i:g|_8#8-9]" +
"[(9-10)s:c|i:c|_9#9-10]");
ki.addDoc(fd);
+ fd = new FieldDocument();
fd.addTV("base",
"gbcgbcgbgc",
"[(0-1)s:g|i:g|_0#0-1|-:t$<i>10]" +
@@ -451,18 +452,19 @@
"[(8-9)s:g|i:g|_8#8-9]" +
"[(9-10)s:c|i:c|_9#9-10]");
ki.addDoc(fd);
+ fd = new FieldDocument();
+ // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10|<>:p#0-10$<i>9]" +
+ "acabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10|<>:p#0-9$<i>8]" +
"[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
- "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
+ "[(2-3)s:a|i:a|_2#2-3|<>:s#2-7$<i>7]" +
+ "[(3-4)s:b|i:b|_3#3-4]" +
+ "[(4-5)s:c|i:c|_4#4-5]" +
+ "[(5-6)s:a|i:a|_5#5-6]" +
+ "[(6-7)s:b|i:b|_6#6-7]" +
+ "[(7-8)s:a|i:a|_7#7-8]" +
+ "[(8-9)s:c|i:c|_8#8-9]");
ki.addDoc(fd);
ki.commit();
@@ -472,6 +474,7 @@
assertEquals("Documents", 4, kc.numberOf("documents"));
+ // within(<p>, focus(3:within({2:<s>}, {3:a})))
sq = new SpanWithinQuery(
new SpanElementQuery("base", "p"),
new SpanMatchModifyClassQuery(
@@ -483,7 +486,11 @@
);
fail("Skipping may go horribly wrong! (Known issue)");
+
kr = kc.search(sq);
+ // System.err.println(kr.getOverview());
+
+
assertEquals(kr.getQuery(), "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))");
assertEquals(12, kr.getTotalResults());
assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
index 6b38784..a6f9eb3 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
@@ -378,17 +378,18 @@
// <a><a><a>h</a>hij</a>hij</a>
FieldDocument fd = new FieldDocument();
fd.addTV("base",
- "h i j h i j h i j ",
- "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
- "[(3-6)s:h]" + // 2
- "[(12-15)s:i]" + // 3
- "[(15-18)s:j]" + // 4
- "[(18-21)s:h]" + // 5
- "[(21-24)s:i]" + // 6
- "[(24-27)s:j]" + // 7
- "[(27-30)s:h]" + // 8
- "[(30-33)s:i]" + // 9
- "[(33-36)s:j]"); // 10
+ // <a><a>hhij</a>hijh</a>ij</a>
+ "h h i j h i j h i j ",
+ "[s:h|_0#0-3|<>:a#0-24$<i>7|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1
+ "[s:h|_1#3-6]" + // 2
+ "[s:i|_2#6-9]" + // 3
+ "[s:j|_3#9-12]" + // 4
+ "[s:h|_4#12-15]" + // 5
+ "[s:i|_5#15-18]" + // 6
+ "[s:j|_6#18-21]" + // 7
+ "[s:h|_7#21-24]" + // 8
+ "[s:i|_8#24-27]" + // 9
+ "[s:j|_9#27-30]"); // 10
ki.addDoc(fd);
// Save documents
@@ -404,11 +405,11 @@
assertEquals("totalResults", kr.getTotalResults(), 3);
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
+ assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos);
+ assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos);
sq = new SpanWithinQuery(
new SpanElementQuery("base", "a"),
@@ -417,11 +418,13 @@
kr = ki.search(sq, (short) 10);
- assertEquals("totalResults", kr.getTotalResults(), 9);
+ assertEquals("totalResults", kr.getTotalResults(), 10);
+
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
+ assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
+ assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);
+
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos);
assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
@@ -429,13 +432,16 @@
assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
- assertEquals("EndPos (5)", 10, kr.getMatch(5).endPos);
+ assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos);
+
assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
- assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos);
+ assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos);
assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
- assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos);
+ assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
- assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos);
+ assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos);
+ assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
+ assertEquals("EndPos (9)", 9, kr.getMatch(9).endPos);
};
@Test
@@ -446,18 +452,18 @@
// <a><a><a>h</a>hij</a>hij</a>h
FieldDocument fd = new FieldDocument();
fd.addTV("base",
- "h i j h i j h i j h ",
- "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+ "h h i j h i j h i j h ",
+ "[(0-3)s:h|<>:a#0-21$<i>6|<>:a#0-12$<i>3|<>:a#0-30$<i>9]" + // 1
"[(3-6)s:h]" + // 2
- "[(12-15)s:i]" + // 3
- "[(15-18)s:j]" + // 4
- "[(18-21)s:h]" + // 5
- "[(21-24)s:i]" + // 6
- "[(24-27)s:j]" + // 7
- "[(27-30)s:h]" + // 8
- "[(30-33)s:i]" + // 9
- "[(33-36)s:j]" + // 10
- "[(37-40)s:h]");
+ "[(6-9)s:i]" + // 3
+ "[(9-12)s:j]" + // 4
+ "[(12-15)s:h]" + // 5
+ "[(15-18)s:i]" + // 6
+ "[(18-21)s:j]" + // 7
+ "[(21-24)s:h]" + // 8
+ "[(24-27)s:i]" + // 9
+ "[(27-30)s:j]" + // 10
+ "[(30-33)s:h]");
ki.addDoc(fd);
// Save documents
@@ -470,11 +476,11 @@
assertEquals("totalResults", kr.getTotalResults(), 3);
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
+ assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos);
+ assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos);
+ assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos);
sq = new SpanWithinQuery(
new SpanElementQuery("base", "a"),
@@ -485,23 +491,23 @@
assertEquals("totalResults", kr.getTotalResults(), 9);
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
- assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
+ assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
+ assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos);
+ assertEquals("EndPos (2)", 6, kr.getMatch(2).endPos);
assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
- assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos);
+ assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos);
assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
- assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
+ assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos);
assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
- assertEquals("EndPos (5)", 10, kr.getMatch(5).endPos);
+ assertEquals("EndPos (5)", 9, kr.getMatch(5).endPos);
assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
- assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos);
+ assertEquals("EndPos (6)", 9, kr.getMatch(6).endPos);
assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
- assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos);
+ assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
- assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos);
+ assertEquals("EndPos (8)", 9, kr.getMatch(8).endPos);
};
@@ -513,19 +519,19 @@
// <a><a><a>h</a>hij</a>hij</a>h<a>i</i>
FieldDocument fd = new FieldDocument();
fd.addTV("base",
- "h i j h i j h i j h i ",
- "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+ "h h i j h i j h i j h i ",
+ "[(0-3)s:h|<>:a#0-21$<i>7|<>:a#0-15$<i>4|<>:a#0-30$<i>10]" + // 1
"[(3-6)s:h]" + // 2
- "[(12-15)s:i]" + // 3
- "[(15-18)s:j]" + // 4
- "[(18-21)s:h]" + // 5
- "[(21-24)s:i]" + // 6
- "[(24-27)s:j]" + // 7
- "[(27-30)s:h]" + // 8
- "[(30-33)s:i]" + // 9
- "[(33-36)s:j]" + // 10
- "[(37-40)s:h]" + // 11
- "[(40-43)s:i|<>:a#40-43$<i>12]"); // 12
+ "[(6-9)s:i]" + // 3
+ "[(9-12)s:j]" + // 4
+ "[(12-15)s:h]" + // 5
+ "[(15-18)s:i]" + // 6
+ "[(18-21)s:j]" + // 7
+ "[(21-24)s:h]" + // 8
+ "[(24-27)s:i]" + // 9
+ "[(27-30)s:j]" + // 10
+ "[(30-33)s:h]" + // 11
+ "[(33-36)s:i|<>:a#33-36$<i>12]"); // 12
ki.addDoc(fd);
// Save documents
@@ -554,25 +560,29 @@
kr = ki.search(sq, (short) 10);
- assertEquals("totalResults", kr.getTotalResults(), 9);
+ assertEquals("totalResults", kr.getTotalResults(), 11);
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos);
+ assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);
+
assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos);
assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
- assertEquals("EndPos (5)", 10, kr.getMatch(5).endPos);
+ assertEquals("EndPos (5)", 7, kr.getMatch(5).endPos);
+
assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos);
assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos);
assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos);
+ assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
+ assertEquals("EndPos (9)", 10, kr.getMatch(9).endPos);
};
@@ -584,19 +594,19 @@
// <a><a><a>h</a>hij</a>hij</a>h<a>h</h>
FieldDocument fd = new FieldDocument();
fd.addTV("base",
- "h i j h i j h i j h i ",
- "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
- "[(3-6)s:h]" + // 2
- "[(12-15)s:i]" + // 3
- "[(15-18)s:j]" + // 4
- "[(18-21)s:h]" + // 5
- "[(21-24)s:i]" + // 6
- "[(24-27)s:j]" + // 7
- "[(27-30)s:h]" + // 8
- "[(30-33)s:i]" + // 9
- "[(33-36)s:j]" + // 10
- "[(37-40)s:h]" + // 11
- "[(40-43)s:h|<>:a#40-43$<i>12]"); // 12
+ "h h i j h i j h i j h h ",
+ "[(0-3)s:h|_0#0-3|<>:a#0-18$<i>6|<>:a#0-15$<i>4|<>:a#0-27$<i>8]" + // 1
+ "[(3-6)s:h|_1#3-6]" + // 2
+ "[(6-9)s:i|_2#6-9]" + // 3
+ "[(9-12)s:j|_3#9-12]" + // 4
+ "[(12-15)s:h|_4#12-15]" + // 5
+ "[(15-18)s:i|_5#15-18]" + // 6
+ "[(18-21)s:j|_6#18-21]" + // 7
+ "[(21-24)s:h|_7#21-24]" + // 8
+ "[(24-27)s:i|_8#24-27]" + // 9
+ "[(27-30)s:j|_9#27-30]" + // 10
+ "[(30-33)s:h|_10#30-33|<>:a#30-36$<i>12]" + // 11
+ "[(33-36)s:h|_11#33-36|<>:a#33-36$<i>12]"); // 12
ki.addDoc(fd);
// Save documents
@@ -608,15 +618,18 @@
KorapResult kr = ki.search(sq, (short) 10);
- assertEquals("totalResults", kr.getTotalResults(), 4);
+ assertEquals("totalResults", kr.getTotalResults(), 5);
+
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
- assertEquals("EndPos (1)", 7, kr.getMatch(1).endPos);
+ assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 10, kr.getMatch(2).endPos);
- assertEquals("StartPos (3)", 11, kr.getMatch(3).startPos);
+ assertEquals("EndPos (2)", 8, kr.getMatch(2).endPos);
+ assertEquals("StartPos (3)", 10, kr.getMatch(3).startPos);
assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
+ assertEquals("StartPos (4)", 11, kr.getMatch(4).startPos);
+ assertEquals("EndPos (4)", 12, kr.getMatch(4).endPos);
sq = new SpanWithinQuery(
new SpanElementQuery("base", "a"),
@@ -625,27 +638,37 @@
kr = ki.search(sq, (short) 15);
- assertEquals("totalResults", kr.getTotalResults(), 10);
+ assertEquals("totalResults", kr.getTotalResults(), 13);
assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);
assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
assertEquals("EndPos (1)", 4, kr.getMatch(1).endPos);
assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
- assertEquals("EndPos (2)", 7, kr.getMatch(2).endPos);
+ assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);
+
assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
- assertEquals("EndPos (3)", 7, kr.getMatch(3).endPos);
+ assertEquals("EndPos (3)", 6, kr.getMatch(3).endPos);
assertEquals("StartPos (4)", 0, kr.getMatch(4).startPos);
- assertEquals("EndPos (4)", 7, kr.getMatch(4).endPos);
+ assertEquals("EndPos (4)", 6, kr.getMatch(4).endPos);
assertEquals("StartPos (5)", 0, kr.getMatch(5).startPos);
- assertEquals("EndPos (5)", 10, kr.getMatch(5).endPos);
+ assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
+
assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
- assertEquals("EndPos (6)", 10, kr.getMatch(6).endPos);
+ assertEquals("EndPos (6)", 8, kr.getMatch(6).endPos);
assertEquals("StartPos (7)", 0, kr.getMatch(7).startPos);
- assertEquals("EndPos (7)", 10, kr.getMatch(7).endPos);
+ assertEquals("EndPos (7)", 8, kr.getMatch(7).endPos);
assertEquals("StartPos (8)", 0, kr.getMatch(8).startPos);
- assertEquals("EndPos (8)", 10, kr.getMatch(8).endPos);
- assertEquals("StartPos (9)", 11, kr.getMatch(9).startPos);
- assertEquals("EndPos (9)", 12, kr.getMatch(9).endPos);
+ assertEquals("EndPos (8)", 8, kr.getMatch(8).endPos);
+ assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
+ assertEquals("EndPos (9)", 8, kr.getMatch(9).endPos);
+
+ assertEquals("StartPos (10)", 10, kr.getMatch(10).startPos);
+ assertEquals("EndPos (10)", 12, kr.getMatch(10).endPos);
+ assertEquals("StartPos (11)", 10, kr.getMatch(11).startPos);
+ assertEquals("EndPos (11)", 12, kr.getMatch(11).endPos);
+
+ assertEquals("StartPos (12)", 11, kr.getMatch(12).startPos);
+ assertEquals("EndPos (12)", 12, kr.getMatch(12).endPos);
};