Heavily simplified shrinking; preserve classes in all cases
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index d3a42ad..c09ff16 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -197,6 +197,9 @@
// highlightoffsets have 11 bytes (iis)!
public void addPayload (List<byte[]> payload) {
+ if (DEBUG)
+ log.trace("Add payloads to match");
+
// Reverse to make embedding of highlights correct
Collections.reverse(payload);
try {
@@ -216,13 +219,24 @@
bb.rewind();
int start = bb.getInt();
- int end = bb.getInt() -1;
+ int end = bb.getInt();
byte number = bb.get();
if (DEBUG)
- log.trace("Have a payload: {}-{}", start, end);
+ log.trace(
+ "Have a highlight of class {} in {}-{} inside of {}-{}",
+ number,
+ start,
+ end,
+ this.getStartPos(),
+ this.getEndPos()
+ );
- this.addHighlight(start, end, number);
+ // Ignore highlights outside the match range and classes set by the system (> 128)
+ if ((number & 0xFF) <= 128 &&
+ start >= this.getStartPos() &&
+ end <= this.getEndPos())
+ this.addHighlight(start, end - 1, number);
}
// Element payload for match!
@@ -248,12 +262,6 @@
this.potentialStartPosChar,
this.potentialEndPosChar
);
- }
-
- else if (b.length == 4) {
- bb.put(b);
- bb.rewind();
- log.warn("Unknown[4]: {}", bb.getInt());
};
// Clear bytebuffer
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index eb3ed27..f6bcbef 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -53,7 +53,7 @@
this.number = number;
this.divide = divide;
this.wrapQuery = wrapQuery;
- this.bb = ByteBuffer.allocate(20);
+ this.bb = ByteBuffer.allocate(9);
this.wrappedPayload = new ArrayList<byte[]>(6);
};
@@ -105,54 +105,41 @@
// Iterate over all payloads and find the maximum span per class
for (byte[] payload : spans.getPayload()) {
- bb.clear();
- bb.put(payload);
- bb.position(8);
+
+ // No class payload
+ if (payload.length != 9) {
+ if (DEBUG)
+ log.trace("Ignore old payload {}", payload);
+ continue;
+ };
// Todo: Implement Divide
// Found class payload of structure <i>start<i>end<b>class
- if (payload.length == 9) {
+ // and the class number matches!
+ if (payload[8] == this.number) {
+ bb.clear();
+ bb.put(payload);
+ bb.rewind();
+ tempStart = bb.getInt();
+ tempEnd = bb.getInt();
- // and classes are matches!
- if (bb.get() == this.number) {
- bb.rewind();
- tempStart = bb.getInt();
- tempEnd = bb.getInt();
+ if (DEBUG)
+ log.trace("Found matching class {}-{}", tempStart, tempEnd);
- if (DEBUG)
- log.trace("Found matching class {}-{}", tempStart, tempEnd);
-
- // Set start position
- if (start == -1)
- start = tempStart;
- else if (tempStart < start)
- start = tempStart;
-
- // Set end position
- if (tempEnd > end)
- end = tempEnd;
- }
-
- // Definately keep class information
- else {
- wrappedPayload.add(payload);
- };
- }
-
- // No class payload
- else {
-
- // Keep as we won't shrink
- if (start == -1) {
- if (DEBUG)
- log.trace("Remember old payload {}", payload);
- wrappedPayload.add(payload);
- }
- else if (DEBUG) {
- if (DEBUG)
- log.trace("Ignore old payload {}", payload);
- };
+ // Set start position
+ if (start == -1)
+ start = tempStart;
+ else if (tempStart < start)
+ start = tempStart;
+
+ // Set end position
+ if (tempEnd > end)
+ end = tempEnd;
};
+
+ // Definitely keep class information
+ // Even if it is already used for shrinking
+ wrappedPayload.add(payload);
};
};
@@ -168,16 +155,6 @@
end
);
- // Only keep class information
- // This may change later on ...
- for (int i = wrappedPayload.size() - 1; i >= 0; i--) {
- if (wrappedPayload.get(i).length != 9) {
- if (DEBUG)
- log.trace("Forget old payload {}", wrappedPayload.get(i));
- wrappedPayload.remove(i);
- };
- };
-
return true;
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index fb8850d..8e6534d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -87,7 +87,7 @@
assertEquals("totalResults", 1, kr.totalResults());
assertEquals("StartPos (0)", 8, kr.match(0).startPos);
assertEquals("EndPos (0)", 9, kr.match(0).endPos);
- assertEquals("SnippetBrackets (0)", "... cabcab[a]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "... cabcab[{1:a}]c", kr.match(0).snippetBrackets());
sq = new SpanMatchModifyClassQuery(
new SpanNextQuery(
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
@@ -100,20 +100,20 @@
assertEquals("totalResults", 3, kr.totalResults());
assertEquals("StartPos (0)", 1, kr.match(0).startPos);
assertEquals("EndPos (0)", 2, kr.match(0).endPos);
- assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...", kr.match(0).snippetBrackets());
- assertEquals("<span class=\"context-left\">a</span><span class=\"match\">b</span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>", kr.match(0).snippetHTML());
+ assertEquals("<span class=\"context-left\">a</span><span class=\"match\"><em class=\"class-3 level-0\">b</em></span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>", kr.match(0).snippetHTML());
assertEquals("StartPos (1)", 4, kr.match(1).startPos);
assertEquals("EndPos (1)", 5, kr.match(1).endPos);
- assertEquals("SnippetBrackets (1)", "abca[b]cabac", kr.match(1).snippetBrackets());
+ assertEquals("SnippetBrackets (1)", "abca[{3:b}]cabac", kr.match(1).snippetBrackets());
- assertEquals("<span class=\"context-left\">abca</span><span class=\"match\">b</span><span class=\"context-right\">cabac</span>", kr.match(1).snippetHTML());
+ assertEquals("<span class=\"context-left\">abca</span><span class=\"match\"><em class=\"class-3 level-0\">b</em></span><span class=\"context-right\">cabac</span>", kr.match(1).snippetHTML());
assertEquals("StartPos (2)", 7, kr.match(2).startPos);
assertEquals("EndPos (2)", 8, kr.match(2).endPos);
- assertEquals("SnippetBrackets (2)", "... bcabca[b]ac", kr.match(2).snippetBrackets());
+ assertEquals("SnippetBrackets (2)", "... bcabca[{3:b}]ac", kr.match(2).snippetBrackets());
@@ -133,23 +133,23 @@
// System.err.println(kr.toJSON());
assertEquals("totalResults", 1, kr.totalResults());
- assertEquals("SnippetBrackets (0)", "... bcabca[b{1:a}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "... bcabca[{2:b{1:a}}]c", kr.match(0).snippetBrackets());
- assertEquals("SnippetHTML (0) 1", "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\">b<em class=\"class-1 level-0\">a</em></span><span class=\"context-right\">c</span>", kr.match(0).snippetHTML());
+ assertEquals("SnippetHTML (0) 1", "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\"><em class=\"class-2 level-0\">b<em class=\"class-1 level-1\">a</em></em></span><span class=\"context-right\">c</span>", kr.match(0).snippetHTML());
// Offset tokens
kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
assertEquals("totalResults", 1, kr.totalResults());
- assertEquals("SnippetBrackets (0)", "... ca[b{1:a}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "... ca[{2:b{1:a}}]c", kr.match(0).snippetBrackets());
// Offset Characters
kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
assertEquals("totalResults", 1, kr.totalResults());
- assertEquals("SnippetBrackets (0)", "... a[b{1:a}] ...", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "... a[{2:b{1:a}}] ...", kr.match(0).snippetBrackets());
- assertEquals("SnippetHTML (0) 2", "<span class=\"context-left\"><span class=\"more\"></span>a</span><span class=\"match\">b<em class=\"class-1 level-0\">a</em></span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.match(0).snippetHTML());
+ assertEquals("SnippetHTML (0) 2", "<span class=\"context-left\"><span class=\"more\"></span>a</span><span class=\"match\"><em class=\"class-2 level-0\">b<em class=\"class-1 level-1\">a</em></em></span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.match(0).snippetHTML());
sq = new SpanMatchModifyClassQuery(
new SpanClassQuery(
@@ -165,10 +165,10 @@
assertEquals("totalResults", 2, kr.totalResults());
assertEquals("StartPos (0)", 1, kr.match(0).startPos);
assertEquals("EndPos (0)", 3, kr.match(0).endPos);
- assertEquals("SnippetBrackets (0)", "a[{1:b}{2:c}]abcaba ...", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "a[{3:{1:b}{2:c}}]abcaba ...", kr.match(0).snippetBrackets());
assertEquals("StartPos (1)", 4, kr.match(1).startPos);
assertEquals("EndPos (1)", 6, kr.match(1).endPos);
- assertEquals("SnippetBrackets (1)", "abca[{1:b}{2:c}]abac", kr.match(1).snippetBrackets());
+ assertEquals("SnippetBrackets (1)", "abca[{3:{1:b}{2:c}}]abac", kr.match(1).snippetBrackets());
assertEquals("Document count", 1, ki.numberOf("base", "documents"));
assertEquals("Token count", 10, ki.numberOf("base", "t"));
@@ -202,10 +202,10 @@
assertEquals("totalResults", 2, kr.totalResults());
assertEquals("StartPos (0)", 1, kr.match(0).startPos);
assertEquals("EndPos (0)", 3, kr.match(0).endPos);
- assertEquals("SnippetBrackets (0)", "a[bc]abcaba ...", kr.match(0).snippetBrackets());
+ assertEquals("SnippetBrackets (0)", "a[{1:bc}]abcaba ...", kr.match(0).snippetBrackets());
assertEquals("StartPos (1)", 4, kr.match(1).startPos);
assertEquals("EndPos (1)", 6, kr.match(1).endPos);
- assertEquals("SnippetBrackets (1)", "abca[bc]abac", kr.match(1).snippetBrackets());
+ assertEquals("SnippetBrackets (1)", "abca[{1:bc}]abac", kr.match(1).snippetBrackets());
assertEquals(1, ki.numberOf("base", "documents"));
assertEquals(10, ki.numberOf("base", "t"));
@@ -344,7 +344,7 @@
KorapMatch km = kr.match(0);
assertEquals("StartPos (0)", 1, km.startPos);
assertEquals("EndPos (0)", 2, km.endPos);
- assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", km.getSnippetBrackets());
+ assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...", km.getSnippetBrackets());
sq = new SpanMatchModifyClassQuery(
new SpanMatchModifyClassQuery(
@@ -360,7 +360,7 @@
km = kr.match(0);
assertEquals("StartPos (0)", 0, km.startPos);
assertEquals("EndPos (0)", 1, km.endPos);
- assertEquals("SnippetBrackets (0)", "[a]bcabca ...", km.getSnippetBrackets());
+ assertEquals("SnippetBrackets (0)", "[{2:a}]bcabca ...", km.getSnippetBrackets());
// TODO: Check ID
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index b2606bc..6ee8366 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -633,10 +633,10 @@
assertEquals(
kr.getMatch(0).getSnippetBrackets(),
"... Initiative\" eine neue politische Gruppierung ins " +
- "[Leben] gerufen hatten. Pressemeldungen zufolge haben sich ..."
+ "[{1:Leben}] gerufen hatten. Pressemeldungen zufolge haben sich ..."
);
- // Try with high class
+ // Try with high class - don't highlight
ks = new KorapSearch(
kq.shrink(129, kq.contains(kq.tag("base/s:s"), kq._(129, kq.seg("s:Leben"))))
);