Bugfix for highlighting classes with large class numbers
diff --git a/CHANGES b/CHANGES
index 8dac796..b0d8311 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,6 @@
0.48 2014-11-06
- [feature] Retrieval of token lists (diewald)
+ - [bugfix] Class numbers up to 127 can now be highlighted.
0.47 2014-11-05
- [feature] Support new index format with more metadata (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 5291821..b8afd1d 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -1001,9 +1001,9 @@
end--;
// Set levels for highlights
- FixedBitSet level = new FixedBitSet(16);
- level.set(0, 15);
- byte[] levelCache = new byte[16];
+ FixedBitSet level = new FixedBitSet(255);
+ level.set(0, 255);
+ byte[] levelCache = new byte[255];
// First element of sorted array
HighlightCombinatorElement elem = this.snippetArray.getFirst();
diff --git a/src/main/java/de/ids_mannheim/korap/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/match/HighlightCombinatorElement.java
index 036e244..cafb8f4 100644
--- a/src/main/java/de/ids_mannheim/korap/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/match/HighlightCombinatorElement.java
@@ -76,6 +76,8 @@
};
sb.append('>');
}
+
+ // Highlight - < 256
else {
// Get the first free level slot
byte pos;
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index d775bcc..69cc84f 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -14,8 +14,8 @@
import de.ids_mannheim.korap.util.QueryException;
-
-import static de.ids_mannheim.korap.Test.*;
+import static de.ids_mannheim.korap.TestSimple.*;
+//import static de.ids_mannheim.korap.Test.*;
import static org.junit.Assert.*;
import org.junit.Test;
@@ -263,4 +263,71 @@
assertEquals("[{3:{1:a}}] ...", kr.getMatch(8).getSnippetBrackets());
assertEquals("... [{3:{2:a}}]", kr.getMatch(9).getSnippetBrackets());
};
+
+
+ @Test
+ public void highlightGreaterClassBug () throws IOException, QueryException {
+ // Regression test: snippets for class numbers 15, 16, 127, 255 and the error for 300
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+
+ // Class 15: expected to be highlighted
+ String json = getString(getClass().getResource("/queries/bugs/greater_highlights_15.jsonld").getFile());
+
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals("{15: tokens:s:Alphabet}", kr.getQuery());
+ assertEquals(7, kr.totalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals("... 2. Herkunft Die aus dem proto-semitischen [{15:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-15 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+
+
+ // Class 16: expected to be highlighted
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_16.jsonld").getFile());
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals("{16: tokens:s:Alphabet}", kr.getQuery());
+ assertEquals(7, kr.totalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals("... 2. Herkunft Die aus dem proto-semitischen [{16:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-16 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+
+ // Class 127: expected to be highlighted
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_127.jsonld").getFile());
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals("{127: tokens:s:Alphabet}", kr.getQuery());
+ assertEquals(7, kr.totalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals("... 2. Herkunft Die aus dem proto-semitischen [{127:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-127 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+
+ // Class 255: the query still runs, but the expected snippets carry no class markup
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_255.jsonld").getFile());
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals("{255: tokens:s:Alphabet}", kr.getQuery());
+ assertEquals(7, kr.totalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals("... 2. Herkunft Die aus dem proto-semitischen [Alphabet] stammende Urform des Buchstaben ist wahrscheinlich ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\">Alphabet</span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+
+ // Class 300: expected to be reported as an error
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_300.jsonld").getFile());
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals("Class numbers limited to 255", kr.getErrstr());
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index c28a74f..36632e6 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -1056,19 +1056,4 @@
assertEquals((long) 1, map.get("#__music:###:singing"));
assertEquals(11, map.size());
};
-
- public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
- };
- in.close();
- } catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
- };
};
diff --git a/src/test/resources/queries/bugs/greater_highlights_127.jsonld b/src/test/resources/queries/bugs/greater_highlights_127.jsonld
new file mode 100644
index 0000000..cac72db
--- /dev/null
+++ b/src/test/resources/queries/bugs/greater_highlights_127.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "class" : 127,
+ "classOut" : 127,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Alphabet",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+}
diff --git a/src/test/resources/queries/bugs/greater_highlights_15.jsonld b/src/test/resources/queries/bugs/greater_highlights_15.jsonld
new file mode 100644
index 0000000..c9f3b93
--- /dev/null
+++ b/src/test/resources/queries/bugs/greater_highlights_15.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "class" : 15,
+ "classOut" : 15,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Alphabet",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+}
diff --git a/src/test/resources/queries/bugs/greater_highlights_16.jsonld b/src/test/resources/queries/bugs/greater_highlights_16.jsonld
new file mode 100644
index 0000000..7f429f7
--- /dev/null
+++ b/src/test/resources/queries/bugs/greater_highlights_16.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "class" : 16,
+ "classOut" : 16,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Alphabet",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+}
diff --git a/src/test/resources/queries/bugs/greater_highlights_255.jsonld b/src/test/resources/queries/bugs/greater_highlights_255.jsonld
new file mode 100644
index 0000000..43b0c80
--- /dev/null
+++ b/src/test/resources/queries/bugs/greater_highlights_255.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "class" : 255,
+ "classOut" : 255,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Alphabet",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+}
diff --git a/src/test/resources/queries/bugs/greater_highlights_300.jsonld b/src/test/resources/queries/bugs/greater_highlights_300.jsonld
new file mode 100644
index 0000000..fc1a994
--- /dev/null
+++ b/src/test/resources/queries/bugs/greater_highlights_300.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "class" : 300,
+ "classOut" : 300,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Alphabet",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+}