Bug hunting in highlighting
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
index a045e5f..16730ce 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanClassQuery.java
@@ -65,7 +65,8 @@
@Override
public Spans getSpans (final AtomicReaderContext context,
- Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
return (Spans) new ClassSpans(
this.highlight,
context,
@@ -105,7 +106,7 @@
/** Returns true iff <code>o</code> is equal to this. */
@Override
- public boolean equals(Object o) {
+ public boolean equals (Object o) {
if (this == o) return true;
if (!(o instanceof SpanClassQuery)) return false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 1838f2e..6de3fb7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -24,13 +24,20 @@
private Collection<byte[]> payload;
private final Spans spans;
private byte number;
- private ByteBuffer bb;
private SpanQuery highlight;
private Boolean hasmorespans = false;
- private final Logger log = LoggerFactory.getLogger(ClassSpans.class);
+ private ByteBuffer bb = ByteBuffer.allocate(9);
- public ClassSpans (SpanQuery highlight, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, byte number) throws IOException {
+ private final static Logger log = LoggerFactory.getLogger(ClassSpans.class);
+ // Compile-time flag: while false, the compiler can drop all guarded log calls
+ public static final boolean DEBUG = false;
+
+ public ClassSpans (SpanQuery highlight,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts,
+ byte number) throws IOException {
spans = highlight.getSpans(context, acceptDocs, termContexts);
this.number = number;
this.highlight = highlight;
@@ -39,18 +46,11 @@
@Override
public Collection<byte[]> getPayload() throws IOException {
- /*
- for (byte[] x: highlightedPayload) {
- ByteBuffer b = ByteBuffer.wrap(x, 0, x.length);
- log.trace(">> Get Payload: {}-{} in class {}", b.getInt(), b.getInt(), b.get());
- };
- */
return highlightedPayload;
};
@Override
public boolean isPayloadAvailable() {
- // return highlightedPayload.isEmpty() == false;
return true;
};
@@ -68,7 +68,8 @@
// inherit javadocs
@Override
public boolean next() throws IOException {
- log.trace("Forward next");
+ if (DEBUG)
+ log.trace("Forward next");
if (spans.next()) {
hasmorespans = true;
@@ -77,22 +78,34 @@
if (spans.isPayloadAvailable()) {
highlightedPayload.addAll(spans.getPayload());
- log.trace("Found payload");
+ if (DEBUG)
+ log.trace("Found payload");
};
-
- log.trace("Start to create class {} with span {} - {}",
- number,
- spans.start(),
- spans.end());
+ if (DEBUG)
+ log.trace("Start to create class {} with span {} - {}",
+ number,
+ spans.start(),
+ spans.end());
// Todo: Better allocate using a Factory!
- bb = ByteBuffer.allocate(9);
-
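+ // Reuse the preallocated buffer. NOTE(review): bb.array() below hands out this shared backing array - confirm consumers copy it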
+ bb.clear();
bb.putInt(spans.start()).putInt(spans.end()).put(number);
+ /*
+ if (DEBUG)
+ log.trace("Results in {} with {}", bb.toString(), bb.array());
+ */
// Add highlight information as byte after offsets
highlightedPayload.add(bb.array());
+ /*
+ if (DEBUG) {
+ bb.rewind();
+ log.trace("That was a class from {}-{} of class {}", bb.getInt(), bb.getInt(), bb.get());
+ };
+ */
+
return true;
};
hasmorespans = false;
@@ -102,6 +115,7 @@
// inherit javadocs
@Override
public boolean skipTo(int target) throws IOException {
+ highlightedPayload.clear();
if (hasmorespans && spans.doc() < target)
return spans.skipTo(target);
return false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index a44ed49..531b927 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -27,10 +27,6 @@
*/
public class ElementSpans extends Spans {
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
-
private byte[] payloadByte = new byte[4];
private ByteBuffer bb = ByteBuffer.allocate(4);
@@ -46,6 +42,8 @@
private KorapTermSpan overflow, tempSpan;
private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
+ // Compile-time flag: while false, the compiler can drop all guarded log calls
+ public static final boolean DEBUG = false;
/**
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index d177eb3..c1b2d1c 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -23,7 +23,7 @@
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
-public class TestBenchmarkElementSpans {
+public class TestBenchmarkSpans {
@Test
public void checkBenchmark1 () throws IOException {
@@ -152,6 +152,80 @@
@Test
+ public void checkBenchmarkClasses () throws IOException {
+ // [orth=der]{1:[orth=Mann]{2:[orth=und]}}
+
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
+
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
+
+ long t1 = 0, t2 = 0;
+ // Without classes
+ String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
+
+ int rounds = 2000;
+
+ KorapResult kr = new KorapResult();
+
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
+
+ double seconds = (double)(t2-t1) / 1000000000.0;
+
+ System.out.println("It took " + seconds + " seconds without classes");
+
+ t1 = 0;
+ t2 = 0;
+ // With classes
+ json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
+
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
+
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ System.out.println("It took " + seconds + " seconds with classes");
+
+
+ // System.err.println(kr.toJSON());
+
+ System.err.println(kr.getMatch(3).getSnippetBrackets());
+
+
+ // 2000 rounds:
+ // It took 10.872934435 seconds without classes
+ // It took 22.581117396 seconds with classes
+
+ // It took 10.703933598 seconds without classes
+ // It took 19.354674517 seconds with classes
+
+ // It took 10.939948726 seconds without classes
+ // It took 16.998470662 seconds with classes
+
+ // It took 10.900975837 seconds without classes
+ // It took 14.902590949 seconds with classes
+
+ // It took 10.365989238 seconds without classes
+ // It took 13.833405885 seconds with classes
+
+ };
+
+
+
+
+ @Test
public void checkBenchmarkIndexDocuments () throws IOException {
long t1 = 0, t2 = 0;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 0127a65..bb2ff5f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -187,6 +187,11 @@
System.err.println(kr.toJSON());
*/
+
+ kr = ki.search(query, 0, (short) 1, true, (short) 2, false, (short) 5);
+ assertEquals("... Buchstabe des [{1:{2:lateinischen} Alphabets}] und ...", kr.match(0).getSnippetBrackets());
+
+
kr = ki.search(query, 0, (short) 50, true, (short) 2, false, (short) 5);
// System.err.println(kr.toJSON());
diff --git a/src/test/resources/queries/benchmark5-ohne.jsonld b/src/test/resources/queries/benchmark5-ohne.jsonld
new file mode 100644
index 0000000..725be7a
--- /dev/null
+++ b/src/test/resources/queries/benchmark5-ohne.jsonld
@@ -0,0 +1,33 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:group",
+ "operation": "operation:sequence",
+ "operands": [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "der"
+ }
+ },
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "Mann"
+ }
+ },
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "und"
+ }
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/benchmark5.jsonld b/src/test/resources/queries/benchmark5.jsonld
new file mode 100644
index 0000000..332d23f
--- /dev/null
+++ b/src/test/resources/queries/benchmark5.jsonld
@@ -0,0 +1,54 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:group",
+ "operation": "operation:sequence",
+ "operands": [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "der"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 1,
+ "operands" : [
+ {
+ "@type": "korap:group",
+ "operation": "operation:sequence",
+ "operands" : [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "Mann"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 2,
+ "operands" : [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "und"
+ }
+ }
+ ]
+ }
+
+ ]
+ }
+ ]
+ }
+ ]
+ }
+}