Minor fixes and a documented bug in submatches (probably an index bug though)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceExclusionSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceExclusionSpan.java
index 520ff75..b8fc20f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceExclusionSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceExclusionSpan.java
@@ -35,7 +35,9 @@
private int minDistance, maxDistance;
private int firstSpanPostion;
-
+
+ public static final boolean DEBUG = false;
+
public ElementDistanceExclusionSpan(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts, boolean isOrdered)
@@ -227,9 +229,10 @@
matchPayload.addAll(match.getPayloads());
setMatchFirstSpan(match);
-
- log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
- matchEndPosition);
+
+ if (DEBUG)
+ log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
+ matchEndPosition);
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
index 94a528b..a12b565 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
@@ -10,12 +10,12 @@
import org.slf4j.LoggerFactory;
public class KorapTermSpan extends KorapSpan {
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
public ByteBuffer payload;
private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
@Override
public Object clone() {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index 0a4f490..1c831ab 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -29,9 +29,12 @@
private SpanQuery highlight;
private final Logger log = LoggerFactory.getLogger(MatchModifyClassSpans.class);
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
private int start = -1, end;
- private int tempStart, tempEnd = 0;
+ private int tempStart = 0, tempEnd = 0;
public MatchModifyClassSpans (SpanQuery highlight,
@@ -72,7 +75,8 @@
// inherit javadocs
@Override
public boolean next() throws IOException {
- log.trace("||> Forward next");
+ if (DEBUG)
+ log.trace("||> Forward next");
highlightedPayload.clear();
@@ -87,6 +91,7 @@
if (spans.isPayloadAvailable()) {
end = 0;
+ // Iterate over all payloads and find the maximum span per class
for (byte[] payload : spans.getPayload()) {
bb.clear();
bb.put(payload);
@@ -99,7 +104,8 @@
tempStart = bb.getInt();
tempEnd = bb.getInt();
- log.trace("Found matching class {}-{}", tempStart, tempEnd);
+ if (DEBUG)
+ log.trace("Found matching class {}-{}", tempStart, tempEnd);
if (start == -1)
start = tempStart;
@@ -109,14 +115,19 @@
if (tempEnd > end)
end = tempEnd;
}
+
+ // Doesn't mark an important payload - but should be kept
else {
- log.trace("Remember old payload {}", payload);
+ if (DEBUG)
+ log.trace("Remember old payload {}", payload);
highlightedPayload.add(payload);
};
};
- log.trace("All payload processed, now clean up");
+ if (DEBUG)
+ log.trace("All payload processed, now clean up");
+ // We have a payload found that is a class for modification!
if (start != -1) {
int i = highlightedPayload.size() - 1;
@@ -126,12 +137,15 @@
bb.rewind();
if (bb.getInt() < start || bb.getInt() > end) {
bb.rewind();
- log.trace("Remove highlight {} with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
+ if (DEBUG)
+ log.trace("Remove highlight {} with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
highlightedPayload.remove(i);
continue;
};
- bb.rewind();
- log.trace("Highlight {} will stay with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
+ if (DEBUG) {
+ bb.rewind();
+ log.trace("Highlight {} will stay with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
+ };
};
/*
* Todo: SPLIT
@@ -146,7 +160,7 @@
start = spans.start();
end = spans.end();
}
- else {
+ else if (DEBUG) {
log.trace("Start to shrink to {} - {} class: {}",
start, end, number);
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
index b15e94d..51ec7bd 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
@@ -3,6 +3,9 @@
import java.io.IOException;
import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -25,7 +28,11 @@
* @author margaretha
* */
public class MultipleDistanceSpans extends DistanceSpans{
-
+
+ private final static Logger log = LoggerFactory.getLogger(MultipleDistanceSpans.class);
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
private DistanceSpans x,y;
private boolean isOrdered;
@@ -126,8 +133,9 @@
setMatchFirstSpan(span.getMatchFirstSpan());
if (!exclusion) setMatchSecondSpan(span.getMatchSecondSpan());
- log.trace("doc# {}, start {}, end {}",matchDocNumber,
- matchStartPosition,matchEndPosition);
+ if (DEBUG)
+ log.trace("doc# {}, start {}, end {}",matchDocNumber,
+ matchStartPosition,matchEndPosition);
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
index 1fb9474..1dd1a04 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
@@ -17,6 +17,8 @@
* */
public abstract class OrderedDistanceSpans extends DistanceSpans {
+ public static final boolean DEBUG = false;
+
protected boolean hasMoreFirstSpans;
protected int minDistance,maxDistance;
@@ -106,9 +108,10 @@
matchPayload.addAll(secondSpans.getPayload());
}
}
-
- log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
- matchEndPosition);
+
+ if (DEBUG)
+ log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
+ matchEndPosition);
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index 923debc..abd33e9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -21,7 +21,10 @@
private long matchCost;
private List<CandidateSpan> matchList;
private Logger log = LoggerFactory.getLogger(RepetitionSpans.class);
-
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
+
public RepetitionSpans(SpanRepetitionQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
@@ -102,8 +105,9 @@
if (collectPayloads && candidateSpan.getPayloads() != null) {
matchPayload.addAll(candidateSpan.getPayloads());
}
- log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
- matchEndPosition);
+ if (DEBUG)
+ log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
+ matchEndPosition);
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 843fe9d..7934f34 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -29,7 +29,10 @@
private long matchCost;
private int matchListSpanNum;
protected int currentDocNum;
-
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
public UnorderedDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
@@ -95,6 +98,7 @@
if (currentFirstSpan.getEnd() < currentSecondSpan.getEnd() ||
isLastCandidateSmaller(currentFirstSpan, currentSecondSpan)){
+ if (DEBUG)
log.trace("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
// System.out.println("candidates:");
// for (CandidateSpan cs: secondSpanList) {
@@ -107,6 +111,7 @@
updateList(firstSpanList);
}
else {
+ if (DEBUG)
log.trace("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
// System.out.println("candidates:");
// for (CandidateSpan cs: firstSpanList) {
@@ -120,13 +125,17 @@
}
}
else if (firstSpanList.isEmpty()){
+ if (DEBUG) {
log.trace("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
log.trace("candidates: empty");
+ };
updateList(secondSpanList);
}
- else{
+ else{
+ if (DEBUG) {
log.trace("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
log.trace("candidates: empty");
+ };
updateList(firstSpanList);
}
}
@@ -200,10 +209,12 @@
if (matchListSpanNum == 1)
setMatchFirstSpan(cs.getChildSpan());
else setMatchSecondSpan(cs.getChildSpan());
-
- log.trace("Match doc#={} start={} end={}",matchDocNumber,matchStartPosition,matchEndPosition);
- log.trace("firstspan "+getMatchFirstSpan().getStart()+" "+ getMatchFirstSpan().getEnd());
- log.trace("secondspan "+getMatchSecondSpan().getStart()+" "+ getMatchSecondSpan().getEnd());
+
+ if (DEBUG) {
+ log.trace("Match doc#={} start={} end={}",matchDocNumber,matchStartPosition,matchEndPosition);
+ log.trace("firstspan "+getMatchFirstSpan().getStart()+" "+ getMatchFirstSpan().getEnd());
+ log.trace("secondspan "+getMatchSecondSpan().getStart()+" "+ getMatchSecondSpan().getEnd());
+ };
}
@Override
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index 4d8c191..8bcc0eb 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -206,10 +206,30 @@
System.out.println("It took " + seconds + " seconds with classes");
+ t1 = 0;
+ t2 = 0;
+ // With submatch
+ json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
+
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
+
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ System.out.println("It took " + seconds + " seconds with submatches");
+
+ /** HERE IS A BUG in submatches (probably an index bug)! */
+
+ System.err.println(kr.toJSON());
+
+
// System.err.println(kr.toJSON());
- System.err.println(kr.getMatch(3).getSnippetBrackets());
+ // System.err.println(kr.getMatch(3).getSnippetBrackets());
// 2000 rounds:
@@ -228,6 +248,13 @@
// It took 10.365989238 seconds without classes
// It took 13.833405885 seconds with classes
+ // It took 15.368675425 seconds without classes
+ // It took 18.347603186 seconds with classes
+ // It took 15.941057294 seconds with submatches
+
+ // It took 15.241253549 seconds without classes
+ // It took 17.30375624 seconds with classes
+ // It took 15.367171254 seconds with submatches
};
diff --git a/src/test/resources/queries/benchmark5-submatch.jsonld b/src/test/resources/queries/benchmark5-submatch.jsonld
new file mode 100644
index 0000000..a9cad71
--- /dev/null
+++ b/src/test/resources/queries/benchmark5-submatch.jsonld
@@ -0,0 +1,61 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:group",
+ "operation": "operation:sequence",
+ "operands": [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "der"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:submatch",
+ "classRef" : [2],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 1,
+ "operands" : [
+ {
+ "@type": "korap:group",
+ "operation": "operation:sequence",
+ "operands" : [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "Mann"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 2,
+ "operands" : [
+ {
+ "@type": "korap:token",
+ "wrap" : {
+ "@type": "korap:term",
+ "layer": "orth",
+ "key" : "und"
+ }
+ }
+ ]
+ }
+
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+}