Fixed focus bug with element payloads
Change-Id: Iee16846b9e8619d899bbd4a04321712bc11f7d28
diff --git a/misc/payloads.md b/misc/payloads.md
index 65ac2bf..a63f492 100644
--- a/misc/payloads.md
+++ b/misc/payloads.md
@@ -71,8 +71,8 @@
65 Milestone (with optional TUI and certainty)
### Relation payloads
-Each relation are indexed with two instances for both directions.
-he direction of a relation is determined by the following symbols:
+Each relation is indexed with two instances for both directions.
+The direction of a relation is determined by the following symbols:
> source to target
< target to source
@@ -84,7 +84,7 @@
* the target token/span positions for \< relation.
Relation payloads are varied based on the types of their left and
-ight parts, which again can be either a source or a target of the
+right parts, which again can be either a source or a target of the
relation.
* If the left part of a relation is an element, the end position
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index e963a22..684b020 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -17,6 +17,8 @@
public static final boolean DEBUG = false;
+ // TODO: Support various terms - including relations!
+
private String foundry, layer, value, term, type, annotation;
// type can be "term", "pos", "span", "rel-src", "rel-target"
@@ -29,7 +31,7 @@
private byte depth = (byte) 0;
private Pattern prefixRegex = Pattern
- .compile("(?:([^/]+)/)?([^:/]+)(?::(.+?))?");
+ .compile("(?:([^/]+)/)?([^:/]+)(?::(.+?))?");
private Matcher matcher;
@@ -50,47 +52,48 @@
int lastPos = this.payload.position();
this.payload.rewind();
+ // TODO: Dispatch on the payload's PTI byte instead of the term's prefix character!
switch (tterm.charAt(0)) {
- case '<':
- // "<>:mate/l:..."
- if (tterm.charAt(1) == '>') {
- // span
- this.type = "span";
- tterm = tterm.substring(3);
- ttype = 2;
- }
- // rel-target
- else {
- this.type = "relTarget";
- tterm = tterm.substring(2);
- ttype = 3;
- }
- ;
- break;
-
- case '>':
- // rel-src
- this.type = "relSrc";
+ case '<':
+ // "<>:mate/l:..."
+ if (tterm.charAt(1) == '>') {
+ // span
+ this.type = "span";
+ tterm = tterm.substring(3);
+ ttype = 2;
+ }
+ // rel-target
+ else {
+ this.type = "relTarget";
tterm = tterm.substring(2);
ttype = 3;
- break;
+ }
+ ;
+ break;
- case '_':
- // pos
- this.type = "pos";
- ttype = 1;
- tterm = tterm.substring(1);
- break;
+ case '>':
+ // rel-src
+ this.type = "relSrc";
+ tterm = tterm.substring(2);
+ ttype = 3;
+ break;
- default:
- // term
- this.type = "term";
+ case '_':
+ // pos
+ this.type = "pos";
+ ttype = 1;
+ tterm = tterm.substring(1);
+ break;
+
+ default:
+ // term
+ this.type = "term";
};
// Analyze term value
if (ttype != 1) {
- this.payload.get(); // Ignore PTI
+ this.payload.get(); // Ignore PTI - temporary!!!
if (DEBUG)
log.trace("Check {} for {}", tterm, prefixRegex.toString());
@@ -121,12 +124,16 @@
// for spans and relations
if (ttype > 1) {
- // Unsure if this is correct
- this.endPos = this.payload.getInt() - 1;
+ if (this.type.equals("relTarget")) {
+ this.endPos = this.startPos;
+ this.startPos = this.payload.getInt() - 1;
+ }
+ else {
+ this.endPos = this.payload.getInt() - 1;
+ };
};
// Ignore link id for the moment
-
if (ttype == 2 && this.payload.position() < lastPos) {
this.depth = this.payload.get();
};
@@ -220,7 +227,7 @@
@Override
- public int compareTo (TermInfo obj) {
+ public int compareTo (TermInfo obj) {
this.analyze();
obj.analyze();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index 84fe076..f5caa29 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -167,7 +167,7 @@
// this may be problematic for other calculated payloads!
if ((!matchTemporaryClass && payload.length == 10)
- || (matchTemporaryClass && payload.length == 11)) {
+ || (matchTemporaryClass && payload.length == 11)) {
if (payload[0] == 0) {
if (classNumbers.contains(payload[9])) {
@@ -190,6 +190,11 @@
}
}
+ // Skip element (span) payloads, i.e. payloads whose first byte (PTI) is 64
+ else if (payload[0] == (byte) 64) {
+ continue;
+ };
+
if (//payload.length == 8 ||
(removeTemporaryClasses && payload.length == 11)) {
continue;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 1f2e338..9bbef70 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -228,7 +228,7 @@
for (byte[] b : payload) {
if (DEBUG)
- log.trace("Found a payload with length {}", b.length);
+ log.trace("Found a payload of pti {}", b[0]);
// Todo element searches!
@@ -244,14 +244,20 @@
if (DEBUG)
log.trace(
"Have a highlight of class {} in {}-{} inside of {}-{}",
- number, start, end, this.getStartPos(),
+ number, start, end,
+ this.getStartPos(),
this.getEndPos());
// Ignore classes out of match range and set by the system
- // TODO: This may be decidable by PT!!
+ // TODO: This may be decidable by PTI!
if ((number & 0xFF) <= 128 && start >= this.getStartPos()
- && end <= this.getEndPos())
+ && end <= this.getEndPos()) {
+ log.trace("Add highlight of class {}!", number);
this.addHighlight(start, end - 1, number);
+ }
+ else if (DEBUG) {
+ log.trace("Don't add highlight of class {}!", number);
+ };
}
// Element payload for match!
@@ -259,8 +265,9 @@
else if (b[0] == (byte) 64) {
bb.put(b);
+ bb.position(1); // Ignore pti
-
+ // Only take the start character offset if it wasn't set before
if (this.potentialStartPosChar == -1) {
this.potentialStartPosChar = bb.getInt(1);
}
@@ -905,11 +912,10 @@
if (processed)
return true;
-
// Relevant details are missing
if (this.positionsToOffset == null || this.localDocID == -1) {
log.warn("You have to define "
- + "positionsToOffset and localDocID first " + "before");
+ + "positionsToOffset and localDocID first before");
return false;
};
@@ -1140,10 +1146,8 @@
if (this.startMore)
sb.append("... ");
- for (HighlightCombinatorElement hce : this.snippetArray.list()) {
-
+ for (HighlightCombinatorElement hce : this.snippetArray.list())
sb.append(hce.toBrackets(this));
- };
if (this.endMore)
sb.append(" ...");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index 7ca253f..488f3e1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -415,9 +415,6 @@
assertEquals(kr.getSerialQuery(),
"focus(3: spanContain({2: <base:s />}, {3: base:s:b}))");
- System.err.println(kr.getMatch(0).getSnippetBrackets());
-
-
assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{3:b}]cabcab ...");
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
index 0f281ea..98d9948 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
@@ -21,10 +21,11 @@
byte[] b = new byte[16];
ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.put((byte) 64); // span PTI
bb.putInt(20); // startOffset
bb.putInt(25); // endOffset
bb.putInt(7); // endPos
- bb.put((byte) 4);
+ bb.put((byte) 4); // depth
TermInfo term = new TermInfo("<>:mate/p:NN", 4, bb).analyze();
assertEquals("type", term.getType(), "span");
@@ -50,7 +51,10 @@
assertEquals("depth", term.getDepth(), 0);
bb.clear();
- bb.putInt(17).put((byte) 2);
+ bb.put((byte) 32); // term-to-term
+ bb.putInt(17); // right-part-token-position
+ bb.putShort((short) 1); // left-part-tui
+ bb.putShort((short) 1); // right-part-tui
term = new TermInfo(">:xip/p:ADJ", 11, bb).analyze();
assertEquals("type", term.getType(), "relSrc");
assertEquals("value", term.getValue(), "ADJ");
@@ -63,14 +67,15 @@
assertEquals("depth", term.getDepth(), 0);
bb.clear();
+ bb.put((byte) 32); // term-to-term
bb.putInt(24);
- term = new TermInfo("<:xip/m:number:pl", 20, bb).analyze();
+ term = new TermInfo("<:xip/m:number:pl", 40, bb).analyze();
assertEquals("type", term.getType(), "relTarget");
assertEquals("value", term.getValue(), "number:pl");
assertEquals("foundry", term.getFoundry(), "xip");
assertEquals("layer", term.getLayer(), "m");
- assertEquals("startPos", term.getStartPos(), 20);
- assertEquals("endPos", term.getEndPos(), 23);
+ assertEquals("startPos", term.getStartPos(), 23);
+ assertEquals("endPos", term.getEndPos(), 40);
assertEquals("startChar", term.getStartChar(), -1);
assertEquals("endChar", term.getEndChar(), -1);
assertEquals("depth", term.getDepth(), 0);
@@ -89,6 +94,7 @@
assertEquals("depth", term.getDepth(), 0);
bb.clear();
+ bb.put((byte) 64); // span PTI
bb.putInt(20); // startOffset
bb.putInt(25); // endOffset
bb.putInt(24); // endPos
@@ -104,6 +110,7 @@
assertEquals("depth", term.getDepth(), 0);
bb.clear();
+ bb.put((byte) 64); // span PTI
bb.putInt(20); // startOffset
bb.putInt(25); // endOffset
bb.putInt(24); // endPos
@@ -118,4 +125,4 @@
assertEquals("endChar", term.getEndChar(), 25);
assertEquals("depth", term.getDepth(), 0);
};
-};
\ No newline at end of file
+};