Support highlighting of relations including spans
Change-Id: I14cd60d4dbf6923bc4a8908a4681a5bc4ef5ae43
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 2066932..4420b5f 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1211,8 +1211,15 @@
t.getAnnotation());
}
else if (t.getType() == "relSrc") {
- match.addRelation(t.getStartPos(), t.getEndPos(),
- t.getAnnotation());
+ // This only respects relSrc!
+ // May require more information for bidirectional relations
+ match.addRelation(
+ t.getStartPos(),
+ t.getEndPos(),
+ t.getTargetStartPos(),
+ t.getTargetEndPos(),
+ t.getAnnotation()
+ );
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index 3f05ea8..3f132c2 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -16,7 +16,6 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
-
// TODO: Support various terms - including relations!
private String foundry, layer, value, term, type, annotation;
@@ -26,7 +25,13 @@
private ByteBuffer payload;
private boolean analyzed = false;
- private int startChar = -1, endChar = -1, startPos = -1, endPos = -1;
+ private int
+ startChar = -1, // character offset for start of span
+ endChar = -1, // character offset for end of span
+ startPos = -1, // start position of source
+ endPos = -1, // end position of source
+ targetStartPos = -1, // start position of target
+ targetEndPos = -1; // end position of target
private byte depth = (byte) 0;
@@ -97,15 +102,25 @@
this.type = "term";
};
+ int pti = 0;
+
// Analyze term value
if (ttype != 1) {
- this.payload.get(); // Ignore PTI - temporary!!!
+ pti = this.payload.get(); // Read PTI; it selects the relation payload layout below
- if (DEBUG)
- log.trace("Check {} for {}", tterm, prefixRegex.toString());
+ if (DEBUG) {
+ log.trace(
+ "Check {} with {} for {}",
+ tterm,
+ pti,
+ prefixRegex.toString()
+ );
+ };
+
matcher = prefixRegex.matcher(tterm);
- if (matcher.matches() && matcher.groupCount() == 3) {
+
+ if (matcher.matches() && matcher.groupCount() == 3) {
this.annotation = tterm;
if (matcher.group(1) != null)
this.foundry = matcher.group(1);
@@ -116,7 +131,7 @@
};
}
- // for positions
+ // for positions (aka offset tokens)
else {
this.value = tterm;
this.startChar = this.payload.getInt();
@@ -131,10 +146,66 @@
// for spans, relations and attributes
if (ttype > 1 && ttype != 4) {
+
+ // relTarget: own position becomes endPos, startPos is read from the payload
if (this.type.equals("relTarget")) {
this.endPos = this.startPos;
this.startPos = this.payload.getInt() - 1;
}
+
+ // Token-to-token relation
+ else if (pti == 32) {
+ /*
+ * 1 byte for PTI (32),
+ * 1 integer for the right part token position,
+ * 1 short for the left-part TUI,
+ * 1 short for right-part TUI and
+ * 1 short for the relation TUI.
+ */
+ this.targetStartPos = this.payload.getInt() -1;
+ }
+
+ // Token-to-span relation
+ else if (pti == 33) {
+ /*
+ * 1 byte for PTI (33),
+ * 1 integer for the start span offset of the right part,
+ * 1 integer for the end span offset of the right part,
+ * 1 integer for the start position of the right part,
+ * 1 integer for the end position of the right part,
+ * and 0-3 TUIs as above.
+ */
+ // Ignore offsets
+ this.payload.getInt();
+ this.payload.getInt();
+
+ this.endPos = this.startPos;
+ this.targetStartPos = this.payload.getInt();
+ this.targetEndPos = this.payload.getInt();
+ }
+ else if (pti == 34) {
+ /*
+ * 1 byte for PTI (34),
+ * 1 integer for the start span offset of the left part,
+ * 1 integer for the end span offset of the left part,
+ * 1 integer for end position of the left part,
+ * 1 integer for end position of the right part,
+ * and 0-3 TUIs as above.
+ */
+ }
+ else if (pti == 35) {
+ /*
+ * 1 byte for PTI (35),
+ * 1 integer for the start span offset of the left part,
+ * 1 integer for the end span offset of the left part,
+ * 1 integer for the start span offset of the right part,
+ * 1 integer for the end span offset of the right part,
+ * 1 integer for end position of the left part,
+ * 1 integer for the start position of the right part,
+ * 1 integer for end position of the right part,
+ * and 0-3 TUIs as above.
+ */
+ }
else {
this.endPos = this.payload.getInt() - 1;
};
@@ -145,6 +216,11 @@
this.depth = this.payload.get();
};
+ /*
+ * TODO:
+ * Analyze TUI for attributes
+ */
+
// payloads can have different meaning
analyzed = true;
return this;
@@ -185,6 +261,15 @@
return this.endPos;
};
+ public int getTargetStartPos () {
+ return this.targetStartPos;
+ };
+
+
+ public int getTargetEndPos () {
+ return this.targetEndPos;
+ };
+
public byte getDepth () {
return this.depth;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 0d0eb63..0c89db2 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -426,16 +426,17 @@
* @param annotation
* Annotation string.
*/
- public void addRelation (int src, int target, String annotation) {
+ public void addRelation (int srcStart, int srcEnd, int targetStart, int targetEnd, String annotation) {
if (DEBUG)
- log.trace("Add relation {}: {} and {}", annotation, src, target);
+ log.trace("Add relation {}: {}-{}->{}-{}",
+ annotation, srcStart, srcEnd, targetStart, targetEnd);
- this.addHighlight(new Highlight(src, src, annotation, target));
+ this.addHighlight(new Highlight(srcStart, srcStart, annotation, targetStart));
int id = identifierNumberCounter--;
- identifierNumber.put(id, target);
- this.addHighlight(new Highlight(target, target, id));
+ identifierNumber.put(id, targetStart);
+ this.addHighlight(new Highlight(targetStart, targetStart, id));
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
index 98d9948..766c714 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
@@ -61,7 +61,7 @@
assertEquals("foundry", term.getFoundry(), "xip");
assertEquals("layer", term.getLayer(), "p");
assertEquals("startPos", term.getStartPos(), 11);
- assertEquals("endPos", term.getEndPos(), 16);
+ assertEquals("targetStartPos", term.getTargetStartPos(), 16);
assertEquals("startChar", term.getStartChar(), -1);
assertEquals("endChar", term.getEndChar(), -1);
assertEquals("depth", term.getDepth(), 0);