Fix duplicate xml:id in relation serialization by introducing continuation elements (aka joins)
Change-Id: I4b1144e3ece1c2e2ae1212412f2fac10c8d1b2ad
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index ba506dd..b3f045d 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -1334,6 +1334,10 @@
StringBuilder sb = new StringBuilder();
StringBuilder rightContext = new StringBuilder();
+ // Remember ids that were already emitted, so that
+ // repeated occurrences are serialized as joins
+ HashSet<String> joins = new HashSet<>(100);
+
// Snippet stack sizes
short start = (short) 0;
short end = this.snippetArray.size();
@@ -1358,7 +1362,7 @@
// First element is textual
if (elem.type == 0) {
- sb.append(elem.toHTML(this, level, levelCache));
+ sb.append(elem.toHTML(this, level, levelCache, joins));
// Move start position
start++;
};
@@ -1372,7 +1376,9 @@
// Last element is textual
if (elem != null && elem.type == 0) {
- rightContext.append(elem.toHTML(this, level, levelCache));
+ rightContext.append(
+ elem.toHTML(this, level, levelCache, joins)
+ );
// decrement end
end--;
@@ -1391,7 +1397,9 @@
elem = this.snippetArray.get(i);
// UNTESTED
if (elem != null) {
- String elemString = elem.toHTML(this, level, levelCache);
+ String elemString = elem.toHTML(
+ this, level, levelCache, joins
+ );
if (DEBUG) {
log.trace("Add node {}", elemString);
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 0d22c0c..346a259 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -99,7 +99,7 @@
// Just some debug information
if (DEBUG) {
StringBuilder sb = new StringBuilder(
- "Stack for checking with class ");
+ "Stack for checking with number ");
sb.append(number).append(" is ");
for (int s : this.balanceStack) {
sb.append('[').append(s).append(']');
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index a74ba24..2f8e274 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -58,7 +58,7 @@
// Return html fragment for this combinator element
- public String toHTML (Match match, FixedBitSet level, byte[] levelCache) {
+public String toHTML (Match match, FixedBitSet level, byte[] levelCache, HashSet joins) {
// Opening
if (this.type == 1) {
@@ -71,10 +71,26 @@
// This is a relation target
else if (this.number < -1) {
- sb.append("<span xml:id=\"")
- .append(escapeHTML(
- match.getPosID(match.getClassID(this.number))))
- .append("\">");
+
+ // Create id
+ String id = escapeHTML(
+ match.getPosID(match.getClassID(this.number))
+ );
+
+ // ID already in use - create join
+ if (joins.contains(id)) {
+ sb.append("<span xlink:type=\"join\" xlink:href=\"#")
+ .append(id)
+ .append("\">");
+ }
+
+ // Not yet in use - define the id and remember it
+ else {
+ sb.append("<span xml:id=\"")
+ .append(id)
+ .append("\">");
+ joins.add(id);
+ };
}
// This is an annotation
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 11f3d4a..8c4956e 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -348,9 +348,6 @@
Match km = ki.getMatchInfo("match-WDD17/982/72848-p15844-15846", "tokens",
"lwc", "d", true, true, true);
- // TODO:
- // This test is broken - it should not introduce
- // IDs multiple times
String snippet = km.getSnippetHTML();
assertEquals(
"SnippetBrackets (with Spans)",
@@ -370,7 +367,7 @@
"<span xlink:title=\"lwc/d:--\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15839-15840\">AL</span>"+
"</span>"+
"</span>"+
- "<span xml:id=\"token-WDD17/982/72848-p15839-15840\">"+
+ "<span xlink:type=\"join\" xlink:href=\"#token-WDD17/982/72848-p15839-15840\">"+
":"+
"<span xml:id=\"token-WDD17/982/72848-p15840-15846\">"+
"<span xml:id=\"token-WDD17/982/72848-p15840\">"+
@@ -378,7 +375,8 @@
"</span>"+
"</span>"+
"</span>"+
- "<span xml:id=\"token-WDD17/982/72848-p15840-15846\">"+
+ // "<span xml:id=\"token-WDD17/982/72848-p15840-15846\">"+
+ "<span xlink:type=\"join\" xlink:href=\"#token-WDD17/982/72848-p15840-15846\">"+
" "+
"<span xlink:title=\"lwc/d:NK\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15842\">den</span>"+
" "+