Fixed token identifier serialization for new corpora
Change-Id: I7abe855645b08a2cc2ce05f47e6f4ef2a89fe53d
diff --git a/Changes b/Changes
index ad32006..263becf 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.55.9 2017-09-12
+ - [bugfix] Serialize token identifier correctly for
+ new corpora with text sigles (diewald)
+
0.55.8 2017-09-05
- [feature] Retrieve and display pagebreaks (diewald)
- [bugfix] Handle invalid dates in input (diewald)
diff --git a/pom.xml b/pom.xml
index 758f9fc..7204854 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.55.8</version>
+ <version>0.55.9</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index ffc2b5a..b7f735a 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1205,12 +1205,14 @@
t.getStartChar(), t.getStartPos(),
t.getEndChar(), t.getEndPos());
- if (t.getType() == "term" || t.getType() == "span")
+ if ("term".equals(t.getType()) || "span".equals(t.getType())) { // equals(): compare contents, not references
match.addAnnotation(t.getStartPos(), t.getEndPos(),
t.getAnnotation());
- else if (t.getType() == "relSrc")
+ }
+ else if ("relSrc".equals(t.getType())) { // literal first, so a null type simply does not match
match.addRelation(t.getStartPos(), t.getEndPos(),
t.getAnnotation());
+ };
};
break;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 145ac6f..0d0eb63 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -427,6 +427,11 @@
* Annotation string.
*/
public void addRelation (int src, int target, String annotation) {
+
+ if (DEBUG)
+ log.trace("Add relation {}: {} and {}", annotation, src, target);
+
+
this.addHighlight(new Highlight(src, src, annotation, target));
int id = identifierNumberCounter--;
identifierNumber.put(id, target);
@@ -672,6 +677,9 @@
@JsonIgnore
public String getPosID (int pos) {
+ if (DEBUG)
+ log.trace("Retrieve the identifier for pos");
+
// Identifier already given
if (this.identifier != null)
return this.identifier;
@@ -683,10 +691,23 @@
PosIdentifier id = new PosIdentifier();
// Get prefix string corpus/doc
+ // <legacy>
id.setCorpusID(this.getCorpusID());
id.setDocID(this.getDocID());
+ // </legacy>
+ id.setTextSigle(this.getTextSigle());
id.setPos(pos);
+ if (DEBUG)
+ log.trace(
+ "The identifier is {} in {} ({}-{}) {}",
+ id.toString(),
+ this.getTextSigle(),
+ this.getCorpusID(),
+ this.getDocID(),
+ pos
+ );
+
return id.toString();
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index 4f43656..9270847 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -7,6 +7,9 @@
import java.util.*;
import java.io.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/*
Class for elements with highlighting information
*/
@@ -23,6 +26,11 @@
public String characters;
public boolean terminal = true;
+ // Logger, registered under this class so that trace output is attributed to it
+ private final static Logger log = LoggerFactory.getLogger(HighlightCombinatorElement.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
// Constructor for highlighting elements
public HighlightCombinatorElement (byte type, int number) {
@@ -78,6 +86,12 @@
}
else {
Relation rel = match.getRelationID(this.number);
+
+ if (DEBUG) {
+ log.trace("Annotation is a relation with id {}", this.number);
+ log.trace("Resulting in relation {}: {}", rel.annotation, rel.ref);
+ };
+
sb.append("xlink:title=\"")
.append(escapeHTML(rel.annotation))
.append("\" xlink:type=\"simple\" xlink:href=\"#")
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/PosIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/PosIdentifier.java
index eaa8d9e..a7f7729 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/PosIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/PosIdentifier.java
@@ -5,7 +5,6 @@
public class PosIdentifier extends DocIdentifier {
private int pos;
-
public PosIdentifier () {};
@@ -21,16 +20,24 @@
public String toString () {
- if (this.docID == null)
+ if (this.textSigle == null && this.docID == null)
return null;
- StringBuilder sb = new StringBuilder("word-");
+ StringBuilder sb = new StringBuilder("token-");
+ // Get prefix string text sigle
+ if (this.textSigle != null) {
+ sb.append(this.textSigle);
+ }
// Get prefix string corpus/doc
- if (this.corpusID != null) {
- sb.append(this.corpusID).append('!');
- };
- sb.append(this.docID);
+ else {
+ // <legacy>
+ if (this.corpusID != null) {
+ sb.append(this.corpusID).append('!');
+ };
+ sb.append(this.docID);
+ // </legacy>
+ };
sb.append("-p");
sb.append(this.pos);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 0500050..f6c6bf5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -107,7 +107,7 @@
assertEquals(id.getCorpusID(), "c1");
assertEquals(id.getDocID(), "d1");
assertEquals(id.getPos(), 8);
- assertEquals(id.toString(), "word-c1!d1-p8");
+ assertEquals(id.toString(), "token-c1!d1-p8");
};
@@ -370,12 +370,12 @@
+ "<mark>"
+ "<span title=\"x/o:erstens\">"
+ "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" "
- + "xlink:href=\"#word-c1!d1-p3\">"
+ + "xlink:href=\"#token-c1!d1-p3\">"
+ "a" + "</span>"
+ "</span>"
+ "<span title=\"x/o:zweitens\">" + "b" + "</span>"
+ "<span title=\"x/o:drittens\">" + "c" + "</span>"
- + "<span xml:id=\"word-c1!d1-p3\">"
+ + "<span xml:id=\"token-c1!d1-p3\">"
+ "<span title=\"x/o:viertens\">" + "a" + "</span>"
+ "</span>"
+ "<span title=\"x/o:fünftens\">" + "b" + "</span>"
@@ -395,12 +395,12 @@
+ "</span>" + "<span class=\"match\">"+"<mark>"
+"<span title=\"x/o:erstens\">"
+"<span xlink:title=\"x/rel:a\" " + "xlink:type=\"simple\" "
- +"xlink:href=\"#word-c1!d1-p3\">a</span>"
+ +"xlink:href=\"#token-c1!d1-p3\">a</span>"
+"</span>"
+"<span title=\"x/o:zweitens\">b</span>"
+"<mark class=\"class-7 level-0\">"
+"<span title=\"x/o:drittens\">c</span>"
- +"<span xml:id=\"word-c1!d1-p3\">"
+ +"<span xml:id=\"token-c1!d1-p3\">"
+"<span title=\"x/o:viertens\">a</span>"
+"</span>"
+"</mark>"
@@ -491,19 +491,19 @@
+ "<span title=\"f/m:eins\">" + "<span title=\"f/y:one\">"
+ "<span title=\"it/is:1\">"
+ "<span title=\"x/o:erstens\">"
- + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
+ + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#token-c1!d1-p3\">"
+ "a</span>" + "</span>" + "</span>"
+ "</span>" + "</span>"
+ "<span title=\"f/m:zwei\">" + "<span title=\"f/y:two\">"
+ "<span title=\"it/is:2\">"
+ "<span title=\"x/o:zweitens\">"
- + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"
+ + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#token-c1!d1-p3\">"
+ "b</span>" + "</span>"
+ "</span>" + "</span>" + "</span>"
+ "<span title=\"f/m:drei\">" + "<span title=\"f/y:three\">"
+ "<span title=\"it/is:3\">"
+ "<span title=\"x/o:drittens\">c</span>" + "</span>"
- + "</span>" + "</span>" + "<span xml:id=\"word-c1!d1-p3\">"
+ + "</span>" + "</span>" + "<span xml:id=\"token-c1!d1-p3\">"
+ "<span title=\"f/m:vier\">" + "<span title=\"f/y:four\">"
+ "<span title=\"it/is:4\">"
+ "<span title=\"x/o:viertens\">a</span>" + "</span>"
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSampleIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSampleIndex.java
index da683a7..eb08591 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSampleIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSampleIndex.java
@@ -247,7 +247,7 @@
true);
assertEquals(km.getSnippetBrackets(), "... [[{malt/d:DET>132567:meine} {#132567:{malt/d:ATTR>132567:eigne}} {malt/d:PN>132564:Erfahrung}]] ...");
- assertEquals(km.getSnippetHTML(), "... [[{malt/d:DET>132567:meine} {#132567:{malt/d:ATTR>132567:eigne}} {malt/d:PN>132564:Erfahrung}]] ...");
+ assertEquals(km.getSnippetHTML(), "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark><span xlink:title=\"malt/d:DET\" xlink:type=\"simple\" xlink:href=\"#token-GOE/AGD/00000-p132567\">meine</span> <span xml:id=\"token-GOE/AGD/00000-p132567\"><span xlink:title=\"malt/d:ATTR\" xlink:type=\"simple\" xlink:href=\"#token-GOE/AGD/00000-p132567\">eigne</span></span> <span xlink:title=\"malt/d:PN\" xlink:type=\"simple\" xlink:href=\"#token-GOE/AGD/00000-p132564\">Erfahrung</span></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>");
}
}