Fixed escaping in snippets (HTML and brackets)
Change-Id: I51b4b44998e6bfb0750e716c82d57ea5a820c741
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index 55837b8..42f1c08 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -58,7 +58,7 @@
else if (this.number < -1) {
sb.append("<span xml:id=\"")
- .append(match.getPosID(match.getClassID(this.number)))
+ .append(escapeHTML(match.getPosID(match.getClassID(this.number))))
.append("\">");
}
@@ -66,14 +66,14 @@
sb.append("<span ");
if (this.number < 2048) {
sb.append("title=\"")
- .append(match.getAnnotationID(this.number))
+ .append(escapeHTML(match.getAnnotationID(this.number)))
.append('"');
}
else {
Relation rel = match.getRelationID(this.number);
- sb.append("xlink:title=\"").append(rel.annotation)
+ sb.append("xlink:title=\"").append(escapeHTML(rel.annotation))
.append("\" xlink:type=\"simple\" xlink:href=\"#")
- .append(match.getPosID(rel.ref)).append('"');
+ .append(escapeHTML(match.getPosID(rel.ref))).append('"');
};
sb.append('>');
}
@@ -146,6 +146,7 @@
else if (this.number != 0)
sb.append(this.number).append(':');
};
+
return sb.toString();
}
else if (this.type == 2) {
@@ -153,6 +154,6 @@
return "]";
return "}";
};
- return this.characters;
+ return escapeBrackets(this.characters);
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index b41cd2e..10f0303 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -8,9 +8,10 @@
private ArrayList<int[]> pos = new ArrayList<>(8);
- // TODO: "contains" is necessary for a compatibility bug in Kustvakt
- Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
- + "(?:([^!]+?)[!\\.])?" + "([^!]+)-p([0-9]+)-([0-9]+)"
+ // Remember: "contains" is needed to work around a compatibility bug in Kustvakt
+ Pattern idRegex = Pattern.compile("^(?:match-|contains-)" +
+ "(?:([^!]+?)[!\\.])?" +
+ "([^!]+)[-/]p([0-9]+)-([0-9]+)"
+ "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
@@ -18,6 +19,12 @@
public MatchIdentifier () {};
+ /**
+ * Construct a new MatchIdentifier.
+ * Because of many internal changes and for compatibility reasons,
+ * the structure of the identifier has changed several times.
+ * The constructor therefore accepts several legacy structures for test compatibility.
+ */
public MatchIdentifier (String id) {
// Replace for legacy reasons with incompatible versions of Kustvakt
@@ -26,17 +33,25 @@
Matcher matcher = idRegex.matcher(id);
if (matcher.matches()) {
+ // textSigle is provided directly
+ if (matcher.group(1) == null && id.contains("/")) {
+ // TODO: potentially use UID!
+ this.setTextSigle(matcher.group(2));
+ }
+
// <legacy>
- // and test compatibility
- if (id.contains("!") || !id.contains("_")) {
+ else if (id.contains("!") || !id.contains("_")) {
this.setCorpusID(matcher.group(1));
this.setDocID(matcher.group(2));
}
- // </legacy>
+ // </legacy>
+
+ // textSigle is provided indirectly
+ // <legacy>
else {
- // this.getCorpusID() + "." + this.getDocID()
this.setTextSigle(matcher.group(1) + '.' + matcher.group(2));
};
+ // </legacy>
this.setStartPos(Integer.parseInt(matcher.group(3)));
this.setEndPos(Integer.parseInt(matcher.group(4)));
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillString.java b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
index cc1f357..8a11d33 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillString.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
@@ -58,6 +58,17 @@
/**
+ * Escape bracket-relevant characters: every curly brace, square bracket,
+ * and backslash in the text is prefixed with a backslash.
+ * @param text
+ * The string to escape (must not be null).
+ * @return The escaped string.
+ */
+ public static String escapeBrackets (String text) {
+ return text.replaceAll("([\\{\\}\\[\\]\\\\])", "\\\\$1");
+ };
+
+ /**
* Add surrounding double quotes.
*
* @param text