Unescape HTML entities in snippets
Change-Id: Icd1b538a67eb8e2c3f7b4a57cccfd8a4a6182e2e
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
index 6750948..8726934 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
@@ -8,17 +8,13 @@
public Snippet (String snippetstr) {
- String[] split = snippetstr.split("</?mark>");
- String splitleft = split[0];
- String splitmatch = split[1];
- String splitright = split[2];
- //(?i) makes the regex case insensitive.
- String splitleftr = splitleft.replaceAll("(?i)</?span[^>]*>", "");
- this.setLeft(splitleftr.trim());
- String splitmatchr = splitmatch.replaceAll("(?i)</?span[^>]*>", "");
- this.setMark(splitmatchr.trim());
- String splitrightr = splitright.replaceAll("(?i)</?span[^>]*>", "");
- this.setRight(splitrightr.trim());
+        String[] split = snippetstr
+            .replaceAll("(?i)</?span[^>]*>", "") // (?i): strip <span> tags case-insensitively
+            .split("</?mark>", -1);
+        // Limit -1 keeps a trailing empty part (empty right context); missing parts become "".
+        this.setLeft(unescapeHTML(split[0].trim()));
+        this.setMark(unescapeHTML(split.length > 1 ? split[1].trim() : ""));
+        this.setRight(unescapeHTML(split.length > 2 ? split[2].trim() : ""));
}
@@ -50,4 +46,30 @@
public void setMark (String mark) {
this.mark = mark;
}
+
+    /**
+     * Replaces the predefined HTML/XML character entities in
+     * {@code text} with the characters they stand for.
+     *
+     * <p>{@code &amp;} is decoded last so that a doubly escaped
+     * sequence such as {@code &amp;lt;} yields the literal text
+     * {@code &lt;} rather than {@code <}.
+     *
+     * @param text the escaped text, may be {@code null}
+     * @return the unescaped text, or an empty string if {@code text}
+     *         is {@code null}
+     */
+    private static String unescapeHTML (String text) {
+        if (text == null) {
+            return "";
+        }
+
+        return text
+            .replace("&quot;", "\"")
+            .replace("&apos;", "'")
+            .replace("&#39;", "'")
+            .replace("&lt;", "<")
+            .replace("&gt;", ">")
+            .replace("&amp;", "&");
+    }
}