Unescape HTML entities in snippets
Change-Id: Icd1b538a67eb8e2c3f7b4a57cccfd8a4a6182e2e
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
index e4b08d5..9e85cae 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
@@ -58,8 +58,8 @@
* - Add format to exporters
* - Add file suffix to exporters
* - Add "..." to snippets in RTF exporter
- * - Fix SGML entities in RTF exporter
* - Test Snippet-Export with multiple classes.
+ * - Test Snippet-Export with cutted matches.
*/
@Path("/")
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
index 633448e..888dec1 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
@@ -130,7 +130,7 @@
};
// Based on jrtf by Christian Ullenboom
- static void rtfText(Writer w, String rawText) throws IOException {
+ private static void rtfText(Writer w, String rawText) throws IOException {
char c;
for (int i = 0; i < rawText.length(); i++) {
c = rawText.charAt( i );
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
index 6750948..8726934 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
@@ -8,17 +8,13 @@
public Snippet (String snippetstr) {
- String[] split = snippetstr.split("</?mark>");
- String splitleft = split[0];
- String splitmatch = split[1];
- String splitright = split[2];
- //(?i) makes the regex case insensitive.
- String splitleftr = splitleft.replaceAll("(?i)</?span[^>]*>", "");
- this.setLeft(splitleftr.trim());
- String splitmatchr = splitmatch.replaceAll("(?i)</?span[^>]*>", "");
- this.setMark(splitmatchr.trim());
- String splitrightr = splitright.replaceAll("(?i)</?span[^>]*>", "");
- this.setRight(splitrightr.trim());
+        // Strip span tags (case-insensitively), then split at the mark tags. The limit -1
+        // keeps trailing empty strings, so an empty right context still yields three parts.
+        String[] split = snippetstr.replaceAll("(?i)</?span[^>]*>", "")
+            .split("</?mark>", -1);
+        this.setLeft(unescapeHTML(split[0].trim()));
+        this.setMark(unescapeHTML(split[1].trim()));
+        this.setRight(unescapeHTML(split[2].trim()));
}
@@ -50,4 +46,26 @@
public void setMark (String mark) {
this.mark = mark;
}
+
+    /**
+     * Decode the character entities {@code &quot;}, {@code &apos;},
+     * {@code &lt;}, {@code &gt;} and {@code &amp;} in a snippet part.
+     * Numeric and all other named entities are left untouched.
+     *
+     * @param text the escaped text, may be null
+     * @return the decoded text, or an empty string if text is null
+     */
+    private static String unescapeHTML (String text) {
+        if (text == null)
+            return "";
+
+        // "&amp;" must be decoded last: decoding it first would turn
+        // "&amp;lt;" into "&lt;" and then wrongly into "<".
+        return text
+            .replace("&quot;", "\"")
+            .replace("&apos;", "'")
+            .replace("&lt;", "<")
+            .replace("&gt;", ">")
+            .replace("&amp;", "&");
+    }
}