| hebasta | e049676 | 2020-03-30 00:11:01 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.plkexport; |
| 2 | |
| Akron | 1c8a768 | 2020-11-16 19:06:02 +0100 | [diff] [blame] | 3 | import java.util.regex.Pattern; |
| 4 | |
/**
 * Plain-text view of an HTML match snippet.
 *
 * The snippet string is expected to wrap the match in
 * {@code <mark>...</mark>} and may contain {@code <span>} elements;
 * a {@code <span class="more">} before or after the match signals
 * that the context was truncated on that side.
 *
 * On construction all {@code <span>} tags are removed, the remaining
 * text is split at the {@code <mark>} tags into left context, match
 * and right context, and each part is trimmed and HTML-unescaped.
 */
public class Snippet {

    private String left, right, mark;
    private boolean leftMore, rightMore;

    // A "more" span somewhere before the opening mark tag
    private static final Pattern leftMoreP =
        Pattern.compile("(?i)<span[^>]*?class=\"more\".+<mark>");

    // A "more" span somewhere after the closing mark tag
    private static final Pattern rightMoreP =
        Pattern.compile("(?i)</mark>.+<span[^>]*?class=\"more\"");

    // Any opening or closing span tag
    private static final Pattern spanTagP =
        Pattern.compile("(?i)</?span[^>]*>");

    // Opening or closing mark tag (case-insensitive, like the
    // detection patterns above)
    private static final Pattern markTagP =
        Pattern.compile("(?i)</?mark>");


    /**
     * Parse a snippet string.
     *
     * Parts that are not present in the input (e.g. no mark tags at
     * all, or nothing after the closing mark tag) are set to the
     * empty string instead of raising an exception.
     *
     * @param snippetstr the HTML snippet; {@code null} is treated
     *        like an empty snippet
     */
    public Snippet (String snippetstr) {
        String snippet = (snippetstr == null) ? "" : snippetstr;

        // Check the context
        this.leftMore = leftMoreP.matcher(snippet).find();
        this.rightMore = rightMoreP.matcher(snippet).find();

        // Split the match. The negative limit keeps trailing empty
        // parts, so an empty right context still yields a third
        // element.
        String[] split = markTagP.split(
            spanTagP.matcher(snippet).replaceAll(""), -1
            );

        // Fields are assigned directly, so that the constructor
        // does not depend on overridable setters
        this.left = part(split, 0);
        this.mark = part(split, 1);
        this.right = part(split, 2);
    }


    /**
     * @return the unescaped text left of the match
     */
    public String getLeft () {
        return left;
    }


    /**
     * @param left the text left of the match
     */
    public void setLeft (String left) {
        this.left = left;
    }


    /**
     * @return the unescaped text right of the match
     */
    public String getRight () {
        return right;
    }


    /**
     * @param right the text right of the match
     */
    public void setRight (String right) {
        this.right = right;
    }


    /**
     * @return the unescaped text of the match
     */
    public String getMark () {
        return mark;
    }


    /**
     * @param mark the text of the match
     */
    public void setMark (String mark) {
        this.mark = mark;
    }


    /**
     * @return {@code true} if the parsed snippet contained a
     *         "more" span before the match
     */
    public boolean hasMoreLeft () {
        return leftMore;
    }


    /**
     * @return {@code true} if the parsed snippet contained a
     *         "more" span after the match
     */
    public boolean hasMoreRight () {
        return rightMore;
    }


    // Trimmed and unescaped element at the given index,
    // or the empty string if the index is out of range
    private static String part (String[] parts, int index) {
        if (index >= parts.length)
            return "";

        return unescapeHTML(parts[index].trim());
    }


    // Replace the predefined character entities by their characters
    private static String unescapeHTML (String text) {
        if (text == null)
            return "";

        // "&amp;" has to be replaced last - otherwise an escaped
        // entity like "&amp;lt;" would be unescaped twice
        return text
            .replace("&quot;", "\"")
            .replace("&apos;", "'")
            .replace("&#39;", "'")
            .replace("&lt;", "<")
            .replace("&gt;", ">")
            .replace("&amp;", "&");
    }
}