blob: 678a5621b85162439ddfbe341433c94339b09b68 [file] [log] [blame]
hebastae0496762020-03-30 00:11:01 +02001package de.ids_mannheim.korap.plkexport;
2
Akron1c8a7682020-11-16 19:06:02 +01003import java.util.regex.Pattern;
4
/**
 * Splits an HTML match snippet into its left context, the marked match
 * and its right context.
 *
 * <p>The snippet is expected to wrap the match in {@code <mark>} tags and
 * the surrounding text in {@code <span>} elements. All {@code <span>} tags
 * are dropped, the remaining text is split at the {@code <mark>} tags, and
 * each part is trimmed and has the five basic HTML entities decoded.</p>
 */
public class Snippet {

    /** A span with {@code class="more"} somewhere before an opening mark tag. */
    private static final Pattern LEFT_MORE =
        Pattern.compile("(?i)<span[^>]*?class=\"more\".+<mark>");

    /** A span with {@code class="more"} somewhere after a closing mark tag. */
    private static final Pattern RIGHT_MORE =
        Pattern.compile("(?i)</mark>.+<span[^>]*?class=\"more\"");

    /** Any opening or closing span tag, including its attributes. */
    private static final Pattern SPAN_TAG =
        Pattern.compile("(?i)</?span[^>]*>");

    /** An opening or closing mark tag (case-insensitive, like the other patterns). */
    private static final Pattern MARK_TAG =
        Pattern.compile("(?i)</?mark>");

    private String left, right, mark;
    private boolean leftMore, rightMore;

    /**
     * Parses the given snippet.
     *
     * <p>Parts that are absent from the snippet (no mark at all, or nothing
     * following the closing mark tag) are stored as empty strings. A
     * {@code null} snippet is treated like an empty one.</p>
     *
     * @param snippetstr the HTML snippet, may be {@code null}
     */
    public Snippet (String snippetstr) {
        final String html = (snippetstr == null) ? "" : snippetstr;

        // Check the context for "more" markers on either side of the match
        this.leftMore = LEFT_MORE.matcher(html).find();
        this.rightMore = RIGHT_MORE.matcher(html).find();

        // Split the match. The limit of -1 keeps trailing empty strings, so
        // an empty right context still yields an (empty) third element
        // instead of a shorter array.
        final String[] parts =
            MARK_TAG.split(SPAN_TAG.matcher(html).replaceAll(""), -1);

        // Fields are assigned directly: the setters are overridable and
        // must not be invoked from the constructor.
        this.left = partAt(parts, 0);
        this.mark = partAt(parts, 1);
        this.right = partAt(parts, 2);
    }


    /**
     * @return the text preceding the match
     */
    public String getLeft () {
        return left;
    }


    /**
     * @param left the text preceding the match
     */
    public void setLeft (String left) {
        this.left = left;
    }


    /**
     * @return the text following the match
     */
    public String getRight () {
        return right;
    }


    /**
     * @param right the text following the match
     */
    public void setRight (String right) {
        this.right = right;
    }


    /**
     * @return the marked match text
     */
    public String getMark () {
        return mark;
    }


    /**
     * @param mark the marked match text
     */
    public void setMark (String mark) {
        this.mark = mark;
    }


    /**
     * @return {@code true} if the snippet contained a span with
     *         {@code class="more"} before the opening mark tag
     */
    public boolean hasMoreLeft () {
        return leftMore;
    }


    /**
     * @return {@code true} if the snippet contained a span with
     *         {@code class="more"} after the closing mark tag
     */
    public boolean hasMoreRight () {
        return rightMore;
    }


    /**
     * Returns the trimmed, entity-decoded element at {@code index}, or an
     * empty string if the array has no such element.
     */
    private static String partAt (String[] parts, int index) {
        if (index >= parts.length)
            return "";

        return unescapeHTML(parts[index].trim());
    }


    /**
     * Decodes the entities {@code &quot;}, {@code &apos;}, {@code &lt;},
     * {@code &gt;} and {@code &amp;}.
     *
     * @param text the text to decode, may be {@code null}
     * @return the decoded text, or an empty string for {@code null}
     */
    private static String unescapeHTML (String text) {
        if (text == null)
            return "";

        // "&amp;" has to be decoded last, otherwise "&amp;lt;" would be
        // decoded twice and end up as "<" instead of "&lt;".
        return text
            .replace("&quot;", "\"")
            .replace("&apos;", "'")
            .replace("&lt;", "<")
            .replace("&gt;", ">")
            .replace("&amp;", "&");
    }
}