| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.response.match; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2 | |
| 3 | import org.apache.lucene.util.FixedBitSet; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 4 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 5 | import de.ids_mannheim.korap.response.match.Relation; |
| Nils Diewald | c383ed0 | 2015-02-26 21:35:22 +0000 | [diff] [blame] | 6 | import static de.ids_mannheim.korap.util.KrillString.*; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 7 | import java.util.*; |
| 8 | import java.io.*; |
| 9 | |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 10 | import org.slf4j.Logger; |
| 11 | import org.slf4j.LoggerFactory; |
| 12 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 13 | /* |
| 14 | Class for elements with highlighting information |
| 15 | */ |
| 16 | public class HighlightCombinatorElement { |
| 17 | |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 18 | // Number -1: Match |
| 19 | // Number -99998: Context |
| 20 | private final static int CONTEXT = -99998; |
| 21 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 22 | // Type 0: Textual data |
| 23 | // Type 1: Opening |
| 24 | // Type 2: Closing |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 25 | // Type 3: Empty |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 26 | public byte type; |
| 27 | |
| 28 | public int number = 0; |
| 29 | |
| 30 | public String characters; |
| 31 | public boolean terminal = true; |
| 32 | |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 33 | // Logger |
| 34 | private final static Logger log = LoggerFactory.getLogger(Match.class); |
| 35 | |
| 36 | // This advices the java compiler to ignore all loggings |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 37 | public static final boolean DEBUG = false; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 38 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 39 | // Constructor for highlighting elements |
| 40 | public HighlightCombinatorElement (byte type, int number) { |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 41 | this.type = type; |
| 42 | this.number = number; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 43 | }; |
| 44 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 45 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 46 | // Constructor for highlighting elements, |
| 47 | // that may not be terminal, i.e. they were closed and will |
| 48 | // be reopened for overlapping issues. |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 49 | public HighlightCombinatorElement (byte type, int number, |
| 50 | boolean terminal) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 51 | this.type = type; |
| 52 | this.number = number; |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 53 | this.terminal = terminal; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 54 | }; |
| 55 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 56 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 57 | // Constructor for textual data |
| 58 | public HighlightCombinatorElement (String characters) { |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 59 | this.type = (byte) 0; |
| 60 | this.characters = characters; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 61 | }; |
| 62 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 63 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 64 | // Return html fragment for this combinator element |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 65 | public String toHTML (Match match, FixedBitSet level, byte[] levelCache, HashSet joins) { |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 66 | |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 67 | // Opening |
| 68 | if (this.type == 1) { |
| 69 | StringBuilder sb = new StringBuilder(); |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 70 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 71 | // This is the surrounding match mark |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 72 | if (this.number == -1) { |
| 73 | sb.append("<mark>"); |
| 74 | } |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 75 | |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 76 | // This is context |
| 77 | else if (this.number == CONTEXT) { |
| 78 | // DO nothing |
| 79 | } |
| 80 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 81 | // This is a relation target |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 82 | else if (this.number < -1) { |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 83 | |
| 84 | // Create id |
| 85 | String id = escapeHTML( |
| 86 | match.getPosID(match.getClassID(this.number)) |
| 87 | ); |
| 88 | |
| 89 | // ID already in use - create join |
| 90 | if (joins.contains(id)) { |
| Akron | d578930 | 2018-01-31 09:18:14 +0100 | [diff] [blame] | 91 | sb.append("<span xlink:show=\"other\" data-action=\"join\" xlink:href=\"#") |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 92 | .append(id) |
| 93 | .append("\">"); |
| 94 | } |
| 95 | |
| 96 | // Not yet in use - create |
| 97 | else { |
| 98 | sb.append("<span xml:id=\"") |
| 99 | .append(id) |
| 100 | .append("\">"); |
| 101 | joins.add(id); |
| 102 | }; |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 103 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 104 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 105 | // This is an annotation |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 106 | else if (this.number >= 256) { |
| 107 | sb.append("<span "); |
| 108 | if (this.number < 2048) { |
| 109 | sb.append("title=\"") |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 110 | .append(escapeHTML( |
| 111 | match.getAnnotationID(this.number))) |
| 112 | .append('"'); |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 113 | } |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 114 | |
| 115 | // This is a relation source |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 116 | else { |
| 117 | Relation rel = match.getRelationID(this.number); |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 118 | |
| 119 | if (DEBUG) { |
| 120 | log.trace("Annotation is a relation with id {}", this.number); |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 121 | log.trace("Resulting in relation {}: {}-{}", rel.annotation, rel.refStart, rel.refEnd); |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 122 | }; |
| 123 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 124 | sb.append("xlink:title=\"") |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 125 | .append(escapeHTML(rel.annotation)) |
| Akron | d578930 | 2018-01-31 09:18:14 +0100 | [diff] [blame] | 126 | .append("\" xlink:show=\"none\" xlink:href=\"#") |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 127 | .append(escapeHTML(match.getPosID(rel.refStart, rel.refEnd))) |
| 128 | .append('"'); |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 129 | }; |
| 130 | sb.append('>'); |
| 131 | } |
| Nils Diewald | 52bd1cd | 2014-11-06 20:44:24 +0000 | [diff] [blame] | 132 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 133 | // This is a highlight |
| 134 | // < 256 |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 135 | else { |
| 136 | // Get the first free level slot |
| 137 | byte pos; |
| 138 | if (levelCache[this.number] != '\0') { |
| 139 | pos = levelCache[this.number]; |
| 140 | } |
| 141 | else { |
| 142 | pos = (byte) level.nextSetBit(0); |
| 143 | level.clear(pos); |
| 144 | levelCache[this.number] = pos; |
| 145 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | sb.append("<mark class=\"class-").append(this.number) |
| 147 | .append(" level-").append(pos).append("\">"); |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 148 | }; |
| 149 | return sb.toString(); |
| 150 | } |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 151 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 152 | // This is a Closing tag |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 153 | else if (this.type == 2) { |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 154 | if (this.number == CONTEXT) |
| 155 | return ""; |
| 156 | |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 157 | if (this.number < -1 || this.number >= 256) |
| 158 | return "</span>"; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 159 | |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 160 | if (this.number == -1) |
| 161 | return "</mark>"; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 162 | |
| Nils Diewald | dcd5ab1 | 2015-02-20 02:59:09 +0000 | [diff] [blame] | 163 | if (this.terminal) |
| 164 | level.set((int) levelCache[this.number]); |
| 165 | return "</mark>"; |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 166 | } |
| 167 | |
| 168 | // Empty element |
| 169 | else if (this.type == 3) { |
| 170 | return "<span class=\"pb\" data-after=\"" + number + "\"></span>"; |
| 171 | }; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 172 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 173 | // HTML encode primary data |
| 174 | return escapeHTML(this.characters); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 175 | }; |
| 176 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 178 | // Return bracket fragment for this combinator element |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 179 | public String toBrackets (Match match) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 180 | if (this.type == 1) { |
| 181 | StringBuilder sb = new StringBuilder(); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 182 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 183 | // Match |
| 184 | if (this.number == -1) { |
| 185 | sb.append("["); |
| 186 | } |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 187 | |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 188 | // This is context |
| 189 | else if (this.number == CONTEXT) { |
| 190 | // DO nothing |
| 191 | } |
| 192 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 193 | // Identifier |
| 194 | else if (this.number < -1) { |
| 195 | sb.append("{#"); |
| 196 | sb.append(match.getClassID(this.number)); |
| 197 | sb.append(':'); |
| 198 | } |
| 199 | |
| 200 | // Highlight, Relation, Span |
| 201 | else { |
| 202 | sb.append("{"); |
| 203 | if (this.number >= 256) { |
| 204 | if (this.number < 2048) |
| 205 | sb.append(match.getAnnotationID(this.number)); |
| 206 | else { |
| 207 | Relation rel = match.getRelationID(this.number); |
| 208 | sb.append(rel.annotation); |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 209 | sb.append('>').append(rel.refStart); |
| 210 | |
| 211 | if (rel.refEnd != -1) |
| 212 | sb.append('-').append(rel.refEnd); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 213 | }; |
| 214 | sb.append(':'); |
| 215 | } |
| 216 | else if (this.number != 0) |
| 217 | sb.append(this.number).append(':'); |
| 218 | }; |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 219 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | return sb.toString(); |
| 221 | } |
| 222 | else if (this.type == 2) { |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 223 | |
| 224 | // This is context |
| 225 | if (this.number == CONTEXT) |
| 226 | return ""; |
| 227 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 228 | if (this.number == -1) |
| 229 | return "]"; |
| 230 | return "}"; |
| 231 | }; |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 232 | return escapeBrackets(this.characters); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 233 | }; |
| 234 | }; |