| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 1 | // Connector to the Lucene Backend |
| 2 | package de.ids_mannheim.korap.web; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 3 | |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 4 | import java.io.File; |
| 5 | import java.io.IOException; |
| 6 | import java.nio.file.Paths; |
| 7 | import java.util.List; |
| margaretha | f68daa6 | 2017-09-21 02:11:24 +0200 | [diff] [blame^] | 8 | import java.util.Properties; |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 9 | import java.util.regex.Matcher; |
| 10 | import java.util.regex.Pattern; |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 11 | |
| 12 | import org.apache.lucene.store.MMapDirectory; |
| 13 | import org.slf4j.Logger; |
| 14 | import org.slf4j.LoggerFactory; |
| 15 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 16 | import de.ids_mannheim.korap.Krill; |
| 17 | import de.ids_mannheim.korap.KrillCollection; |
| 18 | import de.ids_mannheim.korap.KrillIndex; |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 19 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| 20 | import de.ids_mannheim.korap.exceptions.StatusCodes; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 21 | import de.ids_mannheim.korap.response.Match; |
| 22 | import de.ids_mannheim.korap.response.Result; |
| 23 | import de.ids_mannheim.korap.util.QueryException; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 24 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 25 | /** |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 26 | * The SearchKrill class allows for searching in the |
| 27 | * Lucene based Krill backend by applying KoralQuery. |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 28 | * |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 29 | * @author Nils Diewald |
| 30 | */ |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 31 | public class SearchKrill { |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 32 | private final static Logger jlog = LoggerFactory |
| Michael Hanl | f078532 | 2015-11-13 16:14:45 +0100 | [diff] [blame] | 33 | .getLogger(SearchKrill.class); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 34 | |
| 35 | // Temporary - shouldn't be here. |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 36 | String indexDir = "/data/prep_corpus/index/"; |
| 37 | String i = "/Users/hanl/Projects/prep_corpus"; |
| 38 | String klinux10 = "/vol/work/hanl/indices"; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 39 | private KrillIndex index; |
| margaretha | f68daa6 | 2017-09-21 02:11:24 +0200 | [diff] [blame^] | 40 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 41 | /** |
| 42 | * Constructor |
| 43 | */ |
| 44 | // todo: use korap.config to get index location |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 45 | public SearchKrill (String path) { |
| Bodmo | 3d6bd35 | 2017-04-25 11:31:39 +0200 | [diff] [blame] | 46 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 47 | try { |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 48 | if (path.equals(":temp:")) { |
| 49 | this.index = new KrillIndex(); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 50 | } |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 51 | else { |
| 52 | File f = new File(path); |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 53 | jlog.info("Loading index from " + path); |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 54 | if (!f.exists()) { |
| Bodmo | 3d6bd35 | 2017-04-25 11:31:39 +0200 | [diff] [blame] | 55 | jlog.error("Index not found: " + path + "!"); |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 56 | System.exit(-1); |
| Michael Hanl | daf8660 | 2016-05-12 14:31:52 +0200 | [diff] [blame] | 57 | } |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 58 | this.index = new KrillIndex(new MMapDirectory(Paths.get(path))); |
| 59 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 60 | } |
| 61 | catch (IOException e) { |
| 62 | jlog.error("Unable to loadSubTypes index: {}", e.getMessage()); |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 63 | }; |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 64 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 65 | |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 66 | public KrillIndex getIndex () { |
| 67 | return this.index; |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 68 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 69 | |
| 70 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 71 | /** |
| 72 | * Search in the Lucene index. |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 73 | * |
| 74 | * @param json |
| 75 | * JSON-LD string with search and potential meta |
| 76 | * filters. |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 77 | */ |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 78 | public String search (String json) { |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 79 | jlog.trace(json); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 80 | if (this.index != null) |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 81 | return new Krill(json).apply(this.index).toJsonString(); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 82 | Result kr = new Result(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 83 | kr.addError(601, "Unable to find index"); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 84 | return kr.toJsonString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 85 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 86 | |
| 87 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 88 | /** |
| 89 | * Search in the Lucene index and return matches as token lists. |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 90 | * |
| 91 | * @param json |
| 92 | * JSON-LD string with search and potential meta |
| 93 | * filters. |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 94 | */ |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 95 | @Deprecated |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 96 | public String searchTokenList (String json) { |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 97 | jlog.trace(json); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 98 | if (this.index != null) |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 99 | return new Krill(json).apply(this.index).toTokenListJsonString(); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 100 | Result kr = new Result(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 101 | kr.addError(601, "Unable to find index"); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 102 | return kr.toJsonString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 103 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 104 | |
| 105 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 106 | /** |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 107 | * Get info on a match - by means of a richly annotated html |
| 108 | * snippet. |
| 109 | * |
| 110 | * @param id |
| 111 | * match id |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 112 | * @param availabilityList |
| 113 | * @throws KustvaktException |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 114 | */ |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 115 | public String getMatch (String id, Pattern licensePattern) { |
| 116 | Match km; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 117 | if (this.index != null) { |
| 118 | try { |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 119 | km = this.index.getMatch(id); |
| 120 | String availability = km.getAvailability(); |
| margaretha | 698d953 | 2017-06-27 10:53:27 +0200 | [diff] [blame] | 121 | if (licensePattern!=null && availability != null){ |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 122 | Matcher m = licensePattern.matcher(availability); |
| 123 | if (!m.matches()){ |
| margaretha | aec93f7 | 2017-05-29 16:51:41 +0200 | [diff] [blame] | 124 | km = new Match(); |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 125 | km.addError(StatusCodes.ACCESS_DENIED, |
| margaretha | 65ca5fb | 2017-06-29 15:01:57 +0200 | [diff] [blame] | 126 | "Retrieving match info with ID "+id+" is not allowed.", id); |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 127 | } |
| 128 | } |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 129 | } |
| 130 | catch (QueryException qe) { |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 131 | km = new Match(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 132 | km.addError(qe.getErrorCode(), qe.getMessage()); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 133 | } |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 134 | } |
| 135 | else{ |
| 136 | km = new Match(); |
| 137 | km.addError(601, "Unable to find index"); |
| 138 | } |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 139 | return km.toJsonString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 140 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 141 | |
| 142 | |
| 143 | public String getMatch (String id, List<String> foundries, |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 144 | List<String> layers, boolean includeSpans, |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 145 | boolean includeHighlights, boolean sentenceExpansion, |
| 146 | Pattern licensePattern) { |
| 147 | Match km; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 148 | if (this.index != null) { |
| 149 | try { |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 150 | km = this.index.getMatchInfo(id, "tokens", true, foundries, |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 151 | layers, includeSpans, includeHighlights, |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 152 | sentenceExpansion); |
| 153 | String availability = km.getAvailability(); |
| margaretha | 698d953 | 2017-06-27 10:53:27 +0200 | [diff] [blame] | 154 | if (licensePattern !=null && availability != null){ |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 155 | Matcher m = licensePattern.matcher(availability); |
| 156 | if (!m.matches()){ |
| margaretha | 65b6714 | 2017-05-29 16:23:16 +0200 | [diff] [blame] | 157 | km = new Match(); |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 158 | km.addError(StatusCodes.ACCESS_DENIED, |
| margaretha | 65ca5fb | 2017-06-29 15:01:57 +0200 | [diff] [blame] | 159 | "Retrieving match info with ID "+id+" is not allowed.", id); |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 160 | } |
| 161 | } |
| 162 | |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 163 | } |
| 164 | catch (QueryException qe) { |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 165 | km = new Match(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 166 | km.addError(qe.getErrorCode(), qe.getMessage()); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 167 | } |
| margaretha | a76ed24 | 2017-05-24 17:48:22 +0200 | [diff] [blame] | 168 | } |
| 169 | else{ |
| 170 | km = new Match(); |
| 171 | km.addError(601, "Unable to find index"); |
| 172 | } |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 173 | return km.toJsonString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 174 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 175 | |
| 176 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 177 | /** |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 178 | * Get info on a match - by means of a richly annotated html |
| 179 | * snippet. |
| 180 | * |
| 181 | * @param id |
| 182 | * match id |
| 183 | * @param foundry |
| 184 | * the foundry of interest - may be null |
| 185 | * @param layer |
| 186 | * the layer of interest - may be null |
| 187 | * @param includeSpans |
| 188 | * Should spans be included (or only token infos)? |
| 189 | * @param includeHighlights |
| 190 | * Should highlight markup be included? |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 191 | */ |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 192 | public String getMatch (String id, String foundry, String layer, |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 193 | boolean includeSpans, boolean includeHighlights, |
| 194 | boolean sentenceExpansion) { |
| 195 | |
| 196 | if (this.index != null) { |
| 197 | try { |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 198 | /* |
| 199 | For multiple foundries/layers use |
| 200 | String idString, |
| 201 | "tokens", |
| 202 | true, |
| 203 | ArrayList<String> foundry, |
| 204 | ArrayList<String> layer, |
| 205 | boolean includeSpans, |
| 206 | boolean includeHighlights, |
| 207 | boolean extendToSentence |
| 208 | */ |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 209 | return this.index.getMatchInfo(id, "tokens", foundry, layer, |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 210 | includeSpans, includeHighlights, sentenceExpansion) |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 211 | .toJsonString(); |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 212 | } |
| 213 | catch (QueryException qe) { |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 214 | Match km = new Match(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 215 | km.addError(qe.getErrorCode(), qe.getMessage()); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 216 | return km.toJsonString(); |
| 217 | } |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 218 | }; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 219 | Match km = new Match(); |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 220 | km.addError(601, "Unable to find index"); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 221 | return km.toJsonString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 222 | }; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 223 | |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 224 | |
| 225 | /** |
| 226 | * Get statistics on (virtual) collections. |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 227 | * |
| 228 | * @param json |
| 229 | * JSON-LD string with potential meta filters. |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 230 | */ |
| Akron | b99b70c | 2015-06-19 20:14:28 +0200 | [diff] [blame] | 231 | @Deprecated |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 232 | public String getStatistics (String json) { |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 233 | jlog.trace(json); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 234 | if (this.index == null) { |
| 235 | return "{\"documents\" : -1, error\" : \"No index given\" }"; |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 236 | }; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 237 | // Create Virtual collection from json search |
| 238 | KrillCollection kc = new KrillCollection(json); |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 239 | // Set index |
| 240 | kc.setIndex(this.index); |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 241 | long docs = 0, tokens = 0, sentences = 0, paragraphs = 0; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 242 | // Get numbers from index (currently slow) |
| 243 | try { |
| 244 | docs = kc.numberOf("documents"); |
| Akron | a3afa7d | 2017-07-04 16:13:22 +0200 | [diff] [blame] | 245 | if (docs > 0) { |
| 246 | tokens = kc.numberOf("tokens"); |
| 247 | sentences = kc.numberOf("base/sentences"); |
| 248 | paragraphs = kc.numberOf("base/paragraphs"); |
| 249 | }; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 250 | } |
| Michael Hanl | 1939065 | 2016-01-16 11:01:24 +0100 | [diff] [blame] | 251 | catch (IOException e) { |
| 252 | e.printStackTrace(); |
| 253 | }; |
| Michael Hanl | ca740d7 | 2015-06-16 10:04:58 +0200 | [diff] [blame] | 254 | // Build json response |
| 255 | StringBuilder sb = new StringBuilder("{"); |
| 256 | sb.append("\"documents\":").append(docs).append(",\"tokens\":") |
| 257 | .append(tokens).append(",\"sentences\":").append(sentences) |
| 258 | .append(",\"paragraphs\":").append(paragraphs).append("}"); |
| 259 | return sb.toString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 260 | }; |
| Michael Hanl | 8abaf9e | 2016-05-23 16:46:35 +0200 | [diff] [blame] | 261 | |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 262 | |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 263 | /** |
| margaretha | 61471cc | 2017-04-20 18:42:23 +0200 | [diff] [blame] | 264 | * Return the match identifier as a string. |
| 265 | * This is a convenient method to deal with legacy instantiation |
| 266 | * of the |
| 267 | * code. |
| 268 | */ |
| 269 | public String getMatchId (String corpusID, String docID, String textID, |
| 270 | String matchID) { |
| 271 | // Create a string representation of the match |
| 272 | StringBuilder sb = new StringBuilder(); |
| 273 | sb.append("match-").append(corpusID).append('/').append(docID) |
| 274 | .append('/').append(textID).append('-').append(matchID); |
| 275 | return sb.toString(); |
| Akron | 78e2d20 | 2016-10-13 14:17:11 +0200 | [diff] [blame] | 276 | }; |
| 277 | }; |