blob: f6eb3f3a34074f4b79592c4427c9d4a28fd864d1 [file] [log] [blame]
Nils Diewaldff6f7662014-09-21 15:08:52 +00001package de.ids_mannheim.korap.util;
2
3import java.io.*;
Eliza Margaretha805e27f2016-10-14 21:39:42 +02004import java.net.URLDecoder;
Nils Diewaldff6f7662014-09-21 15:08:52 +00005import java.nio.file.Files;
Eliza Margaretha805e27f2016-10-14 21:39:42 +02006import java.nio.file.Path;
Nils Diewaldff6f7662014-09-21 15:08:52 +00007import java.nio.file.Paths;
8import java.nio.charset.Charset;
9import java.nio.charset.StandardCharsets;
10
/**
 * A collection of string related utility
 * functions.
 *
 * @author diewald
 */
public class KrillString {

    /**
     * Get String from file.
     *
     * The path may contain percent-encoded characters (e.g.
     * <code>%20</code> for a space), which are decoded as UTF-8
     * before the file is opened. A leading slash in front of a
     * drive letter (<code>/C:/...</code>) is removed.
     *
     * @param path
     *            The path of the file represented as a string.
     * @param encoding
     *            The expected {@link Charset}.
     * @return The content of the file
     * @throws IOException
     *             if the file cannot be read.
     * @throws IllegalArgumentException
     *             if the path contains a malformed percent escape.
     */
    public static String StringfromFile (String path, Charset encoding)
            throws IOException {

        // URLDecoder implements form decoding and would turn a
        // literal '+' into a space, pointing to a different file.
        // Protect plus signs so only %xx sequences are decoded.
        String decoded = URLDecoder.decode(path.replace("+", "%2B"), "UTF-8");

        // "/C:/dir/file" -> "C:/dir/file"
        decoded = decoded.replaceFirst("^/(.:/)", "$1");

        Path p = Paths.get(decoded);
        byte[] encoded = Files.readAllBytes(p);
        return new String(encoded, encoding);
    }


    /**
     * Get String from file (expecting UTF-8).
     *
     * @param path
     *            The path of the file represented as a string.
     * @return The content of the file
     * @throws IOException
     *             if the file cannot be read.
     */
    public static String StringfromFile (String path) throws IOException {
        return StringfromFile(path, StandardCharsets.UTF_8);
    }


    /**
     * Escape HTML relevant characters as entities.
     * The characters <code>&amp;</code>, <code>&lt;</code>,
     * <code>&gt;</code> and <code>&quot;</code> are replaced.
     *
     * @param text
     *            The string to escape.
     * @return The secured string, or the empty string
     *         if the text is <code>null</code>.
     */
    public static String escapeHTML (String text) {

        if (text == null)
            return "";

        // The ampersand has to be replaced first, otherwise the
        // ampersands of the introduced entities would be escaped again.
        return text.replace("&", "&amp;").replace("<", "&lt;")
                .replace(">", "&gt;").replace("\"", "&quot;");
    }


    /**
     * Escape Bracket relevant characters.
     * Curly brackets, square brackets and backslashes
     * are prefixed with a backslash.
     *
     * @param text
     *            The string to escape.
     * @return The secured string, or the empty string
     *         if the text is <code>null</code>.
     */
    public static String escapeBrackets (String text) {
        if (text == null)
            return "";
        return text.replaceAll("([\\{\\}\\[\\]\\\\])", "\\\\$1");
    }


    /**
     * Add surrounding double quotes.
     * Double quotes and backslashes inside of the text
     * are prefixed with a backslash.
     *
     * @param text
     *            The string to quote.
     * @return The quoted string. A <code>null</code> text is
     *         treated like the empty string and results in
     *         an empty quoted string.
     */
    public static String quote (String text) {

        // Treat null as empty text, in line with
        // escapeHTML() and escapeBrackets()
        if (text == null)
            return "\"\"";

        return '"' + text.replaceAll("([\"\\\\])", "\\\\$1") + '"';
    }


    /**
     * Provide a substring method that works well with surrogate pairs.
     * Both offsets are counted in code points instead of chars.
     *
     * @param text
     *            The string to substring.
     * @param start
     *            The start offset (in code points, inclusive).
     * @param end
     *            The end offset (in code points, exclusive).
     * @return The substring.
     * @throws IndexOutOfBoundsException
     *             if the offsets do not fit the text.
     */
    public static String codePointSubstring (String text, int start, int end) {

        // Char index of the first code point of the substring
        int a = text.offsetByCodePoints(0, start);

        // Continue counting from the start index instead
        // of walking the text from the beginning again
        return text.substring(a, text.offsetByCodePoints(a, end - start));
    }


    /**
     * Provide a substring method that works well with surrogate pairs.
     * The offset is counted in code points instead of chars and
     * the substring extends to the end of the text.
     *
     * @param text
     *            The string to substring.
     * @param start
     *            The start offset (in code points, inclusive).
     * @return The substring.
     * @throws IndexOutOfBoundsException
     *             if the offset does not fit the text.
     */
    public static String codePointSubstring (String text, int start) {
        return text.substring(text.offsetByCodePoints(0, start));
    }
}