| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 1 | package de.ids_mannheim.korap.rewrite; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 2 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 3 | import java.util.ArrayList; |
| 4 | import java.util.List; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 5 | |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 6 | import org.apache.commons.text.CaseUtils; |
| margaretha | 49cb688 | 2018-07-04 04:19:54 +0200 | [diff] [blame] | 7 | import org.apache.logging.log4j.LogManager; |
| 8 | import org.apache.logging.log4j.Logger; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 9 | |
| 10 | import com.fasterxml.jackson.databind.JsonNode; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 11 | import com.google.common.collect.Lists; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 12 | |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 13 | import de.ids_mannheim.korap.config.FullConfiguration; |
| margaretha | 1bc9cca | 2018-12-11 15:09:44 +0100 | [diff] [blame] | 14 | import de.ids_mannheim.korap.config.KustvaktConfiguration; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 15 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 16 | import de.ids_mannheim.korap.query.object.KoralMatchOperator; |
| margaretha | ed7bc7a | 2017-11-12 21:39:41 +0100 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.object.KoralOperation; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 18 | import de.ids_mannheim.korap.user.User; |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 19 | import de.ids_mannheim.korap.user.User.CorpusAccess; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 20 | import de.ids_mannheim.korap.utils.JsonUtils; |
| 21 | import de.ids_mannheim.korap.utils.KoralCollectionQueryBuilder; |
| 22 | |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 23 | /** |
| margaretha | c105e2f | 2024-12-19 09:55:46 +0100 | [diff] [blame] | 24 | * AvailabilityRewrite determines which availability field values are |
| margaretha | 9d82046 | 2024-11-25 16:11:57 +0100 | [diff] [blame] | 25 | * possible for a user with respect to login and location of access. |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 26 | * |
| 27 | * <br/><br/> |
| 28 | * KorAP differentiates 3 kinds of access: |
| 29 | * <ul> |
| 30 | * <li>FREE: without login</li> |
| 31 | * <li>PUB: login outside IDS network</li> |
| 32 | * <li>ALL: login within IDS network</li> |
| 33 | * </ul> |
| 34 | * |
| 35 | * Each of these accesses corresponds to a regular expression of |
| margaretha | 9d82046 | 2024-11-25 16:11:57 +0100 | [diff] [blame] | 36 | * license formats defined in kustvakt.conf. For a given access, only those |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 37 | * resources whose availability field matches its regular expression |
| 38 | * are allowed to be retrieved. |
| 39 | * |
| 40 | * |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 41 | * @author margaretha |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 42 | * @see CorpusAccess |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 43 | */ |
| margaretha | c105e2f | 2024-12-19 09:55:46 +0100 | [diff] [blame] | 44 | public class AvailabilityRewrite implements RewriteTask.RewriteQuery { |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 45 | |
| margaretha | c105e2f | 2024-12-19 09:55:46 +0100 | [diff] [blame] | 46 | public static Logger jlog = LogManager.getLogger(AvailabilityRewrite.class); |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 47 | |
| margaretha | c105e2f | 2024-12-19 09:55:46 +0100 | [diff] [blame] | 48 | public AvailabilityRewrite () { |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 49 | super(); |
| 50 | } |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 51 | |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 52 | private List<String> checkAvailability (JsonNode node, |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 53 | List<String> availabilityRules, |
| 54 | List<String> actualAvailabilities, boolean isOperationOr) { |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 55 | |
| 56 | if (node.has("operands")) { |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 57 | ArrayList<JsonNode> operands = Lists |
| 58 | .newArrayList(node.at("/operands").elements()); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 59 | |
| 60 | if (node.at("/operation").asText() |
| 61 | .equals(KoralOperation.AND.toString())) { |
| 62 | for (int i = 0; i < operands.size(); i++) { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 63 | actualAvailabilities = checkAvailability(operands.get(i), |
| 64 | availabilityRules, actualAvailabilities, |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 65 | false); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 66 | if (actualAvailabilities.isEmpty()) |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 67 | break; |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 68 | } |
| 69 | } |
| 70 | else { |
| 71 | for (int i = 0; i < operands.size(); i++) { |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 72 | node = operands.get(i); |
| 73 | if (node.has("key") && !node.at("/key").asText() |
| 74 | .equals("availability")) { |
| 75 | jlog.debug("RESET availabilities 1, key=" |
| 76 | + node.at("/key").asText()); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 77 | actualAvailabilities.clear(); |
| 78 | actualAvailabilities.addAll(availabilityRules); |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 79 | break; |
| 80 | } |
| 81 | else { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 82 | actualAvailabilities = checkAvailability( |
| 83 | operands.get(i), availabilityRules, |
| 84 | actualAvailabilities, true); |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 85 | } |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 86 | } |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 87 | } |
| 88 | } |
| 89 | else if (node.has("key") |
| 90 | && node.at("/key").asText().equals("availability")) { |
| 91 | String queryAvailability = node.at("/value").asText(); |
| 92 | String matchOp = node.at("/match").asText(); |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 93 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 94 | if (availabilityRules.contains(queryAvailability) |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 95 | && matchOp.equals(KoralMatchOperator.EQUALS.toString())) { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 96 | actualAvailabilities.remove(queryAvailability); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 97 | } |
| 98 | else if (isOperationOr) { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 99 | actualAvailabilities.clear(); |
| 100 | actualAvailabilities.addAll(availabilityRules); |
| 101 | return actualAvailabilities; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 102 | } |
| 103 | } |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 104 | return actualAvailabilities; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 105 | } |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 106 | |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 107 | @Override |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 108 | public KoralNode rewriteQuery (KoralNode koralNode, KustvaktConfiguration config, |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 109 | User user) throws KustvaktException { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 110 | JsonNode jsonNode = koralNode.rawNode(); |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 111 | |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 112 | FullConfiguration fullConfig = (FullConfiguration) config; |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 113 | CorpusAccess corpusAccess = user.getCorpusAccess(); |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 114 | String corpusAccessName = user.accesstoString(); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 115 | List<String> availabilityRules = getAvailabilityRules(corpusAccess, |
| 116 | fullConfig); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 117 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 118 | String availabilityQuery = getCorpusQuery(corpusAccess, fullConfig); |
| 119 | |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 120 | if (jsonNode.has("collection")) { |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 121 | if (jsonNode.toString().contains("availability")) { |
| 122 | List<String> actualAvalability = new ArrayList<>(); |
| 123 | actualAvalability.addAll(availabilityRules); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 124 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 125 | actualAvalability = checkAvailability(jsonNode.at("/collection"), |
| 126 | availabilityRules, actualAvalability, false); |
| 127 | if (!actualAvalability.isEmpty()) { |
| 128 | createOperationAnd(availabilityQuery, jsonNode, |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 129 | corpusAccessName, koralNode); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 130 | |
| 131 | // builder.with(availabilityQuery); |
| 132 | // builder.setBaseQuery(builder.toJSON()); |
| 133 | // rewrittenNode = builder.mergeWith(jsonNode).at("/collection"); |
| 134 | // koralNode.set("collection", rewrittenNode, identifier); |
| margaretha | d6f39d5 | 2018-12-06 14:21:39 +0100 | [diff] [blame] | 135 | } |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 136 | } |
| 137 | else { |
| 138 | createOperationAnd(availabilityQuery, jsonNode, |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 139 | corpusAccessName, koralNode); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 140 | } |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 141 | } |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 142 | else { |
| 143 | KoralCollectionQueryBuilder builder = |
| 144 | new KoralCollectionQueryBuilder(); |
| 145 | builder.with(availabilityQuery); |
| 146 | JsonNode rewrittenNode = JsonUtils.readTree(builder.toJSON()) |
| 147 | .at("/collection"); |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 148 | |
| 149 | RewriteIdentifier identifier = new RewriteIdentifier(null, null, |
| 150 | corpusAccessName + " corpus access policy has been added."); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 151 | koralNode.set("collection", rewrittenNode, identifier); |
| 152 | } |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 153 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 154 | koralNode = koralNode.at("/collection"); |
| 155 | return koralNode; |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 156 | } |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 157 | |
| 158 | private void createOperationAnd (String availabilityQuery, |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 159 | JsonNode jsonNode, String corpusAccessName, KoralNode node) |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 160 | throws KustvaktException { |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 161 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 162 | KoralCollectionQueryBuilder availabilityBuilder = |
| 163 | new KoralCollectionQueryBuilder(); |
| 164 | availabilityBuilder.with(availabilityQuery); |
| 165 | JsonNode availabilityNode = JsonUtils |
| 166 | .readTree(availabilityBuilder.toJSON()); |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 167 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 168 | String source = jsonNode.at("/collection").toString(); |
| 169 | JsonNode sourceNode = JsonUtils.readTree(source); |
| margaretha | ad618d2 | 2017-12-11 19:58:49 +0100 | [diff] [blame] | 170 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 171 | KoralCollectionQueryBuilder builder = new KoralCollectionQueryBuilder(); |
| 172 | // Base query must contains collection |
| 173 | builder.setBaseQuery(availabilityNode); |
| 174 | JsonNode rewrittenNode = builder.mergeWith(jsonNode).at("/collection"); |
| margaretha | 8489f86 | 2025-02-05 11:32:16 +0100 | [diff] [blame] | 175 | RewriteIdentifier identifier = new RewriteIdentifier(null, sourceNode, |
| 176 | corpusAccessName + " corpus access policy has been added."); |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 177 | node.replace("collection", rewrittenNode, identifier); |
| 178 | } |
| 179 | |
| 180 | private List<String> getAvailabilityRules (CorpusAccess access, |
| 181 | FullConfiguration fullConfig) { |
| 182 | switch (access) { |
| 183 | case PUB: |
| 184 | return fullConfig.getPublicRegexList(); |
| 185 | case ALL: |
| 186 | return fullConfig.getAllRegexList(); |
| 187 | default: // free |
| 188 | return fullConfig.getFreeRegexList(); |
| 189 | } |
| 190 | } |
| margaretha | de92c20 | 2018-02-20 14:38:40 +0100 | [diff] [blame] | 191 | |
| margaretha | d9e43ec | 2024-12-20 12:11:43 +0100 | [diff] [blame] | 192 | private String getCorpusQuery (CorpusAccess access, |
| 193 | FullConfiguration fullConfig) { |
| 194 | switch (access) { |
| 195 | case PUB: |
| 196 | return fullConfig.getPublicAvailabilityQuery(); |
| 197 | case ALL: |
| 198 | return fullConfig.getAllAvailabilityQuery(); |
| 199 | default: // free |
| 200 | return fullConfig.getFreeAvailabilityQuery(); |
| 201 | } |
| 202 | |
| 203 | } |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 204 | } |