| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 1 | package de.ids_mannheim.korap.rewrite; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 2 | |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 3 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 4 | import java.util.ArrayList; |
| 5 | import java.util.List; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 6 | |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 7 | import org.slf4j.Logger; |
| 8 | import org.slf4j.LoggerFactory; |
| 9 | |
| 10 | import com.fasterxml.jackson.databind.JsonNode; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 11 | import com.google.common.collect.Lists; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 12 | |
| 13 | import de.ids_mannheim.korap.config.Attributes; |
| 14 | import de.ids_mannheim.korap.config.KustvaktConfiguration; |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 15 | import de.ids_mannheim.korap.config.FullConfiguration; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 16 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.object.KoralMatchOperator; |
| margaretha | ed7bc7a | 2017-11-12 21:39:41 +0100 | [diff] [blame] | 18 | import de.ids_mannheim.korap.query.object.KoralOperation; |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 19 | import de.ids_mannheim.korap.resource.rewrite.KoralNode; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 20 | import de.ids_mannheim.korap.resource.rewrite.KoralNode.RewriteIdentifier; |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 21 | import de.ids_mannheim.korap.resource.rewrite.RewriteTask; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 22 | import de.ids_mannheim.korap.user.User; |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 23 | import de.ids_mannheim.korap.user.User.CorpusAccess; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 24 | import de.ids_mannheim.korap.utils.JsonUtils; |
| 25 | import de.ids_mannheim.korap.utils.KoralCollectionQueryBuilder; |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 26 | import edu.emory.mathcs.backport.java.util.Arrays; |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 27 | |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 28 | /** CollectionRewrite determines which availability field values are |
| 29 | * possible for a user with respect to his mean and location of access. |
| 30 | * |
| 31 | * <br/><br/> |
| 32 | * KorAP differentiates 3 kinds of access: |
| 33 | * <ul> |
| 34 | * <li>FREE: without login</li> |
| 35 | * <li>PUB: login outside IDS network</li> |
| 36 | * <li>ALL: login within IDS network</li> |
| 37 | * </ul> |
| 38 | * |
| 39 | * Each of these accesses corresponds to a regular expression of license |
| 40 | * formats defined in kustvakt.conf. For a given access, only those |
| 41 | * resources whose availability field matches its regular expression |
| 42 | * are allowed to be retrieved. |
| 43 | * |
| 44 | * |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 45 | * @author margaretha |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 46 | * @last-update 21 Nov 2017 |
| 47 | * @see CorpusAccess |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 48 | */ |
| 49 | public class CollectionRewrite implements RewriteTask.RewriteQuery { |
| 50 | |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 51 | private static Logger jlog = |
| 52 | LoggerFactory.getLogger(CollectionRewrite.class); |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 53 | |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 54 | public CollectionRewrite () { |
| 55 | super(); |
| 56 | } |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 57 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 58 | |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 59 | private List<String> checkAvailability (JsonNode node, |
| 60 | List<String> originalAvailabilities, |
| 61 | List<String> updatedAvailabilities, boolean isOperationOr) { |
| 62 | try { |
| 63 | jlog.debug(JsonUtils.toJSON(node)); |
| 64 | } |
| 65 | catch (KustvaktException e) { |
| 66 | e.printStackTrace(); |
| 67 | } |
| 68 | |
| 69 | if (node.has("operands")) { |
| 70 | ArrayList<JsonNode> operands = |
| 71 | Lists.newArrayList(node.at("/operands").elements()); |
| 72 | |
| 73 | if (node.at("/operation").asText() |
| 74 | .equals(KoralOperation.AND.toString())) { |
| 75 | for (int i = 0; i < operands.size(); i++) { |
| 76 | updatedAvailabilities = checkAvailability(operands.get(i), |
| 77 | originalAvailabilities, updatedAvailabilities, |
| 78 | false); |
| 79 | if (updatedAvailabilities.isEmpty()) break; |
| 80 | } |
| 81 | } |
| 82 | else { |
| 83 | for (int i = 0; i < operands.size(); i++) { |
| 84 | updatedAvailabilities = checkAvailability(operands.get(i), |
| 85 | originalAvailabilities, updatedAvailabilities, |
| 86 | true); |
| 87 | } |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 88 | } |
| 89 | } |
| 90 | else if (node.has("key") |
| 91 | && node.at("/key").asText().equals("availability")) { |
| 92 | String queryAvailability = node.at("/value").asText(); |
| 93 | String matchOp = node.at("/match").asText(); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 94 | if (originalAvailabilities.contains(queryAvailability) |
| 95 | && matchOp.equals(KoralMatchOperator.EQUALS.toString())) { |
| 96 | jlog.debug("REMOVE " + queryAvailability); |
| 97 | updatedAvailabilities.remove(queryAvailability); |
| 98 | } |
| 99 | else if (isOperationOr) { |
| 100 | jlog.debug("RESET availabilities"); |
| 101 | updatedAvailabilities.clear(); |
| 102 | updatedAvailabilities.addAll(originalAvailabilities); |
| 103 | return updatedAvailabilities; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 104 | } |
| 105 | } |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 106 | return updatedAvailabilities; |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 107 | } |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 108 | |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 109 | @Override |
| 110 | public JsonNode rewriteQuery (KoralNode node, KustvaktConfiguration config, |
| 111 | User user) throws KustvaktException { |
| 112 | JsonNode jsonNode = node.rawNode(); |
| margaretha | 56e8e55 | 2017-12-05 16:31:21 +0100 | [diff] [blame] | 113 | |
| 114 | FullConfiguration fullConfig = (FullConfiguration) config; |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 115 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 116 | List<String> userAvailabilities = new ArrayList<String>(); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 117 | switch (user.getCorpusAccess()) { |
| 118 | case PUB: |
| margaretha | dfecb4b | 2017-12-12 19:32:30 +0100 | [diff] [blame] | 119 | userAvailabilities.addAll(fullConfig.getFreeRegexList()); |
| 120 | userAvailabilities.addAll(fullConfig.getPublicRegexList()); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 121 | break; |
| 122 | case ALL: |
| margaretha | dfecb4b | 2017-12-12 19:32:30 +0100 | [diff] [blame] | 123 | userAvailabilities.addAll(fullConfig.getFreeRegexList()); |
| 124 | userAvailabilities.addAll(fullConfig.getPublicRegexList()); |
| 125 | userAvailabilities.addAll(fullConfig.getAllRegexList()); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 126 | break; |
| 127 | case FREE: |
| margaretha | dfecb4b | 2017-12-12 19:32:30 +0100 | [diff] [blame] | 128 | userAvailabilities.addAll(fullConfig.getFreeRegexList()); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 129 | break; |
| 130 | } |
| 131 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 132 | KoralCollectionQueryBuilder builder = new KoralCollectionQueryBuilder(); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 133 | RewriteIdentifier identifier = new KoralNode.RewriteIdentifier( |
| 134 | Attributes.AVAILABILITY, user.getCorpusAccess()); |
| 135 | JsonNode rewrittesNode; |
| 136 | |
| 137 | if (jsonNode.has("collection")) { |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 138 | List<String> avalabilityCopy = |
| 139 | new ArrayList<String>(userAvailabilities.size()); |
| 140 | avalabilityCopy.addAll(userAvailabilities); |
| 141 | jlog.debug("Availabilities: " |
| 142 | + Arrays.toString(userAvailabilities.toArray())); |
| 143 | |
| 144 | userAvailabilities = checkAvailability(jsonNode.at("/collection"), |
| 145 | avalabilityCopy, userAvailabilities, false); |
| 146 | if (!userAvailabilities.isEmpty()) { |
| 147 | builder.with(buildAvailability(avalabilityCopy)); |
| margaretha | cfea1ae | 2018-01-15 20:27:26 +0100 | [diff] [blame] | 148 | jlog.debug("corpus query: " +builder.toString()); |
| margaretha | 80b30ce | 2017-06-27 16:28:40 +0200 | [diff] [blame] | 149 | builder.setBaseQuery(builder.toJSON()); |
| 150 | rewrittesNode = builder.mergeWith(jsonNode).at("/collection"); |
| 151 | node.set("collection", rewrittesNode, identifier); |
| margaretha | 3d7d355 | 2017-06-26 17:45:36 +0200 | [diff] [blame] | 152 | } |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 153 | } |
| 154 | else { |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 155 | builder.with(buildAvailability(userAvailabilities)); |
| margaretha | cfea1ae | 2018-01-15 20:27:26 +0100 | [diff] [blame] | 156 | jlog.debug("corpus query: " +builder.toString()); |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 157 | rewrittesNode = |
| 158 | JsonUtils.readTree(builder.toJSON()).at("/collection"); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 159 | node.set("collection", rewrittesNode, identifier); |
| 160 | } |
| 161 | |
| margaretha | cfea1ae | 2018-01-15 20:27:26 +0100 | [diff] [blame] | 162 | jlog.debug("REWRITES: " + node.at("/collection").toString()); |
| margaretha | a89c3f9 | 2017-05-30 19:02:08 +0200 | [diff] [blame] | 163 | return node.rawNode(); |
| 164 | } |
| margaretha | bb48630 | 2017-11-21 13:47:22 +0100 | [diff] [blame] | 165 | |
| 166 | |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 167 | private String buildAvailability (List<String> userAvailabilities) { |
| 168 | StringBuilder sb = new StringBuilder(); |
| margaretha | ad618d2 | 2017-12-11 19:58:49 +0100 | [diff] [blame] | 169 | for (int i = 0; i < userAvailabilities.size(); i++) { |
| 170 | parseAvailability(sb, userAvailabilities.get(i), "|"); |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 171 | } |
| margaretha | ad618d2 | 2017-12-11 19:58:49 +0100 | [diff] [blame] | 172 | String availabilities = sb.toString(); |
| 173 | return availabilities.substring(0, availabilities.length()-3); |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 174 | } |
| margaretha | ad618d2 | 2017-12-11 19:58:49 +0100 | [diff] [blame] | 175 | |
| 176 | private void parseAvailability (StringBuilder sb, String availability, String operator) { |
| 177 | String uaArr[] = null; |
| 178 | if (availability.contains("|")){ |
| 179 | uaArr = availability.split("\\|"); |
| 180 | for (int j=0; j < uaArr.length; j++){ |
| 181 | parseAvailability(sb, uaArr[j].trim(), "|"); |
| 182 | } |
| 183 | } |
| 184 | // EM: not supported |
| 185 | // else if (availability.contains("&")){ |
| 186 | // uaArr = availability.split("&"); |
| 187 | // for (int j=0; j < uaArr.length -1; j++){ |
| 188 | // parseAvailability(sb, uaArr[j], "&"); |
| 189 | // } |
| 190 | // parseAvailability(sb, uaArr[uaArr.length-1], "|"); |
| 191 | // } |
| 192 | else{ |
| 193 | sb.append("availability=/"); |
| 194 | sb.append(availability); |
| 195 | sb.append("/ "); |
| 196 | sb.append(operator); |
| 197 | sb.append(" "); |
| 198 | } |
| 199 | |
| 200 | } |
| 201 | |
| margaretha | dc73192 | 2017-05-22 17:20:42 +0200 | [diff] [blame] | 202 | } |
| margaretha | 416e787 | 2017-06-20 15:05:17 +0200 | [diff] [blame] | 203 | |