| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| Joachim Bingel | 6003b85 | 2014-12-18 14:20:55 +0000 | [diff] [blame] | 3 | import de.ids_mannheim.korap.query.serialize.util.QueryException; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 4 | import org.z3950.zing.cql.*; |
| 5 | |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 6 | import java.io.IOException; |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 7 | import java.util.ArrayList; |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 8 | import java.util.LinkedHashMap; |
| 9 | import java.util.List; |
| 10 | import java.util.Map; |
| 11 | |
| Eliza Margaretha | 724be11 | 2014-04-01 11:13:40 +0000 | [diff] [blame] | 12 | /** |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 13 | * @author margaretha |
| Eliza Margaretha | f267d4a | 2014-05-09 11:50:23 +0000 | [diff] [blame] | 14 | * @date 09.05.14 |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 15 | */ |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 16 | public class CQLTree extends AbstractSyntaxTree { |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 17 | |
| 18 | private static final String VERSION_1_1 = "1.1"; |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 19 | private static final String VERSION_1_2 = "1.2"; |
| 20 | private static final String INDEX_CQL_SERVERCHOICE = "cql.serverChoice"; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 21 | private static final String INDEX_WORDS = "words"; |
| 22 | private static final String TERM_RELATION_CQL_1_1 = "scr"; |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 23 | private static final String TERM_RELATION_CQL_1_2 = "="; |
| 24 | private static final String SUPPORTED_RELATION_EXACT = "exact"; // not in the doc |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 25 | private static final String OPERATION_OR = "operation:or"; |
| 26 | private static final String OPERATION_SEQUENCE = "operation:sequence"; |
| Eliza Margaretha | 9544f58 | 2014-05-09 15:22:42 +0000 | [diff] [blame] | 27 | private static final String OPERATION_POSITION = "operation:position"; |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 28 | private static final String KORAP_CONTEXT = "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld"; |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 29 | |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 30 | private LinkedHashMap<String, Object> requestMap; |
| 31 | private String version; |
| 32 | private boolean isCaseSensitive; // default true |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 33 | |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 34 | public CQLTree(String query) throws QueryException { |
| 35 | this(query, VERSION_1_2, true); |
| 36 | } |
| 37 | |
| 38 | public CQLTree(String query, String version) throws QueryException { |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 39 | this(query, version, true); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 40 | } |
| 41 | |
| 42 | public CQLTree(String query, String version, boolean isCaseSensitive) throws QueryException { |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 43 | this.version = version; |
| 44 | this.isCaseSensitive = isCaseSensitive; |
| Michael Hanl | ab3e931 | 2014-04-01 18:52:21 +0000 | [diff] [blame] | 45 | this.requestMap = new LinkedHashMap<>(); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 46 | requestMap.put("@context", KORAP_CONTEXT); |
| 47 | process(query); |
| 48 | } |
| 49 | |
| 50 | |
| 51 | @Override |
| 52 | public Map<String, Object> getRequestMap() { |
| 53 | return this.requestMap; |
| 54 | } |
| 55 | |
| 56 | @Override |
| Eliza Margaretha | 4679357 | 2014-04-02 15:11:25 +0000 | [diff] [blame] | 57 | public void process(String query) throws QueryException { |
| 58 | if ((query == null) || query.isEmpty()) |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 59 | throw new QueryException(301, "SRU diagnostic 27: An empty query is unsupported."); |
| Eliza Margaretha | 4679357 | 2014-04-02 15:11:25 +0000 | [diff] [blame] | 60 | |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 61 | CQLNode cqlNode = parseQuerytoCQLNode(query); |
| Eliza Margaretha | d5eafca | 2014-05-12 13:24:57 +0000 | [diff] [blame] | 62 | Map<String,Object> queryMap = parseCQLNode(cqlNode); |
| 63 | requestMap.put("query", queryMap); |
| 64 | //requestMap.put("query", sentenceWrapper(queryMap)); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 65 | } |
| 66 | |
| Eliza Margaretha | 9544f58 | 2014-05-09 15:22:42 +0000 | [diff] [blame] | 67 | private Map<String,Object> sentenceWrapper(Map<String,Object> m){ |
| 68 | Map<String, Object> map = new LinkedHashMap<String,Object>(); |
| 69 | map.put("@type", "korap:group"); |
| 70 | map.put("operation", OPERATION_POSITION); |
| 71 | map.put("frame", "frame:contains"); |
| 72 | |
| 73 | Map<String, Object> sentence = new LinkedHashMap<String,Object>(); |
| 74 | sentence.put("@type", "korap:span"); |
| 75 | sentence.put("key", "s"); |
| 76 | |
| 77 | List<Map<String, Object>> list = new ArrayList<Map<String, Object>>(); |
| 78 | list.add(sentence); |
| 79 | list.add(m); |
| 80 | map.put("operands", list); |
| 81 | |
| 82 | return map; |
| 83 | } |
| 84 | |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 85 | private CQLNode parseQuerytoCQLNode(String query) throws QueryException { |
| 86 | try { |
| 87 | int compat = -1; |
| 88 | switch (version) { |
| 89 | case VERSION_1_1: |
| 90 | compat = CQLParser.V1POINT1; |
| 91 | break; |
| 92 | case VERSION_1_2: |
| 93 | compat = CQLParser.V1POINT2; |
| 94 | } |
| 95 | return new CQLParser(compat).parse(query); |
| 96 | |
| 97 | } catch (CQLParseException | IOException e) { |
| 98 | throw new QueryException("Error parsing CQL"); |
| 99 | } |
| 100 | } |
| 101 | |
| Eliza Margaretha | 9544f58 | 2014-05-09 15:22:42 +0000 | [diff] [blame] | 102 | private Map<String,Object> parseCQLNode(CQLNode node) throws QueryException { |
| 103 | |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 104 | if (node instanceof CQLTermNode) { |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 105 | return parseTermNode((CQLTermNode) node); |
| Eliza Margaretha | 95ccd41 | 2014-10-01 10:29:56 +0000 | [diff] [blame] | 106 | } else if (node instanceof CQLAndNode) { |
| 107 | return parseAndNode((CQLAndNode) node); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 108 | } else if (node instanceof CQLOrNode) { |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 109 | return parseOrNode((CQLOrNode) node); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 110 | } else { |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 111 | throw new QueryException(105, "SRU diagnostic 48: Only basic search including term-only " + |
| Eliza Margaretha | 95ccd41 | 2014-10-01 10:29:56 +0000 | [diff] [blame] | 112 | "and boolean (AND,OR) operator queries are currently supported."); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 113 | } |
| 114 | } |
| 115 | |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 116 | private Map<String,Object> parseTermNode(CQLTermNode node) throws QueryException { |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 117 | checkTermNode(node); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 118 | final String term = node.getTerm(); |
| 119 | if ((term == null) || term.isEmpty()) { |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 120 | throw new QueryException(301, "SRU diagnostic 27: An empty term is unsupported."); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 121 | } else if (term.contains(" ")) { |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 122 | return writeSequence(term); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 123 | } else { |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 124 | return writeTerm(term); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 125 | } |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 126 | } |
| 127 | |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 128 | private Map<String,Object> parseAndNode(CQLAndNode node) throws QueryException { |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 129 | checkBooleanModifier(node); |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 130 | |
| 131 | Map<String, Object> map = new LinkedHashMap<String,Object>(); |
| 132 | map.put("@type", "korap:group"); |
| 133 | map.put("operation", OPERATION_SEQUENCE); |
| Eliza Margaretha | 95ccd41 | 2014-10-01 10:29:56 +0000 | [diff] [blame] | 134 | map.put("inOrder", false); |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 135 | |
| 136 | List<Map<String, Object>> list = new ArrayList<Map<String, Object>>(); |
| 137 | Map<String, Object> distanceMap = new LinkedHashMap<String,Object>(); |
| 138 | distanceMap.put("@type", "korap:distance"); |
| Eliza Margaretha | 95ccd41 | 2014-10-01 10:29:56 +0000 | [diff] [blame] | 139 | distanceMap.put("key", "s"); |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 140 | distanceMap.put("min", "0"); |
| 141 | distanceMap.put("max", "0"); |
| 142 | list.add(distanceMap); |
| 143 | map.put("distances", list); |
| 144 | |
| 145 | List<Map<String, Object>> operandList = new ArrayList<Map<String, Object>>(); |
| 146 | operandList.add(parseCQLNode(node.getLeftOperand())); |
| 147 | operandList.add(parseCQLNode(node.getRightOperand())); |
| 148 | map.put("operands", operandList); |
| 149 | |
| 150 | return map; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 151 | } |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 152 | |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 153 | private Map<String,Object> parseOrNode(CQLOrNode node) throws QueryException { |
| 154 | checkBooleanModifier(node); |
| 155 | |
| 156 | Map<String, Object> map = new LinkedHashMap<String,Object>(); |
| 157 | map.put("@type", "korap:group"); |
| 158 | map.put("operation", OPERATION_OR); |
| 159 | |
| 160 | List<Map<String, Object>> list = new ArrayList<Map<String, Object>>(); |
| 161 | list.add(parseCQLNode(node.getLeftOperand())); |
| 162 | list.add(parseCQLNode(node.getRightOperand())); |
| 163 | map.put("operands", list); |
| 164 | |
| 165 | return map; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 166 | } |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 167 | |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 168 | private Map<String, Object> writeSequence(String str) { |
| 169 | Map<String, Object> sequenceMap = new LinkedHashMap<String,Object>(); |
| 170 | sequenceMap.put("@type", "korap:group"); |
| 171 | sequenceMap.put("operation", OPERATION_SEQUENCE); |
| 172 | |
| 173 | List<Map<String, Object>> termList = new ArrayList<Map<String, Object>>(); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 174 | String[] terms = str.split(" "); |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 175 | for (String term : terms){ |
| 176 | termList.add(writeTerm(term)); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 177 | } |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 178 | sequenceMap.put("operands", termList); |
| 179 | |
| 180 | return sequenceMap; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 181 | } |
| 182 | |
| Eliza Margaretha | 6051482 | 2014-04-23 11:32:43 +0000 | [diff] [blame] | 183 | private Map<String, Object> writeTerm(String term) { |
| 184 | Map<String, Object> map = new LinkedHashMap<String,Object>(); |
| 185 | map.put("@type", "korap:term"); |
| 186 | if (!isCaseSensitive) { |
| 187 | map.put("caseInsensitive","true"); |
| 188 | } |
| 189 | map.put("key", term); |
| 190 | map.put("layer", "orth"); |
| 191 | map.put("match", "match:eq"); |
| 192 | |
| 193 | Map<String, Object> tokenMap = new LinkedHashMap<String,Object>(); |
| 194 | tokenMap.put("@type", "korap:token"); |
| 195 | tokenMap.put("wrap", map); |
| 196 | return tokenMap; |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 197 | } |
| 198 | |
| 199 | private void checkBooleanModifier(CQLBooleanNode node) throws QueryException { |
| 200 | List<Modifier> modifiers = node.getModifiers(); |
| 201 | if ((modifiers != null) && !modifiers.isEmpty()) { |
| 202 | Modifier modifier = modifiers.get(0); |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 203 | throw new QueryException(105, "SRU diagnostic 20: Relation modifier " + |
| Michael Hanl | ab3e931 | 2014-04-01 18:52:21 +0000 | [diff] [blame] | 204 | modifier.toCQL() + " is not supported."); |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 205 | } |
| 206 | } |
| 207 | |
| 208 | private void checkTermNode(CQLTermNode node) throws QueryException { |
| 209 | // only allow "cql.serverChoice" and "words" index |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 210 | if (!(INDEX_CQL_SERVERCHOICE.equals(node.getIndex()) || |
| 211 | INDEX_WORDS.equals(node.getIndex()))) { |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 212 | throw new QueryException(105, "SRU diagnostic 16: Index " + node.getIndex() + " is not supported."); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 213 | } |
| 214 | // only allow "=" relation without any modifiers |
| 215 | CQLRelation relation = node.getRelation(); |
| 216 | String baseRel = relation.getBase(); |
| 217 | if (!(TERM_RELATION_CQL_1_1.equals(baseRel) || |
| 218 | TERM_RELATION_CQL_1_2.equals(baseRel) || |
| 219 | SUPPORTED_RELATION_EXACT.equals(baseRel))) { |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 220 | throw new QueryException(105, "SRU diagnostic 19: Relation " + |
| Michael Hanl | ab3e931 | 2014-04-01 18:52:21 +0000 | [diff] [blame] | 221 | relation.getBase() + " is not supported."); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 222 | } |
| 223 | List<Modifier> modifiers = relation.getModifiers(); |
| 224 | if ((modifiers != null) && !modifiers.isEmpty()) { |
| 225 | Modifier modifier = modifiers.get(0); |
| Eliza Margaretha | 6e8e1bd | 2014-09-22 15:27:06 +0000 | [diff] [blame] | 226 | throw new QueryException(105, "SRU diagnostic 20: Relation modifier " + |
| Michael Hanl | ab3e931 | 2014-04-01 18:52:21 +0000 | [diff] [blame] | 227 | modifier.getValue() + " is not supported."); |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 228 | } |
| Michael Hanl | 2a30f42 | 2014-04-01 16:41:44 +0000 | [diff] [blame] | 229 | } |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 230 | |
| Eliza Margaretha | 39db1ab | 2014-04-01 10:39:54 +0000 | [diff] [blame] | 231 | } |