| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 1 | package de.ids_mannheim.korap.response; |
| 2 | |
| 3 | import org.slf4j.Logger; |
| 4 | import org.slf4j.LoggerFactory; |
| 5 | |
| 6 | import com.fasterxml.jackson.annotation.*; |
| 7 | import com.fasterxml.jackson.annotation.JsonInclude.Include; |
| 8 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 9 | import com.fasterxml.jackson.databind.JsonNode; |
| 10 | import com.fasterxml.jackson.databind.node.ObjectNode; |
| 11 | import com.fasterxml.jackson.databind.node.ArrayNode; |
| Akron | 0c8a5c5 | 2018-03-07 20:00:36 +0100 | [diff] [blame] | 12 | |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 13 | import de.ids_mannheim.korap.index.AbstractDocument; |
| Akron | 0c8a5c5 | 2018-03-07 20:00:36 +0100 | [diff] [blame] | 14 | import de.ids_mannheim.korap.util.KrillDate; |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 15 | |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 16 | import java.io.IOException; |
| 17 | |
| 18 | import de.ids_mannheim.korap.index.KeywordAnalyzer; |
| 19 | import org.apache.lucene.analysis.TokenStream; |
| 20 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| 21 | |
| 22 | import java.io.StringReader; |
| 23 | |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 24 | import java.util.*; |
| Akron | 0c8a5c5 | 2018-03-07 20:00:36 +0100 | [diff] [blame] | 25 | import java.util.regex.*; |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 26 | |
| 27 | import org.apache.lucene.index.*; |
| 28 | |
| 29 | @JsonInclude(Include.NON_NULL) |
| 30 | public class MetaFields extends AbstractDocument { |
| 31 | |
| 32 | // Logger |
| 33 | private final static Logger log = LoggerFactory.getLogger(MetaFields.class); |
| 34 | |
| 35 | // This advices the java compiler to ignore all loggings |
| 36 | public static final boolean DEBUG = false; |
| 37 | |
| Akron | 0c8a5c5 | 2018-03-07 20:00:36 +0100 | [diff] [blame] | 38 | // TODO: |
| 39 | // This is a temporary indicator to check |
| 40 | // whether a date field is a date |
| 41 | private static final Pattern dateKeyPattern = Pattern.compile(".*Date$"); |
| 42 | |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 43 | // Mapper for JSON serialization |
| 44 | ObjectMapper mapper = new ObjectMapper(); |
| 45 | |
| 46 | private Map<String, MetaField> fieldsMap = new HashMap<>(); |
| 47 | |
| 48 | public MetaFields (String id) { |
| 49 | this.addMessage(0, "Response format is temporary"); |
| 50 | }; |
| 51 | |
| 52 | |
| 53 | /** |
| 54 | * Add field to collection |
| 55 | */ |
| 56 | public void add (IndexableField iField) { |
| 57 | |
| 58 | IndexableFieldType iFieldType = iField.fieldType(); |
| 59 | |
| 60 | // Field type needs to be restored heuristically |
| 61 | // - though that's not very elegant |
| 62 | |
| 63 | // Ignore non-stored fields |
| 64 | if (!iFieldType.stored()) |
| 65 | return; |
| 66 | |
| 67 | MetaField mf = new MetaField(iField.name()); |
| 68 | |
| 69 | // Reuse existing metafield |
| 70 | if (fieldsMap.containsKey(mf.key)) { |
| 71 | mf = fieldsMap.get(mf.key); |
| 72 | } |
| 73 | |
| 74 | // Add new field |
| 75 | else { |
| 76 | fieldsMap.put(mf.key, mf); |
| 77 | }; |
| 78 | |
| 79 | // TODO: Check if metaField exists for that field |
| 80 | |
| 81 | Number n = iField.numericValue(); |
| 82 | String s = iField.stringValue(); |
| 83 | |
| 84 | // Field has numeric value (possibly a date) |
| 85 | if (n != null) { |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 86 | |
| Akron | 0c8a5c5 | 2018-03-07 20:00:36 +0100 | [diff] [blame] | 87 | // Check if key indicates a date |
| 88 | Matcher dateMatcher = dateKeyPattern.matcher(mf.key); |
| 89 | if (dateMatcher.matches()) { |
| 90 | mf.type = "type:date"; |
| 91 | |
| 92 | // Check structure with KrillDate |
| 93 | KrillDate date = new KrillDate(n.toString()); |
| 94 | if (date != null) { |
| 95 | |
| 96 | // Serialize withz dash separation |
| 97 | mf.values.add(date.toDisplay()); |
| 98 | }; |
| 99 | } |
| 100 | |
| 101 | // Field is a number |
| 102 | else { |
| 103 | mf.type = "type:number"; |
| 104 | mf.values.add(n.toString()); |
| 105 | }; |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 106 | } |
| 107 | |
| 108 | // Field has a textual value |
| 109 | else if (s != null) { |
| Akron | 1ca674e | 2018-02-08 16:16:24 +0100 | [diff] [blame] | 110 | |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 111 | // Stored |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 112 | if (iFieldType.indexOptions() == IndexOptions.NONE) { |
| Akron | 1ca674e | 2018-02-08 16:16:24 +0100 | [diff] [blame] | 113 | mf.type = "type:store"; |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 114 | mf.values.add(s.toString()); |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 115 | } |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 116 | |
| 117 | // Keywords |
| 118 | else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) { |
| 119 | mf.type = "type:keywords"; |
| 120 | |
| 121 | // Analyze keywords |
| 122 | try { |
| 123 | StringReader reader = new StringReader(s.toString()); |
| 124 | KeywordAnalyzer kwa = new KeywordAnalyzer(); |
| 125 | TokenStream ts = kwa.tokenStream("-", reader); |
| 126 | CharTermAttribute term; |
| 127 | ts.reset(); |
| 128 | while (ts.incrementToken()) { |
| 129 | term = ts.getAttribute(CharTermAttribute.class); |
| 130 | mf.values.add(term.toString()); |
| 131 | }; |
| 132 | ts.close(); |
| 133 | reader.close(); |
| 134 | } |
| 135 | catch (IOException e) { |
| 136 | log.error("Unable to split {}={}", iField.name(), s.toString()); |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | // Text |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 141 | else if (iFieldType.indexOptions() != IndexOptions.DOCS) { |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 142 | mf.type = "type:text"; |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 143 | mf.values.add(s.toString()); |
| 144 | } |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 145 | |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 146 | // String |
| 147 | else { |
| 148 | mf.values.add(s.toString()); |
| 149 | }; |
| Akron | c74dee0 | 2018-02-07 18:48:30 +0100 | [diff] [blame] | 150 | } |
| 151 | |
| 152 | else { |
| 153 | log.error("Unknown field type {}", iField.name()); |
| 154 | }; |
| 155 | }; |
| 156 | |
| 157 | |
| 158 | /** |
| 159 | * Serialize response as a {@link JsonNode}. |
| 160 | * |
| 161 | * @return {@link JsonNode} representation of the response |
| 162 | */ |
| 163 | public JsonNode toJsonNode () { |
| 164 | |
| 165 | // Get notifications |
| 166 | ObjectNode json = (ObjectNode) super.toJsonNode(); |
| 167 | |
| 168 | ObjectNode doc = json.putObject("document"); |
| 169 | doc.put("@type", "koral:document"); |
| 170 | |
| 171 | ArrayNode fields = doc.putArray("fields"); |
| 172 | |
| 173 | // Iterate over all fields |
| 174 | Iterator fIter = fieldsMap.keySet().iterator(); |
| 175 | while (fIter.hasNext()) { |
| 176 | // System.err.println(fIter.next()); |
| 177 | MetaField mf = fieldsMap.get(fIter.next()); |
| 178 | // System.err.println(mf.type); |
| 179 | fields.add(mf.toJsonNode()); |
| 180 | }; |
| 181 | |
| 182 | return json; |
| 183 | }; |
| 184 | }; |