blob: e28a40445164d89c6e7fd28fdd53f827de2c42ee [file] [log] [blame]
Akronc74dee02018-02-07 18:48:30 +01001package de.ids_mannheim.korap.response;
2
3import org.slf4j.Logger;
4import org.slf4j.LoggerFactory;
5
6import com.fasterxml.jackson.annotation.*;
7import com.fasterxml.jackson.annotation.JsonInclude.Include;
8import com.fasterxml.jackson.databind.ObjectMapper;
9import com.fasterxml.jackson.databind.JsonNode;
10import com.fasterxml.jackson.databind.node.ObjectNode;
11import com.fasterxml.jackson.databind.node.ArrayNode;
Akron0c8a5c52018-03-07 20:00:36 +010012
Akronc74dee02018-02-07 18:48:30 +010013import de.ids_mannheim.korap.index.AbstractDocument;
Akron0c8a5c52018-03-07 20:00:36 +010014import de.ids_mannheim.korap.util.KrillDate;
Akronc74dee02018-02-07 18:48:30 +010015
Akron04f00952018-03-06 18:56:54 +010016import java.io.IOException;
17
18import de.ids_mannheim.korap.index.KeywordAnalyzer;
19import org.apache.lucene.analysis.TokenStream;
20import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
21
22import java.io.StringReader;
23
Akronc74dee02018-02-07 18:48:30 +010024import java.util.*;
Akron0c8a5c52018-03-07 20:00:36 +010025import java.util.regex.*;
Akronc74dee02018-02-07 18:48:30 +010026
27import org.apache.lucene.index.*;
28
29@JsonInclude(Include.NON_NULL)
30public class MetaFields extends AbstractDocument {
31
32 // Logger
33 private final static Logger log = LoggerFactory.getLogger(MetaFields.class);
34
35 // This advices the java compiler to ignore all loggings
36 public static final boolean DEBUG = false;
37
Akron0c8a5c52018-03-07 20:00:36 +010038 // TODO:
39 // This is a temporary indicator to check
40 // whether a date field is a date
41 private static final Pattern dateKeyPattern = Pattern.compile(".*Date$");
42
Akronc74dee02018-02-07 18:48:30 +010043 // Mapper for JSON serialization
44 ObjectMapper mapper = new ObjectMapper();
45
46 private Map<String, MetaField> fieldsMap = new HashMap<>();
47
48 public MetaFields (String id) {
49 this.addMessage(0, "Response format is temporary");
50 };
51
52
53 /**
54 * Add field to collection
55 */
56 public void add (IndexableField iField) {
57
58 IndexableFieldType iFieldType = iField.fieldType();
59
60 // Field type needs to be restored heuristically
61 // - though that's not very elegant
62
63 // Ignore non-stored fields
64 if (!iFieldType.stored())
65 return;
66
67 MetaField mf = new MetaField(iField.name());
68
69 // Reuse existing metafield
70 if (fieldsMap.containsKey(mf.key)) {
71 mf = fieldsMap.get(mf.key);
72 }
73
74 // Add new field
75 else {
76 fieldsMap.put(mf.key, mf);
77 };
78
79 // TODO: Check if metaField exists for that field
80
81 Number n = iField.numericValue();
82 String s = iField.stringValue();
83
84 // Field has numeric value (possibly a date)
85 if (n != null) {
Akronc74dee02018-02-07 18:48:30 +010086
Akron0c8a5c52018-03-07 20:00:36 +010087 // Check if key indicates a date
88 Matcher dateMatcher = dateKeyPattern.matcher(mf.key);
89 if (dateMatcher.matches()) {
90 mf.type = "type:date";
91
92 // Check structure with KrillDate
93 KrillDate date = new KrillDate(n.toString());
94 if (date != null) {
95
96 // Serialize withz dash separation
97 mf.values.add(date.toDisplay());
98 };
99 }
100
101 // Field is a number
102 else {
103 mf.type = "type:number";
104 mf.values.add(n.toString());
105 };
Akronc74dee02018-02-07 18:48:30 +0100106 }
107
108 // Field has a textual value
109 else if (s != null) {
Akron1ca674e2018-02-08 16:16:24 +0100110
Akron04f00952018-03-06 18:56:54 +0100111 // Stored
Akronc74dee02018-02-07 18:48:30 +0100112 if (iFieldType.indexOptions() == IndexOptions.NONE) {
Akron1ca674e2018-02-08 16:16:24 +0100113 mf.type = "type:store";
Akron04f00952018-03-06 18:56:54 +0100114 mf.values.add(s.toString());
Akronc74dee02018-02-07 18:48:30 +0100115 }
Akron04f00952018-03-06 18:56:54 +0100116
117 // Keywords
118 else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
119 mf.type = "type:keywords";
120
121 // Analyze keywords
122 try {
123 StringReader reader = new StringReader(s.toString());
124 KeywordAnalyzer kwa = new KeywordAnalyzer();
125 TokenStream ts = kwa.tokenStream("-", reader);
126 CharTermAttribute term;
127 ts.reset();
128 while (ts.incrementToken()) {
129 term = ts.getAttribute(CharTermAttribute.class);
130 mf.values.add(term.toString());
131 };
132 ts.close();
133 reader.close();
134 }
135 catch (IOException e) {
136 log.error("Unable to split {}={}", iField.name(), s.toString());
137 }
138 }
139
140 // Text
Akronc74dee02018-02-07 18:48:30 +0100141 else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
Akronc74dee02018-02-07 18:48:30 +0100142 mf.type = "type:text";
Akron04f00952018-03-06 18:56:54 +0100143 mf.values.add(s.toString());
144 }
Akronc74dee02018-02-07 18:48:30 +0100145
Akron04f00952018-03-06 18:56:54 +0100146 // String
147 else {
148 mf.values.add(s.toString());
149 };
Akronc74dee02018-02-07 18:48:30 +0100150 }
151
152 else {
153 log.error("Unknown field type {}", iField.name());
154 };
155 };
156
157
158 /**
159 * Serialize response as a {@link JsonNode}.
160 *
161 * @return {@link JsonNode} representation of the response
162 */
163 public JsonNode toJsonNode () {
164
165 // Get notifications
166 ObjectNode json = (ObjectNode) super.toJsonNode();
167
168 ObjectNode doc = json.putObject("document");
169 doc.put("@type", "koral:document");
170
171 ArrayNode fields = doc.putArray("fields");
172
173 // Iterate over all fields
174 Iterator fIter = fieldsMap.keySet().iterator();
175 while (fIter.hasNext()) {
176 // System.err.println(fIter.next());
177 MetaField mf = fieldsMap.get(fIter.next());
178 // System.err.println(mf.type);
179 fields.add(mf.toJsonNode());
180 };
181
182 return json;
183 };
184};