blob: 86607fd7debe141be577298af75dd2f1fb0007d0 [file] [log] [blame]
Akronc74dee02018-02-07 18:48:30 +01001package de.ids_mannheim.korap.response;
2
3import org.slf4j.Logger;
4import org.slf4j.LoggerFactory;
5
6import com.fasterxml.jackson.annotation.*;
7import com.fasterxml.jackson.annotation.JsonInclude.Include;
8import com.fasterxml.jackson.databind.ObjectMapper;
9import com.fasterxml.jackson.databind.JsonNode;
10import com.fasterxml.jackson.databind.node.ObjectNode;
11import com.fasterxml.jackson.databind.node.ArrayNode;
Akron0c8a5c52018-03-07 20:00:36 +010012
Akronc74dee02018-02-07 18:48:30 +010013import de.ids_mannheim.korap.index.AbstractDocument;
Akron0c8a5c52018-03-07 20:00:36 +010014import de.ids_mannheim.korap.util.KrillDate;
Akronc74dee02018-02-07 18:48:30 +010015
Akron04f00952018-03-06 18:56:54 +010016import java.io.IOException;
17
18import de.ids_mannheim.korap.index.KeywordAnalyzer;
19import org.apache.lucene.analysis.TokenStream;
20import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
21
22import java.io.StringReader;
23
Akronc74dee02018-02-07 18:48:30 +010024import java.util.*;
Akron0c8a5c52018-03-07 20:00:36 +010025import java.util.regex.*;
Akronc74dee02018-02-07 18:48:30 +010026
27import org.apache.lucene.index.*;
28
29@JsonInclude(Include.NON_NULL)
30public class MetaFields extends AbstractDocument {
31
32 // Logger
33 private final static Logger log = LoggerFactory.getLogger(MetaFields.class);
34
35 // This advices the java compiler to ignore all loggings
36 public static final boolean DEBUG = false;
37
Akron0c8a5c52018-03-07 20:00:36 +010038 // TODO:
39 // This is a temporary indicator to check
40 // whether a date field is a date
41 private static final Pattern dateKeyPattern = Pattern.compile(".*Date$");
42
Akronc74dee02018-02-07 18:48:30 +010043 // Mapper for JSON serialization
44 ObjectMapper mapper = new ObjectMapper();
45
46 private Map<String, MetaField> fieldsMap = new HashMap<>();
47
48 public MetaFields (String id) {
49 this.addMessage(0, "Response format is temporary");
50 };
51
52
53 /**
54 * Add field to collection
55 */
56 public void add (IndexableField iField) {
57
58 IndexableFieldType iFieldType = iField.fieldType();
59
60 // Field type needs to be restored heuristically
61 // - though that's not very elegant
62
63 // Ignore non-stored fields
64 if (!iFieldType.stored())
65 return;
66
67 MetaField mf = new MetaField(iField.name());
68
69 // Reuse existing metafield
70 if (fieldsMap.containsKey(mf.key)) {
71 mf = fieldsMap.get(mf.key);
72 }
73
74 // Add new field
75 else {
76 fieldsMap.put(mf.key, mf);
77 };
78
79 // TODO: Check if metaField exists for that field
80
81 Number n = iField.numericValue();
82 String s = iField.stringValue();
83
84 // Field has numeric value (possibly a date)
85 if (n != null) {
Akronc74dee02018-02-07 18:48:30 +010086
Akron0c8a5c52018-03-07 20:00:36 +010087 // Check if key indicates a date
88 Matcher dateMatcher = dateKeyPattern.matcher(mf.key);
89 if (dateMatcher.matches()) {
90 mf.type = "type:date";
91
92 // Check structure with KrillDate
93 KrillDate date = new KrillDate(n.toString());
94 if (date != null) {
95
96 // Serialize withz dash separation
97 mf.values.add(date.toDisplay());
98 };
99 }
100
101 // Field is a number
102 else {
103 mf.type = "type:number";
104 mf.values.add(n.toString());
105 };
Akronc74dee02018-02-07 18:48:30 +0100106 }
107
108 // Field has a textual value
109 else if (s != null) {
Akron1ca674e2018-02-08 16:16:24 +0100110
Akron04f00952018-03-06 18:56:54 +0100111 // Stored
Akronc74dee02018-02-07 18:48:30 +0100112 if (iFieldType.indexOptions() == IndexOptions.NONE) {
Akron8bb3bc32018-12-12 19:34:56 +0100113 String value = s.toString();
114 if (value.startsWith("data:")) {
115 mf.type = "type:attachement";
116 }
117 else {
118 mf.type = "type:store";
119 };
120 mf.values.add(value);
Akronc74dee02018-02-07 18:48:30 +0100121 }
Akron04f00952018-03-06 18:56:54 +0100122
123 // Keywords
124 else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
125 mf.type = "type:keywords";
126
127 // Analyze keywords
128 try {
129 StringReader reader = new StringReader(s.toString());
130 KeywordAnalyzer kwa = new KeywordAnalyzer();
131 TokenStream ts = kwa.tokenStream("-", reader);
132 CharTermAttribute term;
133 ts.reset();
134 while (ts.incrementToken()) {
135 term = ts.getAttribute(CharTermAttribute.class);
136 mf.values.add(term.toString());
137 };
138 ts.close();
139 reader.close();
140 }
141 catch (IOException e) {
142 log.error("Unable to split {}={}", iField.name(), s.toString());
143 }
144 }
145
146 // Text
Akronc74dee02018-02-07 18:48:30 +0100147 else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
Akronc74dee02018-02-07 18:48:30 +0100148 mf.type = "type:text";
Akron04f00952018-03-06 18:56:54 +0100149 mf.values.add(s.toString());
150 }
Akronc74dee02018-02-07 18:48:30 +0100151
Akron04f00952018-03-06 18:56:54 +0100152 // String
153 else {
154 mf.values.add(s.toString());
155 };
Akronc74dee02018-02-07 18:48:30 +0100156 }
157
158 else {
159 log.error("Unknown field type {}", iField.name());
160 };
161 };
162
163
164 /**
165 * Serialize response as a {@link JsonNode}.
166 *
167 * @return {@link JsonNode} representation of the response
168 */
169 public JsonNode toJsonNode () {
170
171 // Get notifications
172 ObjectNode json = (ObjectNode) super.toJsonNode();
173
174 ObjectNode doc = json.putObject("document");
175 doc.put("@type", "koral:document");
176
177 ArrayNode fields = doc.putArray("fields");
178
179 // Iterate over all fields
180 Iterator fIter = fieldsMap.keySet().iterator();
181 while (fIter.hasNext()) {
182 // System.err.println(fIter.next());
183 MetaField mf = fieldsMap.get(fIter.next());
184 // System.err.println(mf.type);
185 fields.add(mf.toJsonNode());
186 };
187
188 return json;
189 };
190};