blob: da8124c7ee7f25a392595c605e3d870860dfe805 [file] [log] [blame]
Akrone64cc162019-01-08 18:40:37 +01001package de.ids_mannheim.korap.response;
2
3import org.slf4j.Logger;
4import org.slf4j.LoggerFactory;
5
6import com.fasterxml.jackson.annotation.*;
7import com.fasterxml.jackson.annotation.JsonInclude.Include;
8import com.fasterxml.jackson.databind.ObjectMapper;
9import com.fasterxml.jackson.databind.JsonNode;
10import com.fasterxml.jackson.databind.node.ObjectNode;
11import com.fasterxml.jackson.databind.node.ArrayNode;
12
13import de.ids_mannheim.korap.index.AbstractDocument;
14import de.ids_mannheim.korap.util.KrillDate;
15
16import java.io.IOException;
17
18import de.ids_mannheim.korap.index.KeywordAnalyzer;
19import org.apache.lucene.analysis.TokenStream;
20import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
21
22import java.io.StringReader;
23
24import java.util.*;
25import java.util.regex.*;
26
27import org.apache.lucene.index.*;
Akronc7a2abc2019-01-17 14:21:34 +010028import org.apache.lucene.document.FieldType;
Akrone64cc162019-01-08 18:40:37 +010029
Akron50e5f612019-01-16 12:52:39 +010030public class MetaFieldsObj implements Iterable<MetaField> {
Akrone64cc162019-01-08 18:40:37 +010031
32 // Logger
33 private final static Logger log = LoggerFactory.getLogger(MetaFields.class);
34
35 // This advices the java compiler to ignore all loggings
36 public static final boolean DEBUG = false;
37
Akronc7a2abc2019-01-17 14:21:34 +010038 private static final Pattern dateValuePattern = Pattern.compile("^([0-9]{8})$");
Akrone64cc162019-01-08 18:40:37 +010039
40 // Mapper for JSON serialization
41 ObjectMapper mapper = new ObjectMapper();
42
Akron1a8bb762019-01-18 15:48:59 +010043 public List<String> fieldsOrder;
44
Akrone64cc162019-01-08 18:40:37 +010045 private Map<String, MetaField> fieldsMap = new HashMap<>();
46
47
Akron50e5f612019-01-16 12:52:39 +010048 public MetaFieldsObj () {};
Akrone64cc162019-01-08 18:40:37 +010049
50
51 /**
52 * Add field to collection
53 */
Akron2b921a62019-01-14 18:52:45 +010054 public MetaField add (IndexableField iField) {
Akron4376e742019-01-16 15:02:30 +010055 return this.add(
56 metaFieldFromIndexableField(
57 iField,
Akron1a8bb762019-01-18 15:48:59 +010058 new MetaField(iField.name(), "type:string")
Akron4376e742019-01-16 15:02:30 +010059 )
60 );
Akron2b921a62019-01-14 18:52:45 +010061 };
62
63
64 /**
65 * Add field to collection
66 */
67 public MetaField add (MetaField mf) {
68 // Ignore non-stored fields
69 if (mf == null)
70 return null;
71
Akronde4f0852019-01-16 16:29:44 +010072 if (fieldsMap.containsKey(mf.key)) {
73 fieldsMap.get(mf.key).values.addAll(mf.values);
74 return fieldsMap.get(mf.key);
75 };
76
Akron2b921a62019-01-14 18:52:45 +010077 fieldsMap.put(mf.key, mf);
78 return mf;
79 };
80
Akron1a8bb762019-01-18 15:48:59 +010081
Akron2b921a62019-01-14 18:52:45 +010082 // Field type needs to be restored heuristically
83 // - though that's not very elegant
Akron4376e742019-01-16 15:02:30 +010084 public static MetaField metaFieldFromIndexableField (IndexableField iField, MetaField mf) {
Akrone64cc162019-01-08 18:40:37 +010085 IndexableFieldType iFieldType = iField.fieldType();
86
87 // Field type needs to be restored heuristically
88 // - though that's not very elegant
89
90 // Ignore non-stored fields
91 if (!iFieldType.stored())
Akron2b921a62019-01-14 18:52:45 +010092 return null;
Akrone64cc162019-01-08 18:40:37 +010093
94 // TODO: Check if metaField exists for that field
95
96 Number n = iField.numericValue();
97 String s = iField.stringValue();
98
99 // Field has numeric value (possibly a date)
100 if (n != null) {
101
102 // Check if key indicates a date
Akronc7a2abc2019-01-17 14:21:34 +0100103 Matcher dateMatcher = dateValuePattern.matcher(n.toString());
104 if (dateMatcher.matches()) {
Akron2b921a62019-01-14 18:52:45 +0100105 mf.type = "type:date";
Akronc7a2abc2019-01-17 14:21:34 +0100106
Akron2b921a62019-01-14 18:52:45 +0100107 KrillDate date = new KrillDate(n.toString());
Akrone64cc162019-01-08 18:40:37 +0100108 if (date != null) {
109
110 // Serialize withz dash separation
111 mf.values.add(date.toDisplay());
112 };
Akron2b921a62019-01-14 18:52:45 +0100113 }
Akrone64cc162019-01-08 18:40:37 +0100114
115 // Field is a number
116 else {
Akron50e5f612019-01-16 12:52:39 +0100117 mf.type = "type:integer";
Akrone96f9fe2023-08-28 11:36:12 +0200118 mf.values.add(Integer.valueOf(n.intValue()).toString());
Akrone64cc162019-01-08 18:40:37 +0100119 };
120 }
121
122 // Field has a textual value
123 else if (s != null) {
124
Akron2b921a62019-01-14 18:52:45 +0100125 // Stored
Akrone64cc162019-01-08 18:40:37 +0100126 if (iFieldType.indexOptions() == IndexOptions.NONE) {
Akron2b921a62019-01-14 18:52:45 +0100127
Akrone64cc162019-01-08 18:40:37 +0100128 String value = s.toString();
129 if (value.startsWith("data:")) {
130 mf.type = "type:attachement";
131 }
132 else {
133 mf.type = "type:store";
134 };
135 mf.values.add(value);
Akron2b921a62019-01-14 18:52:45 +0100136 return mf;
Akrone64cc162019-01-08 18:40:37 +0100137 }
138
139 // Keywords
140 else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
141 mf.type = "type:keywords";
142
143 // Analyze keywords
144 try {
145 StringReader reader = new StringReader(s.toString());
146 KeywordAnalyzer kwa = new KeywordAnalyzer();
147 TokenStream ts = kwa.tokenStream("-", reader);
148 CharTermAttribute term;
149 ts.reset();
150 while (ts.incrementToken()) {
151 term = ts.getAttribute(CharTermAttribute.class);
152 mf.values.add(term.toString());
153 };
154 ts.close();
155 reader.close();
156 }
157 catch (IOException e) {
158 log.error("Unable to split {}={}", iField.name(), s.toString());
159 }
160 }
161
162 // Text
163 else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
164 mf.type = "type:text";
165 mf.values.add(s.toString());
166 }
167
Akron2b921a62019-01-14 18:52:45 +0100168 // Special treatment for legacy indices
169 else if (mf.key.equals("UID")) {
170 mf.type = "type:integer";
171 mf.values.add(s.toString());
172 }
173
Akrone64cc162019-01-08 18:40:37 +0100174 // String
175 else {
176 mf.values.add(s.toString());
177 };
178 }
179
180 else {
181 log.error("Unknown field type {}", iField.name());
182 };
Akrone64cc162019-01-08 18:40:37 +0100183
Akron2b921a62019-01-14 18:52:45 +0100184 mf.values.removeAll(Collections.singleton(null));
Akrone64cc162019-01-08 18:40:37 +0100185
Akron2b921a62019-01-14 18:52:45 +0100186 return mf;
Akrone64cc162019-01-08 18:40:37 +0100187 };
188
189
190 /**
191 * Get field from collection
Akron32b95192019-01-11 13:58:55 +0100192 *
193 * @param key
194 * The key of the field
Akrone64cc162019-01-08 18:40:37 +0100195 */
196 public MetaField get (String key) {
197 return fieldsMap.get(key);
198 };
Akron32b95192019-01-11 13:58:55 +0100199
200
201 /**
202 * Check for field existence.
203 *
204 * @param key
205 * The key of the field
206 */
207 public Boolean contains (String key) {
208 return fieldsMap.containsKey(key);
209 };
210
Akronbe9638d2019-02-07 17:09:42 +0100211
Akron1a8bb762019-01-18 15:48:59 +0100212 private Iterator<String> getIterator () {
213 if (this.fieldsOrder == null) {
214 return fieldsMap.keySet().iterator();
215 };
216 return this.fieldsOrder.iterator();
217 };
Akron32b95192019-01-11 13:58:55 +0100218
219
220 @Override
221 public Iterator<MetaField> iterator() {
222 return new Iterator<MetaField>() {
223
Akron1a8bb762019-01-18 15:48:59 +0100224 private Iterator<String> it = getIterator();
225
Akron32b95192019-01-11 13:58:55 +0100226 private int currentIndex = 0;
227
228 @Override
229 public boolean hasNext() {
230 return it.hasNext();
231 };
232
233 @Override
234 public MetaField next() {
Akron1a8bb762019-01-18 15:48:59 +0100235 String key = it.next();
236 MetaField mf = fieldsMap.get(key);
237 if (mf == null)
238 return new MetaField(key);
239 return mf;
Akron32b95192019-01-11 13:58:55 +0100240 };
241
242 @Override
243 public void remove() {
244 throw new UnsupportedOperationException();
245 };
246 };
247 };
Akrone64cc162019-01-08 18:40:37 +0100248};