| package de.ids_mannheim.korap; |
| |
| import java.util.*; |
| import java.io.IOException; |
| |
| import de.ids_mannheim.korap.*; |
| import de.ids_mannheim.korap.util.KorapDate; |
| import de.ids_mannheim.korap.util.QueryException; |
| import de.ids_mannheim.korap.filter.BooleanFilter; |
| import de.ids_mannheim.korap.filter.FilterOperation; |
| |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.search.*; |
| |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.Bits; |
| |
| import com.fasterxml.jackson.databind.ObjectMapper; |
| import com.fasterxml.jackson.databind.JsonNode; |
| |
| import java.io.StringWriter; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| // TODO: Make a cache for the bits!!! DELETE IT IN CASE OF AN EXTENSION OR A FILTER! |
| // TODO: Maybe use FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY |
| // TODO: Maybe a constantScoreQuery can make things faster? |
| |
| // THIS MAY CHANGE for stuff like combining virtual collections |
| // See http://mail-archives.apache.org/mod_mbox/lucene-java-user/ |
| // 200805.mbox/%3C17080852.post@talk.nabble.com%3E |
| |
| public class KorapCollection { |
| private KorapIndex index; |
| private KorapDate created; |
| private String id; |
| private String error; |
| private ArrayList<FilterOperation> filter; |
| private int filterCount = 0; |
| |
| // Logger |
| private final static Logger log = LoggerFactory.getLogger(KorapCollection.class); |
| |
| // This advices the java compiler to ignore all loggings |
| public static final boolean DEBUG = false; |
| |
/**
 * Create an empty collection that is bound to the given index.
 *
 * @param ki the index this collection will be evaluated against
 */
public KorapCollection (KorapIndex ki) {
    this.filter = new ArrayList<FilterOperation>(5);
    this.index = ki;
}
| |
| /** |
| * Construct a new KorapCollection by passing a JSON query. |
| * This supports collections with key "collection" and |
| * legacy collections with the key "collections". |
| */ |
| public KorapCollection (String jsonString) { |
| ObjectMapper mapper = new ObjectMapper(); |
| this.filter = new ArrayList<FilterOperation>(5); |
| |
| try { |
| JsonNode json = mapper.readValue(jsonString, JsonNode.class); |
| |
| if (json.has("collection")) { |
| this.fromJSON(json.get("collection")); |
| } |
| |
| // Legacy collection serialization |
| // This will be removed! |
| else if (json.has("collections")) { |
| if (DEBUG) |
| log.warn("Using DEPRECATED collection!"); |
| |
| for (JsonNode collection : json.get("collections")) { |
| this.fromJSONLegacy(collection); |
| }; |
| }; |
| } |
| catch (Exception e) { |
| this.error = e.getMessage(); |
| }; |
| }; |
| |
| |
/**
 * Create an empty collection without an index.
 * An index can be attached afterwards with setIndex().
 */
public KorapCollection () {
    this.filter = new ArrayList<FilterOperation>(5);
}
| |
| |
| public void fromJSON (String jsonString) throws QueryException { |
| ObjectMapper mapper = new ObjectMapper(); |
| try { |
| this.fromJSON((JsonNode) mapper.readValue(jsonString, JsonNode.class)); |
| } |
| catch (Exception e) { |
| this.error = e.getMessage(); |
| }; |
| }; |
| |
| |
| public void fromJSON (JsonNode json) throws QueryException { |
| this.filter(new KorapFilter(json)); |
| }; |
| |
| |
| /** |
| * Legacy API for collection filters. |
| */ |
| public void fromJSONLegacy (String jsonString) throws QueryException { |
| ObjectMapper mapper = new ObjectMapper(); |
| try { |
| this.fromJSONLegacy((JsonNode) mapper.readValue(jsonString, JsonNode.class)); |
| } |
| catch (Exception e) { |
| this.error = e.getMessage(); |
| }; |
| }; |
| |
| |
| /** |
| * Legacy API for collection filters. |
| */ |
| public void fromJSONLegacy (JsonNode json) throws QueryException { |
| if (!json.has("@type")) |
| throw new QueryException(612, "JSON-LD group has no @type attribute"); |
| |
| if (!json.has("@value")) |
| throw new QueryException(612, "Legacy filter need @value fields"); |
| |
| String type = json.get("@type").asText(); |
| |
| KorapFilter kf = new KorapFilter(); |
| kf.setBooleanFilter( |
| kf.fromJSONLegacy(json.get("@value"), "tokens") |
| ); |
| if (type.equals("korap:meta-filter")) { |
| if (DEBUG) |
| log.trace("Add Filter LEGACY"); |
| this.filter(kf); |
| } |
| |
| else if (type.equals("korap:meta-extend")) { |
| if (DEBUG) |
| log.trace("Add Extend LEGACY"); |
| this.extend(kf); |
| }; |
| }; |
| |
| public int getCount() { |
| return this.filterCount; |
| }; |
| |
| public void setIndex (KorapIndex ki) { |
| this.index = ki; |
| }; |
| |
| // The checks asre not necessary |
| public KorapCollection filter (BooleanFilter filter) { |
| if (DEBUG) |
| log.trace("Added filter: {}", filter.toString()); |
| |
| if (filter == null) { |
| log.warn("No filter is given"); |
| return this; |
| }; |
| |
| Filter f = (Filter) new QueryWrapperFilter(filter.toQuery()); |
| if (f == null) { |
| log.warn("Filter can't be wrapped"); |
| return this; |
| }; |
| FilterOperation fo = new FilterOperation(f, false); |
| if (fo == null) { |
| log.warn("Filter operation invalid"); |
| return this; |
| }; |
| this.filter.add(fo); |
| this.filterCount++; |
| return this; |
| }; |
| |
| // Filter based on UIDs |
| public KorapCollection filterUIDs (String ... uids) { |
| BooleanFilter filter = new BooleanFilter(); |
| filter.or("UID", uids); |
| if (DEBUG) |
| log.debug("UID based filter: {}", filter.toString()); |
| return this.filter(filter); |
| }; |
| |
| |
| public KorapCollection filter (KorapFilter filter) { |
| return this.filter(filter.getBooleanFilter()); |
| }; |
| |
| |
| public KorapCollection extend (BooleanFilter filter) { |
| if (DEBUG) |
| log.trace("Added extension: {}", filter.toString()); |
| this.filter.add( |
| new FilterOperation( |
| (Filter) new QueryWrapperFilter(filter.toQuery()), |
| true |
| ) |
| ); |
| this.filterCount++; |
| return this; |
| }; |
| |
| public KorapCollection extend (KorapFilter filter) { |
| return this.extend(filter.getBooleanFilter()); |
| }; |
| |
| |
| public ArrayList<FilterOperation> getFilters () { |
| return this.filter; |
| }; |
| |
| public FilterOperation getFilter (int i) { |
| return this.filter.get(i); |
| }; |
| |
| |
| public String toString () { |
| StringBuilder sb = new StringBuilder(); |
| for (FilterOperation fo : this.filter) { |
| sb.append(fo.toString()).append("; "); |
| }; |
| return sb.toString(); |
| }; |
| |
| /** |
| * Search in the virtual collection. This is just used for |
| * testing purposes and not recommended for serious usage. |
| */ |
| public KorapResult search (SpanQuery query) { |
| return this.index.search( |
| this, |
| query, |
| 0, |
| (short) 20, |
| true, (short) 5, |
| true, (short) 5 |
| ); |
| }; |
| |
| public FixedBitSet bits (AtomicReaderContext atomic) throws IOException { |
| |
| /* |
| Use Bits.MatchAllBits(int len) |
| */ |
| |
| boolean noDoc = true; |
| FixedBitSet bitset; |
| |
| if (this.filterCount > 0) { |
| bitset = new FixedBitSet(atomic.reader().maxDoc()); |
| |
| ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter.clone(); |
| |
| FilterOperation kcInit = filters.remove(0); |
| if (DEBUG) |
| log.trace("FILTER: {}", kcInit); |
| |
| // Init vector |
| DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null); |
| |
| DocIdSetIterator filterIter = docids.iterator(); |
| |
| if (filterIter != null) { |
| if (DEBUG) |
| log.trace("InitFilter has effect"); |
| bitset.or(filterIter); |
| noDoc = false; |
| }; |
| |
| if (!noDoc) { |
| for (FilterOperation kc : filters) { |
| if (DEBUG) |
| log.trace("FILTER: {}", kc); |
| |
| // TODO: BUG!!!!!!!!!! |
| docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset); |
| filterIter = docids.iterator(); |
| |
| if (filterIter == null) { |
| // There must be a better way ... |
| if (kc.isFilter()) { |
| // TODO: Check if this is really correct! |
| // Maybe here is the bug |
| bitset.clear(0, bitset.length()); |
| noDoc = true; |
| } |
| else { |
| // System.err.println("No term found"); |
| }; |
| continue; |
| }; |
| if (kc.isExtension()) { |
| // System.err.println("Term found!"); |
| // System.err.println("Old Card:" + bitset.cardinality()); |
| bitset.or(filterIter); |
| // System.err.println("New Card:" + bitset.cardinality()); |
| } |
| else { |
| bitset.and(filterIter); |
| }; |
| }; |
| |
| if (!noDoc) { |
| FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs(); |
| if (livedocs != null) { |
| bitset.and(livedocs); |
| }; |
| }; |
| } |
| else { |
| return bitset; |
| }; |
| } |
| else { |
| bitset = (FixedBitSet) atomic.reader().getLiveDocs(); |
| }; |
| |
| return bitset; |
| }; |
| |
| public long numberOf (String foundry, String type) throws IOException { |
| if (this.index == null) |
| return (long) 0; |
| |
| return this.index.numberOf(this, foundry, type); |
| }; |
| |
| public long numberOf (String type) throws IOException { |
| if (this.index == null) |
| return (long) 0; |
| |
| return this.index.numberOf(this, "tokens", type); |
| }; |
| |
| // This is only for testing purposes! |
| @Deprecated |
| public HashMap getTermRelation(String field) throws Exception { |
| if (this.index == null) { |
| HashMap<String,Long> map = new HashMap<>(1); |
| map.put("-docs", (long) 0); |
| return map; |
| }; |
| |
| return this.index.getTermRelation(this, field); |
| }; |
| |
| @Deprecated |
| public String getTermRelationJSON(String field) throws IOException { |
| ObjectMapper mapper = new ObjectMapper(); |
| StringWriter sw = new StringWriter(); |
| sw.append("{\"field\":"); |
| mapper.writeValue(sw,field); |
| sw.append(","); |
| |
| try { |
| HashMap<String, Long> map = this.getTermRelation(field); |
| |
| sw.append("\"documents\":"); |
| mapper.writeValue(sw,map.remove("-docs")); |
| sw.append(","); |
| |
| String[] keys = map.keySet().toArray(new String[map.size()]); |
| |
| HashMap<String,Integer> setHash = new HashMap<>(20); |
| ArrayList<HashMap<String,Long>> set = new ArrayList<>(20); |
| ArrayList<Long[]> overlap = new ArrayList<>(100); |
| |
| int count = 0; |
| for (String key : keys) { |
| if (!key.startsWith("#__")) { |
| HashMap<String,Long> simpleMap = new HashMap<>(); |
| simpleMap.put(key, map.remove(key)); |
| set.add(simpleMap); |
| setHash.put(key, count++); |
| }; |
| }; |
| |
| keys = map.keySet().toArray(new String[map.size()]); |
| for (String key : keys) { |
| String[] comb = key.substring(3).split(":###:"); |
| Long[] l = new Long[3]; |
| l[0] = (long) setHash.get(comb[0]); |
| l[1] = (long) setHash.get(comb[1]); |
| l[2] = map.remove(key); |
| overlap.add(l); |
| }; |
| |
| |
| sw.append("\"sets\":"); |
| mapper.writeValue(sw, (Object) set); |
| sw.append(",\"overlaps\":"); |
| mapper.writeValue(sw, (Object) overlap); |
| sw.append(",\"error\":null"); |
| |
| } |
| catch (Exception e) { |
| sw.append("\"error\":"); |
| mapper.writeValue(sw,e.getMessage()); |
| }; |
| |
| sw.append("}"); |
| return sw.getBuffer().toString(); |
| }; |
| |
| public String getError () { |
| return this.error; |
| }; |
| }; |