src/main/java/de/ids_mannheim/korap/KrillCollection.java - KorAP/Krill - Gitiles

 package de.ids_mannheim.korap;

 import java.util.*;
 import java.io.IOException;

 import de.ids_mannheim.korap.collection.CollectionBuilder;
 import de.ids_mannheim.korap.response.Notifications;
 import de.ids_mannheim.korap.util.QueryException;
 import de.ids_mannheim.korap.response.Result;

 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.search.BitsFilteredDocIdSet;

 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.JsonNode;

 import java.nio.ByteBuffer;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /**
  * Create a Virtual Collection of documents by means of a KoralQuery
  * collection object.
  *
  * <blockquote><pre>
  * KrillCollection kc = new KrillCollection(json);
  * </pre></blockquote>
  *
  * @author diewald
  */
 /*
  * TODO: Make a cache for the bits
  *       Delete it in case of an extension or a filter
  * TODO: Maybe use randomaccessfilterstrategy
  * TODO: Maybe a constantScoreQuery can make things faster?
  * See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
  *     200805.mbox/%3C17080852.post@talk.nabble.com%3E
  */
 public final class KrillCollection extends Notifications {
     private KrillIndex index;
     private JsonNode json;
     private CollectionBuilder cb = new CollectionBuilder();
     private CollectionBuilder.Interface cbi;
     private byte[] pl = new byte[4];
     private static ByteBuffer bb = ByteBuffer.allocate(4);

     // Logger
     private final static Logger log = LoggerFactory
             .getLogger(KrillCollection.class);

     // This advices the java compiler to ignore all loggings
     public static final boolean DEBUG = false;


     /**
      * Construct a new KrillCollection.
      *
      */
     public KrillCollection () {};


     /**
      * Construct a new KrillCollection by passing a KrillIndex.
      *
      * @param index
      *            The {@link KrillIndex} object.
      */
     public KrillCollection (KrillIndex index) {
         this.index = index;
     };


     /**
      * Construct a new KrillCollection by passing a KoralQuery.
      *
      * @param json
      *            The KoralQuery document as a JSON string.
      */
     public KrillCollection (String jsonString) {
         ObjectMapper mapper = new ObjectMapper();
         try {
             JsonNode json = mapper.readTree(jsonString);

             if (json.has("collection"))
                 this.fromJson(json.get("collection"));

             else if (json.has("collections"))
                 this.addError(899,
                         "Collections are not supported anymore in favour of a single collection");
         }

         // Query Exception
         catch (QueryException qe) {
             this.addError(qe.getErrorCode(), qe.getMessage());
         }

         // JSON exception
         catch (IOException e) {
             this.addError(621, "Unable to parse JSON", "KrillCollection",
                     e.getLocalizedMessage());
         };
     };


     /**
      * Set the {@link KrillIndex} the virtual collection refers to.
      *
      * @param index
      *            The {@link KrillIndex} the virtual collection refers
      *            to.
      */
     public void setIndex (KrillIndex index) {
         this.index = index;
     };


     /**
      * Import the "collection" part of a KoralQuery.
      *
      * @param jsonString
      *            The "collection" part of a KoralQuery.
      * @throws QueryException
      */
     public KrillCollection fromJson (String jsonString) throws QueryException {
         ObjectMapper mapper = new ObjectMapper();
         try {
             this.fromJson((JsonNode) mapper.readTree(jsonString));
         }
         catch (Exception e) {
             this.addError(621, "Unable to parse JSON", "KrillCollection");
         };

         return this;
     };


     /**
      * Import the "collection" part of a KoralQuery.
      *
      * @param json
      *            The "collection" part of a KoralQuery
      *            as a {@link JsonNode} object.
      * @throws QueryException
      */
     public KrillCollection fromJson (JsonNode json) throws QueryException {
         this.json = json;
         return this.fromBuilder(this._fromJson(json));
     };


     private CollectionBuilder.Interface _fromJson (JsonNode json)
             throws QueryException {

         if (!json.has("@type")) {
             throw new QueryException(701,
                     "JSON-LD group has no @type attribute");
         };

         String type = json.get("@type").asText();

         if (type.equals("koral:doc")) {

             String key = "tokens";
             String valtype = "type:string";
             String match = "match:eq";

             if (json.has("key"))
                 key = json.get("key").asText();

             if (json.has("type"))
                 valtype = json.get("type").asText();

             // Filter based on date
             if (valtype.equals("type:date")) {

                 if (!json.has("value"))
                     throw new QueryException(820, "Dates require value fields");

                 String dateStr = json.get("value").asText();

                 if (json.has("match"))
                     match = json.get("match").asText();

                 // TODO: This isn't stable yet
                 switch (match) {
                     case "match:eq":
                         return this.cb.date(key, dateStr);
                     case "match:ne":
                         return this.cb.date(key, dateStr).not();
                     case "match:geq":
                         return this.cb.since(key, dateStr);
                     case "match:leq":
                         return this.cb.till(key, dateStr);
                 };

                 throw new QueryException(841, "Match relation unknown for type");
             }

             // Filter based on string
             else if (valtype.equals("type:string")) {
                 if (json.has("match"))
                     match = json.get("match").asText();

                 switch (match) {

                     case "match:eq":
                         return this.cb.term(key, json.get("value").asText());
                     case "match:ne":
                         return this.cb.term(key, json.get("value").asText())
                                 .not();

                         // This may change - but for now it means the elements are lowercased
                     case "match:contains":
                         return this.cb.term(key, json.get("value").asText()
                                 .toLowerCase());

                     case "match:containsnot":
                         return this.cb.term(key,
                                 json.get("value").asText().toLowerCase()).not();

                         // <LEGACY>
                     case "match:excludes":
                         return this.cb.term(key,
                                 json.get("value").asText().toLowerCase()).not();
                         // </LEGACY>
                 };

                 throw new QueryException(841, "Match relation unknown for type");
             }

             // Filter based on regex
             else if (valtype.equals("type:regex")) {

                 if (json.has("match"))
                     match = json.get("match").asText();

                 if (match.equals("match:eq")) {
                     return this.cb.re(key, json.get("value").asText());
                 }
                 else if (match.equals("match:ne")) {
                     return this.cb.re(key, json.get("value").asText()).not();
                 }
                 else if (match.equals("match:contains")) {
                     return this.cb.re(key, json.get("value").asText());
                 }
                 else if (match.equals("match:excludes")) {
                     return this.cb.re(key, json.get("value").asText()).not();
                 };

                 throw new QueryException(841, "Match relation unknown for type");
             };

             throw new QueryException(843, "Document type is not supported");
         }

         // nested group
         else if (type.equals("koral:docGroup")) {

             if (!json.has("operands") || !json.get("operands").isArray())
                 throw new QueryException(842,
                         "Document group needs operand list");

             CollectionBuilder.Group group;

             String operation = "operation:and";
             if (json.has("operation"))
                 operation = json.get("operation").asText();

             if (operation.equals("operation:or"))
                 group = this.cb.orGroup();
             else if (operation.equals("operation:and"))
                 group = this.cb.andGroup();
             else
                 throw new QueryException(810,
                         "Unknown document group operation");

             for (JsonNode operand : json.get("operands")) {
                 group.with(this._fromJson(operand));
             };
             return group;
         }

         // Unknown type
         throw new QueryException(813, "Collection type is not supported");
     };


     // Returns the number of filters - always one!
     @Deprecated
     public int getCount () {
         return 1;
     };


     /**
      * Set the collection from a {@link CollectionBuilder} object.
      *
      * @param cb
      *            The CollectionBuilder object.
      */
     public KrillCollection fromBuilder (CollectionBuilder.Interface cbi) {
         this.cbi = cbi;
         return this;
     };


     public CollectionBuilder.Interface getBuilder () {
         return this.cbi;
     };


     public CollectionBuilder build () {
         return this.cb;
     };


     public KrillCollection filter (CollectionBuilder.Interface filter) {
         return this.fromBuilder(this.cb.andGroup().with(this.cbi).with(filter));
     };


     public KrillCollection extend (CollectionBuilder.Interface extension) {
         return this.fromBuilder(this.cb.orGroup().with(this.cbi)
                 .with(extension));
     };


     /**
      * Add a filter based on a list of unique document identifiers.
      * UIDs may be indexed in the field "UID".
      *
      * This filter is not part of the legacy API!
      *
      * @param uids
      *            The list of unique document identifier.
      * @return The {@link KrillCollection} object for chaining.
      */
     public KrillCollection filterUIDs (String ... uids) {
         CollectionBuilder.Group cbg = this.cb.orGroup();
         for (String uid : uids) {
             cbg.with(this.cb.term("UID", uid));
         };
         return this.filter(cbg);
     };


     /**
      * Serialize collection to a {@link Filter} object.
      */
     public Filter toFilter () {
         if (this.cbi == null)
             return null;

         return this.cbi.toFilter();
     };


     /**
      * Boolean value if the collection should work inverted or
      * not.
      */
     public boolean isNegative () {
         if (this.cbi == null)
             return false;

         return this.cbi.isNegative();
     };


     /**
      * Generate a string representatio of the virtual collection.
      *
      * <strong>Warning</strong>: This currently does not generate a
      * valid
      * KoralQuery string, so this may change in a future version.
      *
      * @return A string representation of the virtual collection.
      */
     public String toString () {
         Filter filter = this.toFilter();
         if (filter == null)
             return "";

         return (this.isNegative() ? "-" : "") + filter.toString();
     };


     /**
      * Return the associated KoralQuery collection object
      * as a {@link JsonNode}. This won't work,
      * if the object was build using a CollectionBuilder,
      * therefore it is limited to mirror a deserialized KoralQuery
      * object.
      *
      * @return The {@link JsonNode} representing the collection object
      *         of a deserialized KoralQuery object.
      */
     public JsonNode toJsonNode () {
         return this.json;
     };


     /**
      * Create a bit vector representing the live documents of the
      * virtual collection to be used in searches.
      * This will respect deleted documents.
      *
      * @param The
      *            {@link LeafReaderContext} to search in.
      * @return A bit vector representing the live documents of the
      *         virtual collection.
      * @throws IOException
      */
     public FixedBitSet bits (LeafReaderContext atomic) throws IOException {
         LeafReader r = atomic.reader();
         FixedBitSet bitset = new FixedBitSet(r.maxDoc());
         DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());

         if (docids == null) {
             if (this.cbi != null) {
                 bitset.clear(0, bitset.length());
             }
             else {
                 bitset.set(0, bitset.length());
             };
         }
         else
             bitset.or(docids.iterator());

         return bitset;
     };


     /**
      * Return the {@link DocIdSet} representing the documents of the
      * virtual collection to be used in searches.
      * This will respect deleted documents.
      *
      * @param atomic
      *            The {@link LeafReaderContext} to search in.
      * @param accepted
      *            {@link Bits} vector of accepted documents.
      * @throws IOException
      */
     public DocIdSet getDocIdSet (LeafReaderContext atomic, Bits acceptDocs)
             throws IOException {

         int maxDoc = atomic.reader().maxDoc();
         FixedBitSet bitset = new FixedBitSet(maxDoc);

         Filter filter;
         if (this.cbi == null || (filter = this.cbi.toFilter()) == null) {
             if (acceptDocs == null)
                 return null;

             bitset.set(0, maxDoc);
         }
         else {

             // Init vector
             DocIdSet docids = filter.getDocIdSet(atomic, null);
             DocIdSetIterator filterIter = (docids == null) ? null : docids
                     .iterator();

             if (filterIter == null) {
                 if (!this.cbi.isNegative())
                     return null;

                 bitset.set(0, maxDoc);
             }
             else {
                 // Or bit set
                 bitset.or(filterIter);

                 // Revert for negation
                 if (this.cbi.isNegative())
                     bitset.flip(0, maxDoc);
             };
         };

         if (DEBUG) {
             log.debug("Bit set is  {}", _bits(bitset));
             log.debug("Livedocs is {}", _bits(acceptDocs));
         };

         // Remove deleted docs
         return (DocIdSet) BitsFilteredDocIdSet.wrap((DocIdSet) new BitDocIdSet(
                 bitset), acceptDocs);
     };


     public long numberOf (String type) throws IOException {
         return this.numberOf("tokens", type);
     };


     /**
      * Search for the number of occurrences of different types,
      * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
      * collection.
      *
      * @param field
      *            The field containing the textual data and the
      *            annotations as a string.
      * @param type
      *            The type of meta information,
      *            e.g. <i>documents</i> or <i>sentences</i> as a
      *            string.
      * @return The number of the occurrences.
      * @throws IOException
      * @see KrillIndex#numberOf
      */
     public long numberOf (String field, String type) throws IOException {

         // No index defined
         if (this.index == null)
             return (long) -1;

         // No reader (inex is empty)
         if (this.index.reader() == null)
             return (long) 0;

         // This is redundant to index stuff
         if (type.equals("documents") || type.equals("base/texts")) {
             if (this.cbi == null) {
                 if (this.index.reader() == null)
                     return (long) 0;
                 return (long) this.index.reader().numDocs();
             }
             else
                 return this.docCount();
         };

         // Create search term
         // This may be prefixed by foundries
         Term term = new Term(field, "-:" + type);

         if (DEBUG)
             log.debug("Iterate for {}/{}", field, type);

         long occurrences = 0;
         try {
             // Iterate over all atomic readers and collect occurrences
             for (LeafReaderContext atomic : this.index.reader().leaves()) {
                 Bits bits = this.bits(atomic);

                 if (DEBUG)
                     log.debug("Final bits  {}", _bits(bits));

                 occurrences += this._numberOfAtomic(bits, atomic, term);
                 if (DEBUG)
                     log.debug("Added up to {} for {}/{}", occurrences, field,
                             type);
             };
         }

         // Something went wrong
         catch (IOException e) {
             log.warn(e.getMessage());
         };

         return occurrences;
     };


     // Search for meta information in term vectors
     // This will create the sum of all numerical payloads
     // of the term in the document vector
     private long _numberOfAtomic (Bits docvec, LeafReaderContext atomic,
             Term term) throws IOException {

         // This reimplements docsAndPositionsEnum with payloads
         final Terms terms = atomic.reader().fields().terms(term.field());

         // No terms were found
         if (terms != null) {
             // Todo: Maybe reuse a termsEnum!
             final TermsEnum termsEnum = terms.iterator(null);

             // Set the position in the iterator to the term that is seeked
             if (termsEnum.seekExact(term.bytes())) {

                 // TODO: Reuse a DocsAndPositionsEnum!!

                 // Start an iterator to fetch all payloads of the term
                 DocsAndPositionsEnum docs = termsEnum.docsAndPositions(docvec,
                         null, DocsAndPositionsEnum.FLAG_PAYLOADS);


                 // The iterator is empty
                 // This may even be an error, but we return 0
                 if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS)
                     return 0;

                 // Init some variables for data copying
                 long occurrences = 0;
                 BytesRef payload;

                 // Init nextDoc()
                 while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {

                     if (docs.freq() < 1)
                         continue;

                     // Initialize (go to first term)
                     docs.nextPosition();

                     // Copy payload with the offset of the BytesRef
                     payload = docs.getPayload();
                     if (payload != null) {
                         System.arraycopy(payload.bytes, payload.offset, pl, 0,
                                 4);

                         // Add payload as integer
                         occurrences += bb.wrap(pl).getInt();

                         if (DEBUG)
                             log.debug(
                                     "Value for {} incremented by {} to {} in {}",
                                     term, bb.wrap(pl).getInt(), occurrences,
                                     docs.docID());
                     };
                 };

                 // Return the sum of all occurrences
                 return occurrences;
             };
         };

         // Nothing found
         return 0;
     };


     /**
      * Return the number of documents in the virtual
      * collection.
      *
      * @return The number of the occurrences.
      * @see #numberOf
      */
     public long docCount () {

         // No index defined
         if (this.index == null)
             return (long) 0;

         // TODO: Caching!

         long docCount = 0;
         try {
             FixedBitSet bitset;
             for (LeafReaderContext atomic : this.index.reader().leaves()) {
                 if ((bitset = this.bits(atomic)) != null)
                     docCount += bitset.cardinality();
             };
         }
         catch (IOException e) {
             log.warn(e.getLocalizedMessage());
         };
         return docCount;
     };


     private static String _bits (Bits bitset) {
         String str = "";
         for (int i = 0; i < bitset.length(); i++) {
             str += bitset.get(i) ? "1" : "0";
         };
         return str;
     };


     /*
       @Deprecated
       public HashMap getTermRelation (String field) throws Exception {
           return this.getTermRelation(new KrillCollection(this), field);
       };
     */

     /*
      * Analyze how terms relate
      */
     /*
     @Deprecated
     public HashMap getTermRelation (KrillCollection kc, String field)
             throws Exception {
         HashMap<String, Long> map = new HashMap<>(100);
         long docNumber = 0, checkNumber = 0;

         try {
             if (kc.getCount() <= 0) {
                 checkNumber = (long) this.reader().numDocs();
             };

             for (LeafReaderContext atomic : this.reader().leaves()) {
                 HashMap<String, FixedBitSet> termVector = new HashMap<>(20);

                 FixedBitSet docvec = kc.bits(atomic);
                 if (docvec != null) {
                     docNumber += docvec.cardinality();
                 };

                 Terms terms = atomic.reader().fields().terms(field);

                 if (terms == null) {
                     continue;
                 };

                 int docLength = atomic.reader().maxDoc();
                 FixedBitSet bitset = new FixedBitSet(docLength);

                 // Iterate over all tokens in this field
                 TermsEnum termsEnum = terms.iterator(null);

                 while (termsEnum.next() != null) {

                     String termString = termsEnum.term().utf8ToString();

                     bitset.clear(0, docLength);

                     // Get frequency
                     bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec,
                             null));

                     long value = 0;
                     if (map.containsKey(termString))
                         value = map.get(termString);

                     map.put(termString, value + bitset.cardinality());

                     termVector.put(termString, bitset.clone());
                 };

                 int keySize = termVector.size();
                 String[] keys = termVector.keySet()
                         .toArray(new String[keySize]);
                 java.util.Arrays.sort(keys);

                 if (keySize > maxTermRelations) {
                     throw new Exception("termRelations are limited to "
                             + maxTermRelations + " sets"
                             + " (requested were at least " + keySize + " sets)");
                 };

                 for (int i = 0; i < keySize; i++) {
                     for (int j = i + 1; j < keySize; j++) {
                         FixedBitSet comby = termVector.get(keys[i]).clone();
                         comby.and(termVector.get(keys[j]));

                         StringBuilder sb = new StringBuilder();
                         sb.append("#__").append(keys[i]).append(":###:")
                                 .append(keys[j]);
                         String combString = sb.toString();

                         long cap = (long) comby.cardinality();
                         if (map.containsKey(combString)) {
                             cap += map.get(combString);
                         };
                         map.put(combString, cap);
                     };
                 };
             };
             map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
         }
         catch (IOException e) {
             log.warn(e.getMessage());
         };
         return map;
     };
     */


 };
	package de.ids_mannheim.korap;

	import java.util.*;
	import java.io.IOException;

	import de.ids_mannheim.korap.collection.CollectionBuilder;
	import de.ids_mannheim.korap.response.Notifications;
	import de.ids_mannheim.korap.util.QueryException;
	import de.ids_mannheim.korap.response.Result;

	import org.apache.lucene.search.spans.SpanQuery;
	import org.apache.lucene.search.*;
	import org.apache.lucene.index.*;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.FixedBitSet;
	import org.apache.lucene.util.BitDocIdSet;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.search.BitsFilteredDocIdSet;

	import com.fasterxml.jackson.databind.ObjectMapper;
	import com.fasterxml.jackson.databind.JsonNode;

	import java.nio.ByteBuffer;

	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	/**
	* Create a Virtual Collection of documents by means of a KoralQuery
	* collection object.
	*
	* <blockquote><pre>
	* KrillCollection kc = new KrillCollection(json);
	* </pre></blockquote>
	*
	* @author diewald
	*/
	/*
	* TODO: Make a cache for the bits
	* Delete it in case of an extension or a filter
	* TODO: Maybe use randomaccessfilterstrategy
	* TODO: Maybe a constantScoreQuery can make things faster?
	* See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
	* 200805.mbox/%3C17080852.post@talk.nabble.com%3E
	*/
	public final class KrillCollection extends Notifications {
	private KrillIndex index;
	private JsonNode json;
	private CollectionBuilder cb = new CollectionBuilder();
	private CollectionBuilder.Interface cbi;
	private byte[] pl = new byte[4];
	private static ByteBuffer bb = ByteBuffer.allocate(4);

	// Logger
	private final static Logger log = LoggerFactory
	.getLogger(KrillCollection.class);

	// This advices the java compiler to ignore all loggings
	public static final boolean DEBUG = false;


	/**
	* Construct a new KrillCollection.
	*
	*/
	public KrillCollection () {};


	/**
	* Construct a new KrillCollection by passing a KrillIndex.
	*
	* @param index
	* The {@link KrillIndex} object.
	*/
	public KrillCollection (KrillIndex index) {
	this.index = index;
	};


	/**
	* Construct a new KrillCollection by passing a KoralQuery.
	*
	* @param json
	* The KoralQuery document as a JSON string.
	*/
	public KrillCollection (String jsonString) {
	ObjectMapper mapper = new ObjectMapper();
	try {
	JsonNode json = mapper.readTree(jsonString);

	if (json.has("collection"))
	this.fromJson(json.get("collection"));

	else if (json.has("collections"))
	this.addError(899,
	"Collections are not supported anymore in favour of a single collection");
	}

	// Query Exception
	catch (QueryException qe) {
	this.addError(qe.getErrorCode(), qe.getMessage());
	}

	// JSON exception
	catch (IOException e) {
	this.addError(621, "Unable to parse JSON", "KrillCollection",
	e.getLocalizedMessage());
	};
	};


	/**
	* Set the {@link KrillIndex} the virtual collection refers to.
	*
	* @param index
	* The {@link KrillIndex} the virtual collection refers
	* to.
	*/
	public void setIndex (KrillIndex index) {
	this.index = index;
	};


	/**
	* Import the "collection" part of a KoralQuery.
	*
	* @param jsonString
	* The "collection" part of a KoralQuery.
	* @throws QueryException
	*/
	public KrillCollection fromJson (String jsonString) throws QueryException {
	ObjectMapper mapper = new ObjectMapper();
	try {
	this.fromJson((JsonNode) mapper.readTree(jsonString));
	}
	catch (Exception e) {
	this.addError(621, "Unable to parse JSON", "KrillCollection");
	};

	return this;
	};


	/**
	* Import the "collection" part of a KoralQuery.
	*
	* @param json
	* The "collection" part of a KoralQuery
	* as a {@link JsonNode} object.
	* @throws QueryException
	*/
	public KrillCollection fromJson (JsonNode json) throws QueryException {
	this.json = json;
	return this.fromBuilder(this._fromJson(json));
	};


	private CollectionBuilder.Interface _fromJson (JsonNode json)
	throws QueryException {

	if (!json.has("@type")) {
	throw new QueryException(701,
	"JSON-LD group has no @type attribute");
	};

	String type = json.get("@type").asText();

	if (type.equals("koral:doc")) {

	String key = "tokens";
	String valtype = "type:string";
	String match = "match:eq";

	if (json.has("key"))
	key = json.get("key").asText();

	if (json.has("type"))
	valtype = json.get("type").asText();

	// Filter based on date
	if (valtype.equals("type:date")) {

	if (!json.has("value"))
	throw new QueryException(820, "Dates require value fields");

	String dateStr = json.get("value").asText();

	if (json.has("match"))
	match = json.get("match").asText();

	// TODO: This isn't stable yet
	switch (match) {
	case "match:eq":
	return this.cb.date(key, dateStr);
	case "match:ne":
	return this.cb.date(key, dateStr).not();
	case "match:geq":
	return this.cb.since(key, dateStr);
	case "match:leq":
	return this.cb.till(key, dateStr);
	};

	throw new QueryException(841, "Match relation unknown for type");
	}

	// Filter based on string
	else if (valtype.equals("type:string")) {
	if (json.has("match"))
	match = json.get("match").asText();

	switch (match) {

	case "match:eq":
	return this.cb.term(key, json.get("value").asText());
	case "match:ne":
	return this.cb.term(key, json.get("value").asText())
	.not();

	// This may change - but for now it means the elements are lowercased
	case "match:contains":
	return this.cb.term(key, json.get("value").asText()
	.toLowerCase());

	case "match:containsnot":
	return this.cb.term(key,
	json.get("value").asText().toLowerCase()).not();

	// <LEGACY>
	case "match:excludes":
	return this.cb.term(key,
	json.get("value").asText().toLowerCase()).not();
	// </LEGACY>
	};

	throw new QueryException(841, "Match relation unknown for type");
	}

	// Filter based on regex
	else if (valtype.equals("type:regex")) {

	if (json.has("match"))
	match = json.get("match").asText();

	if (match.equals("match:eq")) {
	return this.cb.re(key, json.get("value").asText());
	}
	else if (match.equals("match:ne")) {
	return this.cb.re(key, json.get("value").asText()).not();
	}
	else if (match.equals("match:contains")) {
	return this.cb.re(key, json.get("value").asText());
	}
	else if (match.equals("match:excludes")) {
	return this.cb.re(key, json.get("value").asText()).not();
	};

	throw new QueryException(841, "Match relation unknown for type");
	};

	throw new QueryException(843, "Document type is not supported");
	}

	// nested group
	else if (type.equals("koral:docGroup")) {

	if (!json.has("operands") \|\| !json.get("operands").isArray())
	throw new QueryException(842,
	"Document group needs operand list");

	CollectionBuilder.Group group;

	String operation = "operation:and";
	if (json.has("operation"))
	operation = json.get("operation").asText();

	if (operation.equals("operation:or"))
	group = this.cb.orGroup();
	else if (operation.equals("operation:and"))
	group = this.cb.andGroup();
	else
	throw new QueryException(810,
	"Unknown document group operation");

	for (JsonNode operand : json.get("operands")) {
	group.with(this._fromJson(operand));
	};
	return group;
	}

	// Unknown type
	throw new QueryException(813, "Collection type is not supported");
	};


	// Returns the number of filters - always one!
	@Deprecated
	public int getCount () {
	return 1;
	};


	/**
	* Set the collection from a {@link CollectionBuilder} object.
	*
	* @param cb
	* The CollectionBuilder object.
	*/
	public KrillCollection fromBuilder (CollectionBuilder.Interface cbi) {
	this.cbi = cbi;
	return this;
	};


	public CollectionBuilder.Interface getBuilder () {
	return this.cbi;
	};


	public CollectionBuilder build () {
	return this.cb;
	};


	public KrillCollection filter (CollectionBuilder.Interface filter) {
	return this.fromBuilder(this.cb.andGroup().with(this.cbi).with(filter));
	};


	public KrillCollection extend (CollectionBuilder.Interface extension) {
	return this.fromBuilder(this.cb.orGroup().with(this.cbi)
	.with(extension));
	};



	/**
	* Add a filter based on a list of unique document identifiers.
	* UIDs may be indexed in the field "UID".
	*
	* This filter is not part of the legacy API!
	*
	* @param uids
	* The list of unique document identifier.
	* @return The {@link KrillCollection} object for chaining.
	*/
	public KrillCollection filterUIDs (String ... uids) {
	CollectionBuilder.Group cbg = this.cb.orGroup();
	for (String uid : uids) {
	cbg.with(this.cb.term("UID", uid));
	};
	return this.filter(cbg);
	};


	/**
	* Serialize collection to a {@link Filter} object.
	*/
	public Filter toFilter () {
	if (this.cbi == null)
	return null;

	return this.cbi.toFilter();
	};


	/**
	* Boolean value if the collection should work inverted or
	* not.
	*/
	public boolean isNegative () {
	if (this.cbi == null)
	return false;

	return this.cbi.isNegative();
	};


	/**
	* Generate a string representatio of the virtual collection.
	*
	* <strong>Warning</strong>: This currently does not generate a
	* valid
	* KoralQuery string, so this may change in a future version.
	*
	* @return A string representation of the virtual collection.
	*/
	public String toString () {
	Filter filter = this.toFilter();
	if (filter == null)
	return "";

	return (this.isNegative() ? "-" : "") + filter.toString();
	};


	/**
	* Return the associated KoralQuery collection object
	* as a {@link JsonNode}. This won't work,
	* if the object was build using a CollectionBuilder,
	* therefore it is limited to mirror a deserialized KoralQuery
	* object.
	*
	* @return The {@link JsonNode} representing the collection object
	* of a deserialized KoralQuery object.
	*/
	public JsonNode toJsonNode () {
	return this.json;
	};


	/**
	* Create a bit vector representing the live documents of the
	* virtual collection to be used in searches.
	* This will respect deleted documents.
	*
	* @param The
	* {@link LeafReaderContext} to search in.
	* @return A bit vector representing the live documents of the
	* virtual collection.
	* @throws IOException
	*/
	public FixedBitSet bits (LeafReaderContext atomic) throws IOException {
	LeafReader r = atomic.reader();
	FixedBitSet bitset = new FixedBitSet(r.maxDoc());
	DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());

	if (docids == null) {
	if (this.cbi != null) {
	bitset.clear(0, bitset.length());
	}
	else {
	bitset.set(0, bitset.length());
	};
	}
	else
	bitset.or(docids.iterator());

	return bitset;
	};


	/**
	* Return the {@link DocIdSet} representing the documents of the
	* virtual collection to be used in searches.
	* This will respect deleted documents.
	*
	* @param atomic
	* The {@link LeafReaderContext} to search in.
	* @param accepted
	* {@link Bits} vector of accepted documents.
	* @throws IOException
	*/
	public DocIdSet getDocIdSet (LeafReaderContext atomic, Bits acceptDocs)
	throws IOException {

	int maxDoc = atomic.reader().maxDoc();
	FixedBitSet bitset = new FixedBitSet(maxDoc);

	Filter filter;
	if (this.cbi == null \|\| (filter = this.cbi.toFilter()) == null) {
	if (acceptDocs == null)
	return null;

	bitset.set(0, maxDoc);
	}
	else {

	// Init vector
	DocIdSet docids = filter.getDocIdSet(atomic, null);
	DocIdSetIterator filterIter = (docids == null) ? null : docids
	.iterator();

	if (filterIter == null) {
	if (!this.cbi.isNegative())
	return null;

	bitset.set(0, maxDoc);
	}
	else {
	// Or bit set
	bitset.or(filterIter);

	// Revert for negation
	if (this.cbi.isNegative())
	bitset.flip(0, maxDoc);
	};
	};

	if (DEBUG) {
	log.debug("Bit set is {}", _bits(bitset));
	log.debug("Livedocs is {}", _bits(acceptDocs));
	};

	// Remove deleted docs
	return (DocIdSet) BitsFilteredDocIdSet.wrap((DocIdSet) new BitDocIdSet(
	bitset), acceptDocs);
	};


	public long numberOf (String type) throws IOException {
	return this.numberOf("tokens", type);
	};


	/**
	* Search for the number of occurrences of different types,
	* e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
	* collection.
	*
	* @param field
	* The field containing the textual data and the
	* annotations as a string.
	* @param type
	* The type of meta information,
	* e.g. <i>documents</i> or <i>sentences</i> as a
	* string.
	* @return The number of the occurrences.
	* @throws IOException
	* @see KrillIndex#numberOf
	*/
	public long numberOf (String field, String type) throws IOException {

	// No index defined
	if (this.index == null)
	return (long) -1;

	// No reader (inex is empty)
	if (this.index.reader() == null)
	return (long) 0;

	// This is redundant to index stuff
	if (type.equals("documents") \|\| type.equals("base/texts")) {
	if (this.cbi == null) {
	if (this.index.reader() == null)
	return (long) 0;
	return (long) this.index.reader().numDocs();
	}
	else
	return this.docCount();
	};

	// Create search term
	// This may be prefixed by foundries
	Term term = new Term(field, "-:" + type);

	if (DEBUG)
	log.debug("Iterate for {}/{}", field, type);

	long occurrences = 0;
	try {
	// Iterate over all atomic readers and collect occurrences
	for (LeafReaderContext atomic : this.index.reader().leaves()) {
	Bits bits = this.bits(atomic);

	if (DEBUG)
	log.debug("Final bits {}", _bits(bits));

	occurrences += this._numberOfAtomic(bits, atomic, term);
	if (DEBUG)
	log.debug("Added up to {} for {}/{}", occurrences, field,
	type);
	};
	}

	// Something went wrong
	catch (IOException e) {
	log.warn(e.getMessage());
	};

	return occurrences;
	};


	// Search for meta information in term vectors
	// This will create the sum of all numerical payloads
	// of the term in the document vector
	private long _numberOfAtomic (Bits docvec, LeafReaderContext atomic,
	Term term) throws IOException {

	// This reimplements docsAndPositionsEnum with payloads
	final Terms terms = atomic.reader().fields().terms(term.field());

	// No terms were found
	if (terms != null) {
	// Todo: Maybe reuse a termsEnum!
	final TermsEnum termsEnum = terms.iterator(null);

	// Set the position in the iterator to the term that is seeked
	if (termsEnum.seekExact(term.bytes())) {

	// TODO: Reuse a DocsAndPositionsEnum!!

	// Start an iterator to fetch all payloads of the term
	DocsAndPositionsEnum docs = termsEnum.docsAndPositions(docvec,
	null, DocsAndPositionsEnum.FLAG_PAYLOADS);


	// The iterator is empty
	// This may even be an error, but we return 0
	if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS)
	return 0;

	// Init some variables for data copying
	long occurrences = 0;
	BytesRef payload;

	// Init nextDoc()
	while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {

	if (docs.freq() < 1)
	continue;

	// Initialize (go to first term)
	docs.nextPosition();

	// Copy payload with the offset of the BytesRef
	payload = docs.getPayload();
	if (payload != null) {
	System.arraycopy(payload.bytes, payload.offset, pl, 0,
	4);

	// Add payload as integer
	occurrences += bb.wrap(pl).getInt();

	if (DEBUG)
	log.debug(
	"Value for {} incremented by {} to {} in {}",
	term, bb.wrap(pl).getInt(), occurrences,
	docs.docID());
	};
	};

	// Return the sum of all occurrences
	return occurrences;
	};
	};

	// Nothing found
	return 0;
	};


	/**
	* Return the number of documents in the virtual
	* collection.
	*
	* @return The number of the occurrences.
	* @see #numberOf
	*/
	public long docCount () {

	// No index defined
	if (this.index == null)
	return (long) 0;

	// TODO: Caching!

	long docCount = 0;
	try {
	FixedBitSet bitset;
	for (LeafReaderContext atomic : this.index.reader().leaves()) {
	if ((bitset = this.bits(atomic)) != null)
	docCount += bitset.cardinality();
	};
	}
	catch (IOException e) {
	log.warn(e.getLocalizedMessage());
	};
	return docCount;
	};


	private static String _bits (Bits bitset) {
	String str = "";
	for (int i = 0; i < bitset.length(); i++) {
	str += bitset.get(i) ? "1" : "0";
	};
	return str;
	};


	/*
	@Deprecated
	public HashMap getTermRelation (String field) throws Exception {
	return this.getTermRelation(new KrillCollection(this), field);
	};
	*/

	/*
	* Analyze how terms relate
	*/
	/*
	@Deprecated
	public HashMap getTermRelation (KrillCollection kc, String field)
	throws Exception {
	HashMap<String, Long> map = new HashMap<>(100);
	long docNumber = 0, checkNumber = 0;

	try {
	if (kc.getCount() <= 0) {
	checkNumber = (long) this.reader().numDocs();
	};

	for (LeafReaderContext atomic : this.reader().leaves()) {
	HashMap<String, FixedBitSet> termVector = new HashMap<>(20);

	FixedBitSet docvec = kc.bits(atomic);
	if (docvec != null) {
	docNumber += docvec.cardinality();
	};

	Terms terms = atomic.reader().fields().terms(field);

	if (terms == null) {
	continue;
	};

	int docLength = atomic.reader().maxDoc();
	FixedBitSet bitset = new FixedBitSet(docLength);

	// Iterate over all tokens in this field
	TermsEnum termsEnum = terms.iterator(null);

	while (termsEnum.next() != null) {

	String termString = termsEnum.term().utf8ToString();

	bitset.clear(0, docLength);

	// Get frequency
	bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec,
	null));

	long value = 0;
	if (map.containsKey(termString))
	value = map.get(termString);

	map.put(termString, value + bitset.cardinality());

	termVector.put(termString, bitset.clone());
	};

	int keySize = termVector.size();
	String[] keys = termVector.keySet()
	.toArray(new String[keySize]);
	java.util.Arrays.sort(keys);

	if (keySize > maxTermRelations) {
	throw new Exception("termRelations are limited to "
	+ maxTermRelations + " sets"
	+ " (requested were at least " + keySize + " sets)");
	};

	for (int i = 0; i < keySize; i++) {
	for (int j = i + 1; j < keySize; j++) {
	FixedBitSet comby = termVector.get(keys[i]).clone();
	comby.and(termVector.get(keys[j]));

	StringBuilder sb = new StringBuilder();
	sb.append("#__").append(keys[i]).append(":###:")
	.append(keys[j]);
	String combString = sb.toString();

	long cap = (long) comby.cardinality();
	if (map.containsKey(combString)) {
	cap += map.get(combString);
	};
	map.put(combString, cap);
	};
	};
	};
	map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
	}
	catch (IOException e) {
	log.warn(e.getMessage());
	};
	return map;
	};
	*/


	};