Started adapting tests to new Collection API
Change-Id: I6dcf7ecbdc4404f65628e1038cf234ae32d1b070
diff --git a/Changes b/Changes
index 862b218..476b5e3 100644
--- a/Changes
+++ b/Changes
@@ -3,6 +3,7 @@
This should fix a lot of issues with deleted
documents and negation in virtual collections.
- [cleanup] REMOVED deprecated collection filtering (diewald)
+ - [cleanup] REMOVED deprecated termRelation API (diewald)
- [feature] Added removal methods for documents (diewald)
0.52 2015-07-08
diff --git a/Errorcodes b/Errorcodes
index 62b3e2f..232cf42 100644
--- a/Errorcodes
+++ b/Errorcodes
@@ -72,11 +72,16 @@
813: "Collection type is not supported" (like 713)
814: "Unknown rewrite operation"
815: "Rewrite expects source"
+820: "Dates require value fields"
830: "Filter was empty"
831: "Filter is not wrappable"
832: "Filter operation is invalid"
+841: "Match relation unknown for type" (like 741)
+842: "Document group needs operand list"
+843: "Document type is not supported"
850: "Collections are deprecated in favour of a single collection"
851: "Legacy filters need @value fields"
+899: "Collections are not supported anymore in favour of a single collection"
* 900 - 999 - Corpus Data errors
952: "Given offset information is not numeric"
diff --git a/src/main/java/de/ids_mannheim/korap/Krill.java b/src/main/java/de/ids_mannheim/korap/Krill.java
index ee6e42b..e026682 100644
--- a/src/main/java/de/ids_mannheim/korap/Krill.java
+++ b/src/main/java/de/ids_mannheim/korap/Krill.java
@@ -212,21 +212,13 @@
// TODO: Temporary
if (collNode.fieldNames().hasNext()) {
- KrillCollection kc = new KrillCollection();
- this.setCollection(kc);
- kc.fromJson(collNode);
+ this.setCollection(new KrillCollection().fromJson(collNode));
};
}
- // <legacycode>
else if (json.has("collections")) {
- KrillCollection kc = new KrillCollection();
- this.setCollection(kc);
- for (JsonNode collection : json.get("collections")) {
- kc.fromJsonLegacy(collection);
- };
+ this.addError(899, "Collections are not supported anymore in favour of a single collection");
};
- // </legacycode>
}
catch (QueryException q) {
this.addError(q.getErrorCode(), q.getMessage());
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 1aef947..329550f 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -3,27 +3,25 @@
import java.util.*;
import java.io.IOException;
-import de.ids_mannheim.korap.*;
-import de.ids_mannheim.korap.util.KrillDate;
-import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.collection.BooleanFilter;
-import de.ids_mannheim.korap.collection.RegexFilter;
-import de.ids_mannheim.korap.collection.FilterOperation;
import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.response.Notifications;
+import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.response.Result;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.*;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
-import java.io.StringWriter;
+import java.nio.ByteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,22 +29,14 @@
/**
* Create a Virtual Collection of documents by means of a KoralQuery
* collection object.
- * Alternatively by applying manual filters and extensions on Lucene
- * fields.
*
* <blockquote><pre>
* KrillCollection kc = new KrillCollection(json);
- * kc.filterUIDS("a1", "a2", "a3");
* </pre></blockquote>
*
- * <strong>Warning</strong>: This API is deprecated and will
- * be replaced in future versions. It supports legacy versions of
- * KoralQuery that will be disabled.
- *
* @author diewald
*/
/*
- * TODO: Clean up for new KoralQuery
* TODO: Make a cache for the bits
* Delete it in case of an extension or a filter
* TODO: Maybe use randomaccessfilterstrategy
@@ -56,11 +46,10 @@
*/
public class KrillCollection extends Notifications {
private KrillIndex index;
- private KrillDate created;
- private String id;
- private ArrayList<FilterOperation> filter;
- private int filterCount = 0;
private JsonNode json;
+ private CollectionBuilder.CollectionBuilderInterface cb;
+ private byte[] pl = new byte[4];
+ private static ByteBuffer bb = ByteBuffer.allocate(4);
// Logger
private final static Logger log = LoggerFactory
@@ -71,6 +60,13 @@
/**
+ * Construct a new KrillCollection.
+ *
+ */
+ public KrillCollection () {};
+
+
+ /**
* Construct a new KrillCollection by passing a KrillIndex.
*
* @param index
@@ -78,44 +74,33 @@
*/
public KrillCollection (KrillIndex index) {
this.index = index;
- this.filter = new ArrayList<FilterOperation>(5);
};
/**
* Construct a new KrillCollection by passing a KoralQuery.
- * This supports collections with the key "collection" and
- * legacy collections with the key "collections".
*
- * @param jsonString
- * The virtual collection as a KoralQuery.
+ * @param jsonString
+ * The KoralQuery document as a JSON string.
*/
public KrillCollection (String jsonString) {
ObjectMapper mapper = new ObjectMapper();
- this.filter = new ArrayList<FilterOperation>(5);
-
try {
JsonNode json = mapper.readTree(jsonString);
- // Deserialize from recent collections
- if (json.has("collection")) {
+ if (json.has("collection"))
this.fromJson(json.get("collection"));
- }
- // Legacy collection serialization
- // This will be removed!
- else if (json.has("collections")) {
- this.addMessage(850,
- "Collections are deprecated in favour of a single collection");
- for (JsonNode collection : json.get("collections")) {
- this.fromJsonLegacy(collection);
- };
- };
+ else if (json.has("collections"))
+ this.addError(899, "Collections are not supported anymore in favour of a single collection");
}
- // Some exceptions ...
+
+ // Query Exception
catch (QueryException qe) {
this.addError(qe.getErrorCode(), qe.getMessage());
}
+
+ // JSON exception
catch (IOException e) {
this.addError(621, "Unable to parse JSON", "KrillCollection",
e.getLocalizedMessage());
@@ -124,10 +109,14 @@
/**
- * Construct a new KrillCollection.
+ * Set the {@link KrillIndex} the virtual collection refers to.
+ *
+ * @param index
+ * The {@link KrillIndex} the virtual collection refers
+ * to.
*/
- public KrillCollection () {
- this.filter = new ArrayList<FilterOperation>(5);
+ public void setIndex (KrillIndex index) {
+ this.index = index;
};
@@ -161,21 +150,13 @@
*/
public KrillCollection fromJson (JsonNode json) throws QueryException {
this.json = json;
- this.filter(this._fromJson(json));
- return this;
+ return this.fromBuilder(this._fromJson(json));
};
- // Create a boolean filter from JSON
- private BooleanFilter _fromJson (JsonNode json) throws QueryException {
- return this._fromJson(json, "tokens");
- };
+ private CollectionBuilder.CollectionBuilderInterface _fromJson (JsonNode json) throws QueryException {
-
- // Create a booleanfilter from JSON
- private BooleanFilter _fromJson (JsonNode json, String field)
- throws QueryException {
- BooleanFilter bfilter = new BooleanFilter();
+ CollectionBuilder cb = new CollectionBuilder();
if (!json.has("@type")) {
throw new QueryException(701,
@@ -184,7 +165,6 @@
String type = json.get("@type").asText();
- // Single filter
if (type.equals("koral:doc")) {
String key = "tokens";
@@ -201,27 +181,26 @@
if (valtype.equals("type:date")) {
if (!json.has("value"))
- throw new QueryException(612, "Dates require value fields");
+ throw new QueryException(820, "Dates require value fields");
String dateStr = json.get("value").asText();
+
if (json.has("match"))
match = json.get("match").asText();
// TODO: This isn't stable yet
switch (match) {
- case "match:eq":
- bfilter.date(dateStr);
- break;
- case "match:geq":
- bfilter.since(dateStr);
- break;
- case "match:leq":
- bfilter.till(dateStr);
- break;
+ case "match:eq":
+ return cb.date(key, dateStr);
+ case "match:ne":
+ return cb.date(key, dateStr).not();
+ case "match:geq":
+ return cb.since(key, dateStr);
+ case "match:leq":
+ return cb.till(key, dateStr);
};
- // No good reason for gt or lt
- return bfilter;
+ throw new QueryException(841, "Match relation unknown for type");
}
// Filter based on string
@@ -229,355 +208,107 @@
if (json.has("match"))
match = json.get("match").asText();
- if (match.equals("match:eq")) {
- bfilter.and(key, json.get("value").asText());
- }
- else if (match.equals("match:ne")) {
- bfilter.andNot(key, json.get("value").asText());
- }
+ switch (match) {
+
+ case "match:eq":
+ return cb.term(key, json.get("value").asText());
+ case "match:ne":
+ return cb.term(key, json.get("value").asText()).not();
+
// This may change - but for now it means the elements are lowercased
- else if (match.equals("match:contains")) {
- bfilter.and(key, json.get("value").asText().toLowerCase());
- }
- else if (match.equals("match:containsnot")) {
- bfilter.andNot(key, json.get("value").asText().toLowerCase());
- }
- // <LEGACY>
- else if (match.equals("match:excludes")) {
- bfilter.andNot(key, json.get("value").asText().toLowerCase());
- }
- // </LEGACY>
- else {
- throw new QueryException(0, "Unknown match type");
+ case "match:contains":
+ return cb.term(key, json.get("value").asText().toLowerCase());
+
+ case "match:containsnot":
+ return cb.term(key, json.get("value").asText().toLowerCase()).not();
+
+ // <LEGACY>
+ case "match:excludes":
+ return cb.term(key, json.get("value").asText().toLowerCase()).not();
+ // </LEGACY>
};
- return bfilter;
+ throw new QueryException(841, "Match relation unknown for type");
}
// Filter based on regex
else if (valtype.equals("type:regex")) {
+
if (json.has("match"))
match = json.get("match").asText();
if (match.equals("match:eq")) {
- return bfilter.and(key, new RegexFilter(json.get("value")
- .asText()));
+ return cb.re(key, json.get("value").asText());
}
else if (match.equals("match:ne")) {
- return bfilter.andNot(key, new RegexFilter(json
- .get("value").asText()));
+ return cb.re(key, json.get("value").asText()).not();
+ }
+ else if (match.equals("match:contains")) {
+ return cb.re(key, json.get("value").asText());
+ }
+ else if (match.equals("match:excludes")) {
+ return cb.re(key, json.get("value").asText()).not();
};
- // TODO! for excludes and contains
- throw new QueryException(0, "Unknown document type");
+ throw new QueryException(841, "Match relation unknown for type");
};
- // TODO!
- throw new QueryException(0, "Unknown document operation");
+ throw new QueryException(843, "Document type is not supported");
}
// nested group
else if (type.equals("koral:docGroup")) {
+
if (!json.has("operands") || !json.get("operands").isArray())
- throw new QueryException(612, "Groups need operands");
+ throw new QueryException(842, "Document group needs operand list");
+
+ CollectionBuilder.CollectionBuilderGroup group;
String operation = "operation:and";
if (json.has("operation"))
- operation = json.get("operation").asText();
+ operation = json.get("operation").asText();
- BooleanFilter group = new BooleanFilter();
-
+ if (operation.equals("operation:or"))
+ group = cb.orGroup();
+ else if (operation.equals("operation:and"))
+ group = cb.andGroup();
+ else
+ throw new QueryException(810, "Unknown document group operation");
+
for (JsonNode operand : json.get("operands")) {
- if (operation.equals("operation:and"))
- group.and(this._fromJson(operand, field));
-
- else if (operation.equals("operation:or"))
- group.or(this._fromJson(operand, field));
-
- else
- throw new QueryException(613,
- "Unknown document group operation");
+ group.with(this._fromJson(operand));
};
- bfilter.and(group);
- return bfilter;
+ return group;
}
// Unknown type
- else
- throw new QueryException(613,
- "Collection query type has to be doc or docGroup");
+ throw new QueryException(813, "Collection type is not supported");
+ };
- // return new BooleanFilter();
- };
-
-
- /**
- * Import the "collections" part of a KoralQuery.
- * This method is deprecated and will vanish in future versions.
- *
- * @param jsonString
- * The "collections" part of a KoralQuery.
- * @throws QueryException
- */
+ // Returns the number of filters - always one!
@Deprecated
- public KrillCollection fromJsonLegacy (String jsonString)
- throws QueryException {
- ObjectMapper mapper = new ObjectMapper();
- try {
- this.fromJsonLegacy((JsonNode) mapper.readValue(jsonString,
- JsonNode.class));
- }
- catch (Exception e) {
- this.addError(621, "Unable to parse JSON", "KrillCollection");
- };
+ public int getCount () {
+ return 1;
+ };
+
+
+
+
+
+ /**
+ * Set the collection from a {@link CollectionBuilder} object.
+ *
+ * @param cb The CollectionBuilder object.
+ */
+ public KrillCollection fromBuilder (CollectionBuilder.CollectionBuilderInterface cb) {
+ this.cb = cb;
return this;
};
-
- /**
- * Import the "collections" part of a KoralQuery.
- * This method is deprecated and will vanish in future versions.
- *
- * @param json
- * The "collections" part of a KoralQuery
- * as a {@link JsonNode} object.
- * @throws QueryException
- */
- @Deprecated
- public KrillCollection fromJsonLegacy (JsonNode json) throws QueryException {
- if (!json.has("@type"))
- throw new QueryException(701,
- "JSON-LD group has no @type attribute");
-
- if (!json.has("@value"))
- throw new QueryException(851, "Legacy filter need @value fields");
-
- BooleanFilter bf = this._fromJsonLegacy(json.get("@value"), "tokens");
- String type = json.get("@type").asText();
-
- // Filter the collection
- if (type.equals("koral:meta-filter")) {
- if (DEBUG)
- log.trace("Add Filter LEGACY");
- this.filter(bf);
- }
-
- // Extend the collection
- else if (type.equals("koral:meta-extend")) {
- if (DEBUG)
- log.trace("Add Extend LEGACY");
- this.extend(bf);
- };
-
- return this;
+ public CollectionBuilder.CollectionBuilderInterface getBuilder () {
+ return this.cb;
};
-
- // Create a boolean filter from a Json string
- @Deprecated
- private BooleanFilter _fromJsonLegacy (JsonNode json, String field)
- throws QueryException {
- BooleanFilter bfilter = new BooleanFilter();
-
- if (!json.has("@type"))
- throw new QueryException(612,
- "JSON-LD group has no @type attribute");
-
- String type = json.get("@type").asText();
-
- if (DEBUG)
- log.trace("@type: " + type);
-
- if (json.has("@field"))
- field = _getFieldLegacy(json);
-
- if (type.equals("koral:term")) {
- if (field != null && json.has("@value"))
- bfilter.and(field, json.get("@value").asText());
- return bfilter;
- }
- else if (type.equals("koral:group")) {
- if (!json.has("relation"))
- throw new QueryException(612, "Group needs relation");
-
- if (!json.has("operands"))
- throw new QueryException(612, "Group needs operand list");
-
- String dateStr, till;
- JsonNode operands = json.get("operands");
-
- if (!operands.isArray())
- throw new QueryException(612, "Group needs operand list");
-
- if (DEBUG)
- log.trace("relation found {}", json.get("relation").asText());
-
- BooleanFilter group = new BooleanFilter();
-
- switch (json.get("relation").asText()) {
- case "between":
- dateStr = _getDateLegacy(json, 0);
- till = _getDateLegacy(json, 1);
- if (dateStr != null && till != null)
- bfilter.between(dateStr, till);
- break;
-
- case "until":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.till(dateStr);
- break;
-
- case "since":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.since(dateStr);
- break;
-
- case "equals":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.date(dateStr);
- break;
-
- case "and":
- if (operands.size() < 1)
- throw new QueryException(612,
- "Operation needs at least two operands");
-
- for (JsonNode operand : operands) {
- group.and(this._fromJsonLegacy(operand, field));
- }
- ;
- bfilter.and(group);
- break;
-
- case "or":
- if (operands.size() < 1)
- throw new QueryException(612,
- "Operation needs at least two operands");
-
- for (JsonNode operand : operands) {
- group.or(this._fromJsonLegacy(operand, field));
- }
- ;
- bfilter.and(group);
- break;
-
- default:
- throw new QueryException(613, "Relation is not supported");
- };
- }
- else {
- throw new QueryException(613,
- "Filter type is not a supported group");
- };
- return bfilter;
- };
-
-
- /**
- * Set the {@link KrillIndex} the virtual collection refers to.
- *
- * @param index
- * The {@link KrillIndex} the virtual collection refers
- * to.
- */
- public void setIndex (KrillIndex index) {
- this.index = index;
- };
-
-
- /**
- * Add a filter by means of a {@link BooleanFilter}.
- *
- * <strong>Warning</strong>: Filters are part of the collections
- * legacy API and may vanish without warning.
- *
- * @param filter
- * The filter to add to the collection.
- * @return The {@link KrillCollection} object for chaining.
- */
- // TODO: The checks may not be necessary
- public KrillCollection filter (BooleanFilter filter) {
- if (DEBUG)
- log.trace("Added filter: {}", filter.toString());
-
- if (filter == null) {
- this.addWarning(830, "Filter was empty");
- return this;
- };
-
- Filter f = (Filter) new QueryWrapperFilter(filter.toQuery());
- if (f == null) {
- this.addWarning(831, "Filter is not wrappable");
- return this;
- };
- FilterOperation fo = new FilterOperation(f, false);
- if (fo == null) {
- this.addWarning(832, "Filter operation is invalid");
- return this;
- };
- this.filter.add(fo);
- this.filterCount++;
- return this;
- };
-
-
- /**
- * Add a filter by means of a {@link CollectionBuilder} object.
- *
- * <strong>Warning</strong>: Filters are part of the collections
- * legacy API and may vanish without warning.
- *
- * @param filter
- * The filter to add to the collection.
- * @return The {@link KrillCollection} object for chaining.
- */
- public KrillCollection filter (CollectionBuilder filter) {
- return this.filter(filter.getBooleanFilter());
- };
-
-
- /**
- * Add an extension by means of a {@link BooleanFilter}.
- *
- * <strong>Warning</strong>: Extensions are part of the
- * collections
- * legacy API and may vanish without warning.
- *
- * @param extension
- * The extension to add to the collection.
- * @return The {@link KrillCollection} object for chaining.
- */
- public KrillCollection extend (BooleanFilter extension) {
- if (DEBUG)
- log.trace("Added extension: {}", extension.toString());
-
- this.filter.add(new FilterOperation((Filter) new QueryWrapperFilter(
- extension.toQuery()), true));
- this.filterCount++;
- return this;
- };
-
-
- /**
- * Add an extension by means of a {@link CollectionBuilder}
- * object.
- *
- * <strong>Warning</strong>: Extensions are part of the
- * collections
- * legacy API and may vanish without warning.
- *
- * @param extension
- * The extension to add to the collection.
- * @return The {@link KrillCollection} object for chaining.
- */
- public KrillCollection extend (CollectionBuilder extension) {
- return this.extend(extension.getBooleanFilter());
- };
-
-
/**
* Add a filter based on a list of unique document identifiers.
* UIDs may be indexed in the field "UID".
@@ -589,57 +320,37 @@
* @return The {@link KrillCollection} object for chaining.
*/
public KrillCollection filterUIDs (String ... uids) {
+ /*
BooleanFilter filter = new BooleanFilter();
filter.or("UID", uids);
if (DEBUG)
log.debug("UID based filter: {}", filter.toString());
return this.filter(filter);
+ */
+ return this;
};
/**
- * Get the list of filters constructing the collection.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @return The list of filters.
+ * Serialize collection to a {@link Filter} object.
*/
- public List<FilterOperation> getFilters () {
- return this.filter;
+ public Filter toFilter () {
+ if (this.cb == null)
+ return null;
+
+ return this.cb.toFilter();
};
/**
- * Get a certain {@link FilterOperation} from the list of filters
- * constructing the collection by its numerical index.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @param index
- * The index position of the requested
- * {@link FilterOperation}.
- * @return The {@link FilterOperation} at the certain list
- * position.
+ * Check whether the collection should work inverted
+ * (i.e. is negated) or not.
*/
- public FilterOperation getFilter (int index) {
- return this.filter.get(index);
- };
+ public boolean isNegative () {
+ if (this.cb == null)
+ return false;
-
- /**
- * Get the number of filter operations constructing this
- * collection.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @return The number of filter operations constructing this
- * collection.
- */
- public int getCount () {
- return this.filterCount;
+ return this.cb.isNegative();
};
@@ -653,11 +364,11 @@
* @return A string representation of the virtual collection.
*/
public String toString () {
- StringBuilder sb = new StringBuilder();
- for (FilterOperation fo : this.filter) {
- sb.append(fo.toString()).append("; ");
- };
- return sb.toString();
+ Filter filter = this.toFilter();
+ if (filter == null)
+ return "";
+
+ return (this.isNegative() ? "-" : "") + filter.toString();
};
@@ -692,14 +403,15 @@
* result.
*/
public Result search (SpanQuery query) {
- return this.index.search(this, query, 0, (short) 20, true, (short) 5,
- true, (short) 5);
+ // return this.index.search(this, query, 0, (short) 20, true, (short) 5, true, (short) 5);
+ return null;
};
/**
* Create a bit vector representing the live documents of the
* virtual collection to be used in searches.
+ * This will respect deleted documents.
*
* @param The
* {@link AtomicReaderContext} to search in.
@@ -708,76 +420,69 @@
* @throws IOException
*/
public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
- // TODO: Probably use Bits.MatchAllBits(int len)
- boolean noDoc = true;
- FixedBitSet bitset;
+ AtomicReader r = atomic.reader();
+ FixedBitSet bitset = new FixedBitSet(r.maxDoc());
+ DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());
- // There are filters set
- if (this.filterCount > 0) {
- bitset = new FixedBitSet(atomic.reader().maxDoc());
+ if (docids == null)
+ return null;
- ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter
- .clone();
-
- FilterOperation kcInit = filters.remove(0);
- if (DEBUG)
- log.trace("FILTER: {}", kcInit);
-
- // Init vector
- DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null);
-
- DocIdSetIterator filterIter = docids.iterator();
-
- // The filter has an effect
- if (filterIter != null) {
- if (DEBUG)
- log.trace("InitFilter has effect");
- bitset.or(filterIter);
- noDoc = false;
- };
-
- // Apply all filters sequentially
- for (FilterOperation kc : filters) {
- if (DEBUG)
- log.trace("FILTER: {}", kc);
-
- // TODO: BUG???
- docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null
- : bitset);
- filterIter = docids.iterator();
-
- if (filterIter == null) {
- // There must be a better way ...
- if (kc.isFilter()) {
- // TODO: Check if this is really correct!
- // Maybe here is the bug
- bitset.clear(0, bitset.length());
- noDoc = true;
- };
- continue;
- };
- if (kc.isExtension())
- bitset.or(filterIter);
- else
- bitset.and(filterIter);
- };
-
- if (!noDoc) {
- FixedBitSet livedocs = (FixedBitSet) atomic.reader()
- .getLiveDocs();
- if (livedocs != null)
- bitset.and(livedocs);
- };
- }
- else {
- bitset = (FixedBitSet) atomic.reader().getLiveDocs();
- };
-
+ bitset.or(docids.iterator());
return bitset;
};
/**
+ * Return the {@link DocIdSet} representing the documents of the
+ * virtual collection to be used in searches.
+ * This will respect deleted documents.
+ *
+ * @param atomic
+ * The {@link AtomicReaderContext} to search in.
+ * @param acceptDocs
+ * {@link Bits} vector of accepted documents.
+ * @throws IOException
+ */
+ public DocIdSet getDocIdSet (AtomicReaderContext atomic, Bits acceptDocs) throws IOException {
+
+ int maxDoc = atomic.reader().maxDoc();
+ FixedBitSet bitset = new FixedBitSet(maxDoc);
+
+ Filter filter;
+ if (this.cb == null || (filter = this.cb.toFilter()) == null)
+ return null;
+
+ // Init vector
+ DocIdSet docids = filter.getDocIdSet(atomic, null);
+ DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+
+ if (filterIter == null) {
+ if (!this.cb.isNegative())
+ return null;
+
+ bitset.set(0, maxDoc);
+ }
+ else {
+ // Or bit set
+ bitset.or(filterIter);
+
+ // Revert for negation
+ if (this.cb.isNegative())
+ bitset.flip(0, maxDoc);
+ };
+
+ // Remove deleted docs
+ return (DocIdSet) BitsFilteredDocIdSet.wrap(
+ (DocIdSet) bitset,
+ acceptDocs
+ );
+ };
+
+ public long numberOf (String type) throws IOException {
+ return this.numberOf("tokens", type);
+ };
+
+ /**
* Search for the number of occurrences of different types,
* e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
* collection.
@@ -794,136 +499,223 @@
* @see KrillIndex#numberOf
*/
public long numberOf (String field, String type) throws IOException {
+
+ // No index defined
if (this.index == null)
return (long) -1;
- return this.index.numberOf(this, field, type);
+ // This is redundant to index stuff
+ if (type.equals("documents") || type.equals("base/texts")) {
+ if (this.cb == null)
+ return (long) this.index.reader().numDocs();
+ else
+ return this.docCount();
+ };
+
+ // Create search term
+ // This may be prefixed by foundries
+ Term term = new Term(field, "-:" + type);
+
+ long occurrences = 0;
+ try {
+ // Iterate over all atomic readers and collect occurrences
+ for (AtomicReaderContext atomic : this.index.reader().leaves()) {
+ occurrences += this._numberOfAtomic(this.bits(atomic), atomic, term);
+ };
+ }
+
+ // Something went wrong
+ catch (Exception e) {
+ log.warn(e.getLocalizedMessage());
+ };
+
+ return occurrences;
};
+ // Search for meta information in term vectors
+ // This will create the sum of all numerical payloads
+ // of the term in the document vector
+ private long _numberOfAtomic (Bits docvec, AtomicReaderContext atomic,
+ Term term) throws IOException {
+
+ // This reimplements docsAndPositionsEnum with payloads
+ final Terms terms = atomic.reader().fields().terms(term.field());
+
+ // Only proceed if terms were found for the field
+ if (terms != null) {
+ // Todo: Maybe reuse a termsEnum!
+ final TermsEnum termsEnum = terms.iterator(null);
+
+ // Set the position in the iterator to the term that is sought
+ if (termsEnum.seekExact(term.bytes())) {
+
+ // Start an iterator to fetch all payloads of the term
+ DocsAndPositionsEnum docs = termsEnum.docsAndPositions(docvec,
+ null, DocsAndPositionsEnum.FLAG_PAYLOADS);
+
+ // The iterator is empty
+ // This may even be an error, but we return 0
+ if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS)
+ return 0;
+
+ // Init some variables for data copying
+ long occurrences = 0;
+ BytesRef payload;
+
+ // Init nextDoc()
+ while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+
+ // Initialize (go to first position)
+ docs.nextPosition();
+
+ // Copy payload with the offset of the BytesRef
+ payload = docs.getPayload();
+ System.arraycopy(payload.bytes, payload.offset, pl, 0, 4);
+
+ // Add payload as integer
+ occurrences += bb.wrap(pl).getInt();
+ };
+
+ // Return the sum of all occurrences
+ return occurrences;
+ };
+ };
+
+ // Nothing found
+ return 0;
+ };
+
+
/**
- * Search for the number of occurrences of different types,
- * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
- * collection, in the <i>base</i> foundry.
+ * Return the number of documents in the virtual
+ * collection.
*
- * @param type
- * The type of meta information,
- * e.g. <i>documents</i> or <i>sentences</i> as a
- * string.
* @return The number of the occurrences.
- * @throws IOException
- * @see KrillIndex#numberOf
+ * @see #numberOf
*/
- public long numberOf (String type) throws IOException {
- if (this.index == null)
- return (long) -1;
+ public long docCount () {
- return this.index.numberOf(this, "tokens", type);
+ // No index defined
+ if (this.index == null)
+ return (long) 0;
+
+ // TODO: Caching!
+
+ long docCount = 0;
+ try {
+ FixedBitSet bitset;
+ for (AtomicReaderContext atomic : this.index.reader().leaves()) {
+ if ((bitset = this.bits(atomic)) != null)
+ docCount += bitset.cardinality();
+ };
+ }
+ catch (IOException e) {
+ log.warn(e.getLocalizedMessage());
+ };
+ return docCount;
};
- // Term relation API is not in use anymore
+
+ /*
@Deprecated
public HashMap getTermRelation (String field) throws Exception {
- if (this.index == null) {
- HashMap<String, Long> map = new HashMap<>(1);
- map.put("-docs", (long) 0);
- return map;
- };
-
- return this.index.getTermRelation(this, field);
+ return this.getTermRelation(new KrillCollection(this), field);
};
+*/
-
- // Term relation API is not in use anymore
+ /**
+ * Analyze how terms relate
+ */
+ /*
@Deprecated
- public String getTermRelationJSON (String field) throws IOException {
- ObjectMapper mapper = new ObjectMapper();
- StringWriter sw = new StringWriter();
- sw.append("{\"field\":");
- mapper.writeValue(sw, field);
- sw.append(",");
+ public HashMap getTermRelation (KrillCollection kc, String field)
+ throws Exception {
+ HashMap<String, Long> map = new HashMap<>(100);
+ long docNumber = 0, checkNumber = 0;
try {
- HashMap<String, Long> map = this.getTermRelation(field);
+ if (kc.getCount() <= 0) {
+ checkNumber = (long) this.reader().numDocs();
+ };
- sw.append("\"documents\":");
- mapper.writeValue(sw, map.remove("-docs"));
- sw.append(",");
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ HashMap<String, FixedBitSet> termVector = new HashMap<>(20);
- String[] keys = map.keySet().toArray(new String[map.size()]);
+ FixedBitSet docvec = kc.bits(atomic);
+ if (docvec != null) {
+ docNumber += docvec.cardinality();
+ };
- HashMap<String, Integer> setHash = new HashMap<>(20);
- ArrayList<HashMap<String, Long>> set = new ArrayList<>(20);
- ArrayList<Long[]> overlap = new ArrayList<>(100);
+ Terms terms = atomic.reader().fields().terms(field);
- int count = 0;
- for (String key : keys) {
- if (!key.startsWith("#__")) {
- HashMap<String, Long> simpleMap = new HashMap<>();
- simpleMap.put(key, map.remove(key));
- set.add(simpleMap);
- setHash.put(key, count++);
+ if (terms == null) {
+ continue;
+ };
+
+ int docLength = atomic.reader().maxDoc();
+ FixedBitSet bitset = new FixedBitSet(docLength);
+
+ // Iterate over all tokens in this field
+ TermsEnum termsEnum = terms.iterator(null);
+
+ while (termsEnum.next() != null) {
+
+ String termString = termsEnum.term().utf8ToString();
+
+ bitset.clear(0, docLength);
+
+ // Get frequency
+ bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec,
+ null));
+
+ long value = 0;
+ if (map.containsKey(termString))
+ value = map.get(termString);
+
+ map.put(termString, value + bitset.cardinality());
+
+ termVector.put(termString, bitset.clone());
+ };
+
+ int keySize = termVector.size();
+ String[] keys = termVector.keySet()
+ .toArray(new String[keySize]);
+ java.util.Arrays.sort(keys);
+
+ if (keySize > maxTermRelations) {
+ throw new Exception("termRelations are limited to "
+ + maxTermRelations + " sets"
+ + " (requested were at least " + keySize + " sets)");
+ };
+
+ for (int i = 0; i < keySize; i++) {
+ for (int j = i + 1; j < keySize; j++) {
+ FixedBitSet comby = termVector.get(keys[i]).clone();
+ comby.and(termVector.get(keys[j]));
+
+ StringBuilder sb = new StringBuilder();
+ sb.append("#__").append(keys[i]).append(":###:")
+ .append(keys[j]);
+ String combString = sb.toString();
+
+ long cap = (long) comby.cardinality();
+ if (map.containsKey(combString)) {
+ cap += map.get(combString);
+ };
+ map.put(combString, cap);
+ };
};
};
-
- keys = map.keySet().toArray(new String[map.size()]);
- for (String key : keys) {
- String[] comb = key.substring(3).split(":###:");
- Long[] l = new Long[3];
- l[0] = (long) setHash.get(comb[0]);
- l[1] = (long) setHash.get(comb[1]);
- l[2] = map.remove(key);
- overlap.add(l);
- };
-
- sw.append("\"sets\":");
- mapper.writeValue(sw, (Object) set);
- sw.append(",\"overlaps\":");
- mapper.writeValue(sw, (Object) overlap);
- sw.append(",\"error\":null");
+ map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
}
- catch (Exception e) {
- sw.append("\"error\":");
- mapper.writeValue(sw, e.getMessage());
+ catch (IOException e) {
+ log.warn(e.getMessage());
};
-
- sw.append("}");
- return sw.getBuffer().toString();
+ return map;
};
+ */
- // Get legacy field
- @Deprecated
- private static String _getFieldLegacy (JsonNode json) {
- if (!json.has("@field"))
- return (String) null;
-
- String field = json.get("@field").asText();
- return field.replaceFirst("koral:field#", "");
- };
-
-
- // Get legacy date
- @Deprecated
- private static String _getDateLegacy (JsonNode json, int index) {
- if (!json.has("operands"))
- return (String) null;
-
- if (!json.get("operands").has(index))
- return (String) null;
-
- JsonNode date = json.get("operands").get(index);
-
- if (!date.has("@type"))
- return (String) null;
-
- if (!date.get("@type").asText().equals("koral:date"))
- return (String) null;
-
- if (!date.has("@value"))
- return (String) null;
-
- return date.get("@value").asText();
- };
};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java
new file mode 100644
index 0000000..17fa1fe
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java
@@ -0,0 +1,937 @@
+package de.ids_mannheim.korap;
+
+import java.util.*;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.*;
+import de.ids_mannheim.korap.util.KrillDate;
+import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.collection.BooleanFilter;
+import de.ids_mannheim.korap.collection.RegexFilter;
+import de.ids_mannheim.korap.collection.FilterOperation;
+import de.ids_mannheim.korap.collection.CollectionBuilderLegacy;
+import de.ids_mannheim.korap.response.Notifications;
+import de.ids_mannheim.korap.response.Result;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.*;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.Bits;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import java.io.StringWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Create a Virtual Collection of documents by means of a KoralQuery
+ * collection object.
+ * Alternatively by applying manual filters and extensions on Lucene
+ * fields.
+ *
+ * <blockquote><pre>
+ * KrillCollectionLegacy kc = new KrillCollectionLegacy(json);
+ * kc.filterUIDs("a1", "a2", "a3");
+ * </pre></blockquote>
+ *
+ * <strong>Warning</strong>: This API is deprecated and will
+ * be replaced in future versions. It supports legacy versions of
+ * KoralQuery that will be disabled.
+ *
+ * @author diewald
+ */
+/*
+ * TODO: Clean up for new KoralQuery
+ * TODO: Make a cache for the bits
+ * Delete it in case of an extension or a filter
+ * TODO: Maybe use randomaccessfilterstrategy
+ * TODO: Maybe a constantScoreQuery can make things faster?
+ * See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
+ * 200805.mbox/%3C17080852.post@talk.nabble.com%3E
+ */
+public class KrillCollectionLegacy extends Notifications {
+ private KrillIndex index;
+ private KrillDate created;
+ private String id;
+ private ArrayList<FilterOperation> filter;
+ private int filterCount = 0;
+ private JsonNode json;
+
+ // Logger
+ private final static Logger log = LoggerFactory
+ .getLogger(KrillCollection.class);
+
+    // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
+
+ /**
+ * Construct a new KrillCollectionLegacy by passing a KrillIndex.
+ *
+ * @param index
+ * The {@link KrillIndex} object.
+ */
+ public KrillCollectionLegacy (KrillIndex index) {
+ this.index = index;
+ this.filter = new ArrayList<FilterOperation>(5);
+ };
+
+
+ /**
+ * Construct a new KrillCollectionLegacy by passing a KoralQuery.
+ * This supports collections with the key "collection" and
+ * legacy collections with the key "collections".
+ *
+ * @param jsonString
+ * The virtual collection as a KoralQuery.
+ */
+ public KrillCollectionLegacy (String jsonString) {
+ ObjectMapper mapper = new ObjectMapper();
+ this.filter = new ArrayList<FilterOperation>(5);
+
+ try {
+ JsonNode json = mapper.readTree(jsonString);
+
+ // Deserialize from recent collections
+ if (json.has("collection")) {
+ this.fromJson(json.get("collection"));
+ }
+
+ // Legacy collection serialization
+ // This will be removed!
+ else if (json.has("collections")) {
+ this.addMessage(850,
+ "Collections are deprecated in favour of a single collection");
+ for (JsonNode collection : json.get("collections")) {
+ this.fromJsonLegacy(collection);
+ };
+ };
+ }
+ // Some exceptions ...
+ catch (QueryException qe) {
+ this.addError(qe.getErrorCode(), qe.getMessage());
+ }
+ catch (IOException e) {
+ this.addError(621, "Unable to parse JSON", "KrillCollectionLegacy",
+ e.getLocalizedMessage());
+ };
+ };
+
+
+ /**
+ * Construct a new KrillCollectionLegacy.
+ */
+ public KrillCollectionLegacy () {
+ this.filter = new ArrayList<FilterOperation>(5);
+ };
+
+
+ /**
+ * Import the "collection" part of a KoralQuery.
+ *
+ * @param jsonString
+ * The "collection" part of a KoralQuery.
+ * @throws QueryException
+ */
+ public KrillCollectionLegacy fromJson (String jsonString) throws QueryException {
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ this.fromJson((JsonNode) mapper.readTree(jsonString));
+ }
+ catch (Exception e) {
+ this.addError(621, "Unable to parse JSON", "KrillCollection");
+ };
+
+ return this;
+ };
+
+
+ /**
+ * Import the "collection" part of a KoralQuery.
+ *
+ * @param json
+ * The "collection" part of a KoralQuery
+ * as a {@link JsonNode} object.
+ * @throws QueryException
+ */
+ public KrillCollectionLegacy fromJson (JsonNode json) throws QueryException {
+ this.json = json;
+ this.filter(this._fromJson(json));
+ return this;
+ };
+
+
+ // Create a boolean filter from JSON
+ private BooleanFilter _fromJson (JsonNode json) throws QueryException {
+ return this._fromJson(json, "tokens");
+ };
+
+
+    // Create a boolean filter from JSON
+ private BooleanFilter _fromJson (JsonNode json, String field)
+ throws QueryException {
+ BooleanFilter bfilter = new BooleanFilter();
+
+ if (!json.has("@type")) {
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
+ };
+
+ String type = json.get("@type").asText();
+
+ // Single filter
+ if (type.equals("koral:doc")) {
+
+ String key = "tokens";
+ String valtype = "type:string";
+ String match = "match:eq";
+
+ if (json.has("key"))
+ key = json.get("key").asText();
+
+ if (json.has("type"))
+ valtype = json.get("type").asText();
+
+ // Filter based on date
+ if (valtype.equals("type:date")) {
+
+ if (!json.has("value"))
+ throw new QueryException(612, "Dates require value fields");
+
+ String dateStr = json.get("value").asText();
+ if (json.has("match"))
+ match = json.get("match").asText();
+
+ // TODO: This isn't stable yet
+ switch (match) {
+ case "match:eq":
+ bfilter.date(dateStr);
+ break;
+ case "match:geq":
+ bfilter.since(dateStr);
+ break;
+ case "match:leq":
+ bfilter.till(dateStr);
+ break;
+ };
+
+ // No good reason for gt or lt
+ return bfilter;
+ }
+
+ // Filter based on string
+ else if (valtype.equals("type:string")) {
+ if (json.has("match"))
+ match = json.get("match").asText();
+
+ if (match.equals("match:eq")) {
+ bfilter.and(key, json.get("value").asText());
+ }
+ else if (match.equals("match:ne")) {
+ bfilter.andNot(key, json.get("value").asText());
+ }
+ // This may change - but for now it means the elements are lowercased
+ else if (match.equals("match:contains")) {
+ bfilter.and(key, json.get("value").asText().toLowerCase());
+ }
+ else if (match.equals("match:containsnot")) {
+ bfilter.andNot(key, json.get("value").asText().toLowerCase());
+ }
+ // <LEGACY>
+ else if (match.equals("match:excludes")) {
+ bfilter.andNot(key, json.get("value").asText().toLowerCase());
+ }
+ // </LEGACY>
+ else {
+ throw new QueryException(0, "Unknown match type");
+ };
+
+ return bfilter;
+ }
+
+ // Filter based on regex
+ else if (valtype.equals("type:regex")) {
+ if (json.has("match"))
+ match = json.get("match").asText();
+
+ if (match.equals("match:eq")) {
+ return bfilter.and(key, new RegexFilter(json.get("value")
+ .asText()));
+ }
+ else if (match.equals("match:ne")) {
+ return bfilter.andNot(key, new RegexFilter(json
+ .get("value").asText()));
+ };
+
+ // TODO! for excludes and contains
+ throw new QueryException(0, "Unknown document type");
+ };
+
+ // TODO!
+ throw new QueryException(0, "Unknown document operation");
+ }
+
+ // nested group
+ else if (type.equals("koral:docGroup")) {
+ if (!json.has("operands") || !json.get("operands").isArray())
+ throw new QueryException(612, "Groups need operands");
+
+ String operation = "operation:and";
+ if (json.has("operation"))
+ operation = json.get("operation").asText();
+
+ BooleanFilter group = new BooleanFilter();
+
+ for (JsonNode operand : json.get("operands")) {
+ if (operation.equals("operation:and"))
+ group.and(this._fromJson(operand, field));
+
+ else if (operation.equals("operation:or"))
+ group.or(this._fromJson(operand, field));
+
+ else
+ throw new QueryException(613,
+ "Unknown document group operation");
+ };
+ bfilter.and(group);
+ return bfilter;
+ }
+
+ // Unknown type
+ else
+ throw new QueryException(613,
+ "Collection query type has to be doc or docGroup");
+
+ // return new BooleanFilter();
+ };
+
+
+ /**
+ * Import the "collections" part of a KoralQuery.
+ * This method is deprecated and will vanish in future versions.
+ *
+ * @param jsonString
+ * The "collections" part of a KoralQuery.
+ * @throws QueryException
+ */
+ @Deprecated
+ public KrillCollectionLegacy fromJsonLegacy (String jsonString)
+ throws QueryException {
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ this.fromJsonLegacy((JsonNode) mapper.readValue(jsonString,
+ JsonNode.class));
+ }
+ catch (Exception e) {
+ this.addError(621, "Unable to parse JSON", "KrillCollection");
+ };
+ return this;
+ };
+
+
+ /**
+ * Import the "collections" part of a KoralQuery.
+ * This method is deprecated and will vanish in future versions.
+ *
+ * @param json
+ * The "collections" part of a KoralQuery
+ * as a {@link JsonNode} object.
+ * @throws QueryException
+ */
+ @Deprecated
+ public KrillCollectionLegacy fromJsonLegacy (JsonNode json) throws QueryException {
+ if (!json.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
+
+ if (!json.has("@value"))
+ throw new QueryException(851, "Legacy filter need @value fields");
+
+ BooleanFilter bf = this._fromJsonLegacy(json.get("@value"), "tokens");
+ String type = json.get("@type").asText();
+
+ // Filter the collection
+ if (type.equals("koral:meta-filter")) {
+ if (DEBUG)
+ log.trace("Add Filter LEGACY");
+ this.filter(bf);
+ }
+
+ // Extend the collection
+ else if (type.equals("koral:meta-extend")) {
+ if (DEBUG)
+ log.trace("Add Extend LEGACY");
+ this.extend(bf);
+ };
+
+ return this;
+ };
+
+
+    // Create a boolean filter from a legacy JsonNode
+ @Deprecated
+ private BooleanFilter _fromJsonLegacy (JsonNode json, String field)
+ throws QueryException {
+ BooleanFilter bfilter = new BooleanFilter();
+
+ if (!json.has("@type"))
+ throw new QueryException(612,
+ "JSON-LD group has no @type attribute");
+
+ String type = json.get("@type").asText();
+
+ if (DEBUG)
+ log.trace("@type: " + type);
+
+ if (json.has("@field"))
+ field = _getFieldLegacy(json);
+
+ if (type.equals("koral:term")) {
+ if (field != null && json.has("@value"))
+ bfilter.and(field, json.get("@value").asText());
+ return bfilter;
+ }
+ else if (type.equals("koral:group")) {
+ if (!json.has("relation"))
+ throw new QueryException(612, "Group needs relation");
+
+ if (!json.has("operands"))
+ throw new QueryException(612, "Group needs operand list");
+
+ String dateStr, till;
+ JsonNode operands = json.get("operands");
+
+ if (!operands.isArray())
+ throw new QueryException(612, "Group needs operand list");
+
+ if (DEBUG)
+ log.trace("relation found {}", json.get("relation").asText());
+
+ BooleanFilter group = new BooleanFilter();
+
+ switch (json.get("relation").asText()) {
+ case "between":
+ dateStr = _getDateLegacy(json, 0);
+ till = _getDateLegacy(json, 1);
+ if (dateStr != null && till != null)
+ bfilter.between(dateStr, till);
+ break;
+
+ case "until":
+ dateStr = _getDateLegacy(json, 0);
+ if (dateStr != null)
+ bfilter.till(dateStr);
+ break;
+
+ case "since":
+ dateStr = _getDateLegacy(json, 0);
+ if (dateStr != null)
+ bfilter.since(dateStr);
+ break;
+
+ case "equals":
+ dateStr = _getDateLegacy(json, 0);
+ if (dateStr != null)
+ bfilter.date(dateStr);
+ break;
+
+ case "and":
+ if (operands.size() < 1)
+ throw new QueryException(612,
+ "Operation needs at least two operands");
+
+ for (JsonNode operand : operands) {
+ group.and(this._fromJsonLegacy(operand, field));
+ }
+ ;
+ bfilter.and(group);
+ break;
+
+ case "or":
+ if (operands.size() < 1)
+ throw new QueryException(612,
+ "Operation needs at least two operands");
+
+ for (JsonNode operand : operands) {
+ group.or(this._fromJsonLegacy(operand, field));
+ }
+ ;
+ bfilter.and(group);
+ break;
+
+ default:
+ throw new QueryException(613, "Relation is not supported");
+ };
+ }
+ else {
+ throw new QueryException(613,
+ "Filter type is not a supported group");
+ };
+ return bfilter;
+ };
+
+
+ /**
+ * Set the {@link KrillIndex} the virtual collection refers to.
+ *
+ * @param index
+ * The {@link KrillIndex} the virtual collection refers
+ * to.
+ */
+ public void setIndex (KrillIndex index) {
+ this.index = index;
+ };
+
+
+ /**
+ * Add a filter by means of a {@link BooleanFilter}.
+ *
+ * <strong>Warning</strong>: Filters are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param filter
+ * The filter to add to the collection.
+ * @return The {@link KrillCollectionLegacy} object for chaining.
+ */
+ // TODO: The checks may not be necessary
+ public KrillCollectionLegacy filter (BooleanFilter filter) {
+ if (DEBUG)
+ log.trace("Added filter: {}", filter.toString());
+
+ if (filter == null) {
+ this.addWarning(830, "Filter was empty");
+ return this;
+ };
+
+ Filter f = (Filter) new QueryWrapperFilter(filter.toQuery());
+ if (f == null) {
+ this.addWarning(831, "Filter is not wrappable");
+ return this;
+ };
+ FilterOperation fo = new FilterOperation(f, false);
+ if (fo == null) {
+ this.addWarning(832, "Filter operation is invalid");
+ return this;
+ };
+ this.filter.add(fo);
+ this.filterCount++;
+ return this;
+ };
+
+
+ /**
+ * Add a filter by means of a {@link CollectionBuilderLegacy} object.
+ *
+ * <strong>Warning</strong>: Filters are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param filter
+ * The filter to add to the collection.
+ * @return The {@link KrillCollectionLegacy} object for chaining.
+ */
+ public KrillCollectionLegacy filter (CollectionBuilderLegacy filter) {
+ return this.filter(filter.getBooleanFilter());
+ };
+
+
+ /**
+ * Add an extension by means of a {@link BooleanFilter}.
+ *
+ * <strong>Warning</strong>: Extensions are part of the
+ * collections
+ * legacy API and may vanish without warning.
+ *
+ * @param extension
+ * The extension to add to the collection.
+ * @return The {@link KrillCollectionLegacy} object for chaining.
+ */
+ public KrillCollectionLegacy extend (BooleanFilter extension) {
+ if (DEBUG)
+ log.trace("Added extension: {}", extension.toString());
+
+ this.filter.add(new FilterOperation((Filter) new QueryWrapperFilter(
+ extension.toQuery()), true));
+ this.filterCount++;
+ return this;
+ };
+
+
+ /**
+ * Add an extension by means of a {@link CollectionBuilderLegacy}
+ * object.
+ *
+ * <strong>Warning</strong>: Extensions are part of the
+ * collections
+ * legacy API and may vanish without warning.
+ *
+ * @param extension
+ * The extension to add to the collection.
+ * @return The {@link KrillCollectionLegacy} object for chaining.
+ */
+ public KrillCollectionLegacy extend (CollectionBuilderLegacy extension) {
+ return this.extend(extension.getBooleanFilter());
+ };
+
+
+ /**
+ * Add a filter based on a list of unique document identifiers.
+ * UIDs may be indexed in the field "UID".
+ *
+ * This filter is not part of the legacy API!
+ *
+ * @param uids
+     *            The list of unique document identifiers.
+ * @return The {@link KrillCollectionLegacy} object for chaining.
+ */
+ public KrillCollectionLegacy filterUIDs (String ... uids) {
+ BooleanFilter filter = new BooleanFilter();
+ filter.or("UID", uids);
+ if (DEBUG)
+ log.debug("UID based filter: {}", filter.toString());
+ return this.filter(filter);
+ };
+
+
+ /**
+ * Get the list of filters constructing the collection.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @return The list of filters.
+ */
+ public List<FilterOperation> getFilters () {
+ return this.filter;
+ };
+
+
+ /**
+ * Get a certain {@link FilterOperation} from the list of filters
+ * constructing the collection by its numerical index.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param index
+ * The index position of the requested
+ * {@link FilterOperation}.
+ * @return The {@link FilterOperation} at the certain list
+ * position.
+ */
+ public FilterOperation getFilter (int index) {
+ return this.filter.get(index);
+ };
+
+
+ /**
+ * Get the number of filter operations constructing this
+ * collection.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @return The number of filter operations constructing this
+ * collection.
+ */
+ public int getCount () {
+ return this.filterCount;
+ };
+
+
+ /**
+     * Generate a string representation of the virtual collection.
+ *
+ * <strong>Warning</strong>: This currently does not generate a
+ * valid
+ * KoralQuery string, so this may change in a future version.
+ *
+ * @return A string representation of the virtual collection.
+ */
+ public String toString () {
+ StringBuilder sb = new StringBuilder();
+ for (FilterOperation fo : this.filter) {
+ sb.append(fo.toString()).append("; ");
+ };
+ return sb.toString();
+ };
+
+
+ /**
+ * Return the associated KoralQuery collection object
+ * as a {@link JsonNode}. This won't work,
+     * if the object was built using a CollectionBuilderLegacy,
+ * therefore it is limited to mirror a deserialized KoralQuery
+ * object.
+ *
+ * @return The {@link JsonNode} representing the collection object
+ * of a deserialized KoralQuery object.
+ */
+ public JsonNode toJsonNode () {
+ return this.json;
+ };
+
+
+
+ /**
+ * Search in the virtual collection.
+ * This is mostly used for testing purposes
+ * and <strong>is not recommended</strong>
+ * as a common search API.
+ *
+ * Please use {@link KrillQuery#run} instead.
+ *
+ * @param query
+ * a {@link SpanQuery} to apply on the
+ * virtual collection.
+ * @return A {@link Result} object representing the search's
+ * result.
+ */
+ public Result search (SpanQuery query) {
+ /*
+return this.index.search(this, query, 0, (short) 20, true, (short) 5,
+ true, (short) 5);
+ */
+ return null;
+ };
+
+
+ /**
+ * Create a bit vector representing the live documents of the
+ * virtual collection to be used in searches.
+ *
+     * @param atomic
+     *            The {@link AtomicReaderContext} to search in.
+ * @return A bit vector representing the live documents of the
+ * virtual collection.
+ * @throws IOException
+ */
+ public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
+ // TODO: Probably use Bits.MatchAllBits(int len)
+ boolean noDoc = true;
+ FixedBitSet bitset;
+
+ // There are filters set
+ if (this.filterCount > 0) {
+ bitset = new FixedBitSet(atomic.reader().maxDoc());
+
+ ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter
+ .clone();
+
+ FilterOperation kcInit = filters.remove(0);
+ if (DEBUG)
+ log.trace("FILTER: {}", kcInit);
+
+ // Init vector
+ DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null);
+
+ DocIdSetIterator filterIter = docids.iterator();
+
+ // The filter has an effect
+ if (filterIter != null) {
+ if (DEBUG)
+ log.trace("InitFilter has effect");
+ bitset.or(filterIter);
+ noDoc = false;
+ };
+
+ // Apply all filters sequentially
+ for (FilterOperation kc : filters) {
+ if (DEBUG)
+ log.trace("FILTER: {}", kc);
+
+ // TODO: BUG???
+ docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null
+ : bitset);
+ filterIter = docids.iterator();
+
+ if (filterIter == null) {
+ // There must be a better way ...
+ if (kc.isFilter()) {
+ // TODO: Check if this is really correct!
+ // Maybe here is the bug
+ bitset.clear(0, bitset.length());
+ noDoc = true;
+ };
+ continue;
+ };
+ if (kc.isExtension())
+ bitset.or(filterIter);
+ else
+ bitset.and(filterIter);
+ };
+
+ if (!noDoc) {
+ FixedBitSet livedocs = (FixedBitSet) atomic.reader()
+ .getLiveDocs();
+ if (livedocs != null)
+ bitset.and(livedocs);
+ };
+ }
+ else {
+ bitset = (FixedBitSet) atomic.reader().getLiveDocs();
+ };
+
+ return bitset;
+ };
+
+
+ /**
+ * Search for the number of occurrences of different types,
+ * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+ * collection.
+ *
+ * @param field
+ * The field containing the textual data and the
+ * annotations as a string.
+ * @param type
+ * The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a
+ * string.
+ * @return The number of the occurrences.
+ * @throws IOException
+ * @see KrillIndex#numberOf
+ */
+ public long numberOf (String field, String type) throws IOException {
+ if (this.index == null)
+ return (long) -1;
+
+ // return this.index.numberOf(this, field, type);
+ return (long) 0;
+ };
+
+
+ /**
+ * Search for the number of occurrences of different types,
+ * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+ * collection, in the <i>base</i> foundry.
+ *
+ * @param type
+ * The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a
+ * string.
+ * @return The number of the occurrences.
+ * @throws IOException
+ * @see KrillIndex#numberOf
+ */
+ public long numberOf (String type) throws IOException {
+ if (this.index == null)
+ return (long) -1;
+
+ // return this.index.numberOf(this, "tokens", type);
+ return (long) 0;
+ };
+
+
+ // Term relation API is not in use anymore
+ /*
+ @Deprecated
+ public HashMap getTermRelation (String field) throws Exception {
+ if (this.index == null) {
+ HashMap<String, Long> map = new HashMap<>(1);
+ map.put("-docs", (long) 0);
+ return map;
+ };
+
+ return this.index.getTermRelation(this, field);
+ };
+ */
+
+
+ // Term relation API is not in use anymore
+ /*
+ @Deprecated
+ public String getTermRelationJSON (String field) throws IOException {
+ ObjectMapper mapper = new ObjectMapper();
+ StringWriter sw = new StringWriter();
+ sw.append("{\"field\":");
+ mapper.writeValue(sw, field);
+ sw.append(",");
+
+ try {
+ HashMap<String, Long> map = this.getTermRelation(field);
+
+ sw.append("\"documents\":");
+ mapper.writeValue(sw, map.remove("-docs"));
+ sw.append(",");
+
+ String[] keys = map.keySet().toArray(new String[map.size()]);
+
+ HashMap<String, Integer> setHash = new HashMap<>(20);
+ ArrayList<HashMap<String, Long>> set = new ArrayList<>(20);
+ ArrayList<Long[]> overlap = new ArrayList<>(100);
+
+ int count = 0;
+ for (String key : keys) {
+ if (!key.startsWith("#__")) {
+ HashMap<String, Long> simpleMap = new HashMap<>();
+ simpleMap.put(key, map.remove(key));
+ set.add(simpleMap);
+ setHash.put(key, count++);
+ };
+ };
+
+ keys = map.keySet().toArray(new String[map.size()]);
+ for (String key : keys) {
+ String[] comb = key.substring(3).split(":###:");
+ Long[] l = new Long[3];
+ l[0] = (long) setHash.get(comb[0]);
+ l[1] = (long) setHash.get(comb[1]);
+ l[2] = map.remove(key);
+ overlap.add(l);
+ };
+
+ sw.append("\"sets\":");
+ mapper.writeValue(sw, (Object) set);
+ sw.append(",\"overlaps\":");
+ mapper.writeValue(sw, (Object) overlap);
+ sw.append(",\"error\":null");
+ }
+ catch (Exception e) {
+ sw.append("\"error\":");
+ mapper.writeValue(sw, e.getMessage());
+ };
+
+ sw.append("}");
+ return sw.getBuffer().toString();
+ };
+ */
+
+ // Get legacy field
+ @Deprecated
+ private static String _getFieldLegacy (JsonNode json) {
+ if (!json.has("@field"))
+ return (String) null;
+
+ String field = json.get("@field").asText();
+ return field.replaceFirst("koral:field#", "");
+ };
+
+
+ // Get legacy date
+ @Deprecated
+ private static String _getDateLegacy (JsonNode json, int index) {
+ if (!json.has("operands"))
+ return (String) null;
+
+ if (!json.get("operands").has(index))
+ return (String) null;
+
+ JsonNode date = json.get("operands").get(index);
+
+ if (!date.has("@type"))
+ return (String) null;
+
+ if (!date.get("@type").asText().equals("koral:date"))
+ return (String) null;
+
+ if (!date.has("@value"))
+ return (String) null;
+
+ return date.get("@value").asText();
+ };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
deleted file mode 100644
index 3409ff5..0000000
--- a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
+++ /dev/null
@@ -1,203 +0,0 @@
-package de.ids_mannheim.korap;
-
-import java.util.*;
-import java.io.IOException;
-
-import de.ids_mannheim.korap.collection.CollectionBuilderNew;
-import de.ids_mannheim.korap.response.Notifications;
-
-import org.apache.lucene.search.*;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.util.DocIdBitSet;
-import org.apache.lucene.search.BitsFilteredDocIdSet;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class KrillCollectionNew extends Notifications {
- private KrillIndex index;
- private CollectionBuilderNew.CollectionBuilderInterface cb;
-
- // Logger
- private final static Logger log = LoggerFactory
- .getLogger(KrillCollection.class);
-
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
-
- /**
- * Construct a new KrillCollection by passing a KrillIndex.
- *
- * @param index
- * The {@link KrillIndex} object.
- */
- public KrillCollectionNew (KrillIndex index) {
- this.index = index;
- };
-
- public KrillCollectionNew fromBuilder (CollectionBuilderNew.CollectionBuilderInterface cb) {
- this.cb = cb;
- return this;
- };
-
- public Filter toFilter () {
- if (this.cb == null)
- return null;
-
- return this.cb.toFilter();
- };
-
- public String toString () {
- Filter filter = this.toFilter();
- if (filter == null)
- return "";
-
- return filter.toString();
- };
-
- /*
- public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
-
- int maxDoc = atomic.reader().maxDoc();
- FixedBitSet bitset = new FixedBitSet(maxDoc);
-
- Filter filter;
- if (this.cb == null || (filter = this.cb.toFilter()) == null)
- return null;
-
- // Init vector
- DocIdSet docids = filter.getDocIdSet(atomic, null);
- DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
-
- if (filterIter == null) {
- if (!this.cb.isNegative())
- return null;
-
- bitset.set(0, maxDoc);
- }
- else {
- // Or bit set
- bitset.or(filterIter);
-
- // Revert for negation
- if (this.cb.isNegative())
- bitset.flip(0, maxDoc);
- };
-
- // Remove deleted docs
- return (FixedBitSet) BitsFilteredDocIdSet.wrap(
- (DocIdSet) bitset,
- (Bits) atomic.reader().getLiveDocs()
- ).iterator();
- };
- */
-
- /**
- * This will respect deleted documents.
- */
- public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
- AtomicReader r = atomic.reader();
- FixedBitSet bitset = new FixedBitSet(r.maxDoc());
- DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());
-
- if (docids == null)
- return null;
-
- bitset.or(docids.iterator());
- return bitset;
- };
-
-
- public DocIdSet getDocIdSet (AtomicReaderContext atomic, Bits acceptDocs) throws IOException {
-
- int maxDoc = atomic.reader().maxDoc();
- FixedBitSet bitset = new FixedBitSet(maxDoc);
-
- Filter filter;
- if (this.cb == null || (filter = this.cb.toFilter()) == null)
- return null;
-
- // Init vector
- DocIdSet docids = filter.getDocIdSet(atomic, null);
- DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
-
- if (filterIter == null) {
- if (!this.cb.isNegative())
- return null;
-
- bitset.set(0, maxDoc);
- }
- else {
- // Or bit set
- bitset.or(filterIter);
-
- // Revert for negation
- if (this.cb.isNegative())
- bitset.flip(0, maxDoc);
- };
-
- // Remove deleted docs
- return (DocIdSet) BitsFilteredDocIdSet.wrap(
- (DocIdSet) bitset,
- acceptDocs
- );
- };
-
-
- /**
- * Search for the number of occurrences of different types,
- * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
- * collection.
- *
- * @param field
- * The field containing the textual data and the
- * annotations as a string.
- * @param type
- * The type of meta information,
- * e.g. <i>documents</i> or <i>sentences</i> as a
- * string.
- * @return The number of the occurrences.
- * @throws IOException
- * @see KrillIndex#numberOf
- */
- public long numberOf (String field, String type) throws IOException {
-
- // No index defined
- if (this.index == null)
- return (long) -1;
-
- // This is redundant to index stuff
- if (type.equals("documents"))
- return this.docCount();
-
- return (long) 0;
- // return this.index.numberOf(this, field, type);
- };
-
-
-
- public long docCount () {
-
- // No index defined
- if (this.index == null)
- return (long) 0;
-
- long docCount = 0;
- try {
- FixedBitSet bitset;
- for (AtomicReaderContext atomic : this.index.reader().leaves()) {
- if ((bitset = this.bits(atomic)) != null)
- docCount += bitset.cardinality();
- };
- }
- catch (IOException e) {
- log.warn(e.getLocalizedMessage());
- };
- return docCount;
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index bfdae31..c1c0c00 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -605,8 +605,19 @@
* @see KrillCollection#numberOf
*/
public long numberOf (KrillCollection collection, String field, String type) {
+
+ collection.setIndex(this);
+ try {
+ return collection.numberOf(field, type);
+ }
+ catch (IOException e) {
+ log.warn(e.getLocalizedMessage());
+ };
+ return (long) -1;
+
// Short cut for documents
// This will be only "texts" in the future
+ /*
if (type.equals("documents") || type.equals("base/texts")) {
if (collection.getCount() <= 0) {
try {
@@ -617,7 +628,6 @@
};
return (long) 0;
};
-
long docCount = 0;
// int i = 1;
try {
@@ -651,6 +661,7 @@
};
return occurrences;
+ */
};
@@ -707,6 +718,7 @@
* @return The number of the occurrences.
* @throws IOException
*/
+ @Deprecated
public long numberOf (Bits docvec, String field, String type)
throws IOException {
// Shortcut for documents
@@ -735,6 +747,7 @@
// Search for meta information in term vectors
// This will create the sum of all numerical payloads
// of the term in the document vector
+ @Deprecated
private long _numberOfAtomic (Bits docvec, AtomicReaderContext atomic,
Term term) throws IOException {
@@ -1144,103 +1157,6 @@
};
- @Deprecated
- public HashMap getTermRelation (String field) throws Exception {
- return this.getTermRelation(new KrillCollection(this), field);
- };
-
-
- /**
- * Analyze how terms relate
- */
- @Deprecated
- public HashMap getTermRelation (KrillCollection kc, String field)
- throws Exception {
- HashMap<String, Long> map = new HashMap<>(100);
- long docNumber = 0, checkNumber = 0;
-
- try {
- if (kc.getCount() <= 0) {
- checkNumber = (long) this.reader().numDocs();
- };
-
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- HashMap<String, FixedBitSet> termVector = new HashMap<>(20);
-
- FixedBitSet docvec = kc.bits(atomic);
- if (docvec != null) {
- docNumber += docvec.cardinality();
- };
-
- Terms terms = atomic.reader().fields().terms(field);
-
- if (terms == null) {
- continue;
- };
-
- int docLength = atomic.reader().maxDoc();
- FixedBitSet bitset = new FixedBitSet(docLength);
-
- // Iterate over all tokens in this field
- TermsEnum termsEnum = terms.iterator(null);
-
- while (termsEnum.next() != null) {
-
- String termString = termsEnum.term().utf8ToString();
-
- bitset.clear(0, docLength);
-
- // Get frequency
- bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec,
- null));
-
- long value = 0;
- if (map.containsKey(termString))
- value = map.get(termString);
-
- map.put(termString, value + bitset.cardinality());
-
- termVector.put(termString, bitset.clone());
- };
-
- int keySize = termVector.size();
- String[] keys = termVector.keySet()
- .toArray(new String[keySize]);
- java.util.Arrays.sort(keys);
-
- if (keySize > maxTermRelations) {
- throw new Exception("termRelations are limited to "
- + maxTermRelations + " sets"
- + " (requested were at least " + keySize + " sets)");
- };
-
- for (int i = 0; i < keySize; i++) {
- for (int j = i + 1; j < keySize; j++) {
- FixedBitSet comby = termVector.get(keys[i]).clone();
- comby.and(termVector.get(keys[j]));
-
- StringBuilder sb = new StringBuilder();
- sb.append("#__").append(keys[i]).append(":###:")
- .append(keys[j]);
- String combString = sb.toString();
-
- long cap = (long) comby.cardinality();
- if (map.containsKey(combString)) {
- cap += map.get(combString);
- };
- map.put(combString, cap);
- };
- };
- };
- map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
- }
- catch (IOException e) {
- log.warn(e.getMessage());
- };
- return map;
- };
-
-
/**
* Search in the index.
*/
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index 809165f..5bd91a7 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -1,128 +1,231 @@
package de.ids_mannheim.korap.collection;
-import de.ids_mannheim.korap.collection.BooleanFilter;
-import de.ids_mannheim.korap.collection.RegexFilter;
-import de.ids_mannheim.korap.util.QueryException;
+import java.util.*;
+import java.io.IOException;
+// TEMPORARY:
+import org.apache.lucene.queries.BooleanFilter;
+import org.apache.lucene.search.BooleanClause;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.TermsFilter;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.NumericRangeFilter;
import de.ids_mannheim.korap.util.KrillDate;
-import org.apache.lucene.search.Query;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/**
- * CollectionBuilder implements a simple API for creating queries
- * constituing Virtual Collections.
- *
- * <strong>Warning</strong>: The API is likely to change.
- *
- * @author diewald
- */
-/*
- * Todo: WildCardFilter!
- * Todo: Support delete boolean etc.
- * Todo: Supports foundries
- */
+import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.BooleanGroupFilter;
+
public class CollectionBuilder {
- private BooleanFilter filter;
- private String field = "tokens";
// Logger
- private final static Logger log = LoggerFactory
- .getLogger(CollectionBuilder.class);
+    private final static Logger log = LoggerFactory.getLogger(CollectionBuilder.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
-
- /**
- * Construct a new CollectionBuilder object.
- */
- public CollectionBuilder () {
- filter = new BooleanFilter();
+ public CollectionBuilderInterface term (String field, String term) {
+ return new CollectionBuilderTerm(field, term);
};
-
- public BooleanFilter and (String type, String ... terms) {
- BooleanFilter bf = new BooleanFilter();
- bf.and(type, terms);
- return bf;
+ public CollectionBuilderInterface re (String field, String term) {
+ return new CollectionBuilderTerm(field, term, true);
};
+ public CollectionBuilderInterface since (String field, String date) {
+ int since = new KrillDate(date).floor();
- public BooleanFilter or (String type, String ... terms) {
- BooleanFilter bf = new BooleanFilter();
- bf.or(type, terms);
- return bf;
+ if (since == 0 || since == KrillDate.BEGINNING)
+ return null;
+
+ return new CollectionBuilderRange(field, since, KrillDate.END);
};
+ public CollectionBuilderInterface till (String field, String date) {
+ try {
+ int till = new KrillDate(date).ceil();
+ if (till == 0 || till == KrillDate.END)
+ return null;
- public BooleanFilter and (String type, RegexFilter re) {
- BooleanFilter bf = new BooleanFilter();
- bf.and(type, re);
- return bf;
+ return new CollectionBuilderRange(field, KrillDate.BEGINNING, till);
+ }
+ catch (NumberFormatException e) {
+ log.warn("Parameter of till(date) is invalid");
+ };
+ return null;
};
+ public CollectionBuilderInterface date (String field, String date) {
+ KrillDate dateDF = new KrillDate(date);
- public BooleanFilter or (String type, RegexFilter re) {
- BooleanFilter bf = new BooleanFilter();
- bf.or(type, re);
- return bf;
+ if (dateDF.year == 0)
+ return null;
+
+ if (dateDF.day == 0 || dateDF.month == 0) {
+ int begin = dateDF.floor();
+ int end = dateDF.ceil();
+
+ if (end == 0
+ || (begin == KrillDate.BEGINNING && end == KrillDate.END))
+ return null;
+
+ return new CollectionBuilderRange(field, begin, end);
+ };
+
+ return new CollectionBuilderRange(field, dateDF.floor(), dateDF.ceil());
};
-
- public BooleanFilter since (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.since(date);
- return bf;
+ public CollectionBuilderGroup andGroup () {
+ return new CollectionBuilderGroup(false);
};
-
- public BooleanFilter till (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.till(date);
- return bf;
+ public CollectionBuilderGroup orGroup () {
+ return new CollectionBuilderGroup(true);
};
-
- public BooleanFilter date (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.date(date);
- return bf;
+ public interface CollectionBuilderInterface {
+ public String toString ();
+ public Filter toFilter ();
+ public boolean isNegative ();
+ public CollectionBuilderInterface not ();
};
+ public class CollectionBuilderTerm implements CollectionBuilderInterface {
+ private boolean isNegative = false;
+ private boolean regex = false;
+ private String field;
+ private String term;
- public BooleanFilter between (String date1, String date2) {
- BooleanFilter bf = new BooleanFilter();
- bf.between(date1, date2);
- return bf;
+ public CollectionBuilderTerm (String field, String term) {
+ this.field = field;
+ this.term = term;
+ };
+
+ public CollectionBuilderTerm (String field, String term, boolean regex) {
+ this.field = field;
+ this.term = term;
+ this.regex = regex;
+ };
+
+        /**
+         * Build the Lucene filter for this term.
+         *
+         * @return a QueryWrapperFilter around a RegexpQuery if this term
+         *         was created as a regular expression, a TermsFilter
+         *         on the plain term otherwise
+         */
+        public Filter toFilter () {
+            Term indexTerm = new Term(this.field, this.term);
+
+            // Simple term
+            if (!this.regex)
+                return new TermsFilter(indexTerm);
+
+            // Regular expression
+            return new QueryWrapperFilter(new RegexpQuery(indexTerm));
+        };
+
+ public String toString () {
+ return this.toFilter().toString();
+ };
+
+ public boolean isNegative () {
+ return this.isNegative;
+ };
+
+
+ public CollectionBuilderInterface not () {
+ this.isNegative = true;
+ return this;
+ };
};
+ public class CollectionBuilderGroup implements CollectionBuilderInterface {
+ private boolean isOptional = false;
+ private boolean isNegative = true;
- public RegexFilter re (String regex) {
- return new RegexFilter(regex);
+ public boolean isNegative () {
+ return this.isNegative;
+ };
+
+ public boolean isOptional () {
+ return this.isOptional;
+ };
+
+ private ArrayList<CollectionBuilderInterface> operands;
+
+ public CollectionBuilderGroup (boolean optional) {
+ this.isOptional = optional;
+ this.operands = new ArrayList<CollectionBuilderInterface>(3);
+ };
+
+        /**
+         * Append an operand to this group.
+         * The group stops being negative as soon as one non-negative
+         * operand has been added.
+         *
+         * @param cb the operand to add; it is dereferenced here,
+         *        so it must not be null
+         * @return this group, to allow chaining
+         */
+        public CollectionBuilderGroup with (CollectionBuilderInterface cb) {
+            // Non-short-circuit on purpose: cb.isNegative() is always evaluated
+            this.isNegative &= cb.isNegative();
+            this.operands.add(cb);
+            return this;
+        };
+
+
+        /**
+         * Build the Lucene filter for this group.
+         *
+         * @return null if the group has no operands, the filter of the
+         *         only operand if there is exactly one, and otherwise a
+         *         BooleanGroupFilter to which negative operands are added
+         *         via without() and all other operands via with()
+         */
+        public Filter toFilter () {
+            if (this.operands == null || this.operands.isEmpty())
+                return null;
+
+            // A single operand needs no surrounding group
+            if (this.operands.size() == 1)
+                return this.operands.get(0).toFilter();
+
+            BooleanGroupFilter group = new BooleanGroupFilter(this.isOptional);
+
+            for (CollectionBuilderInterface operand : this.operands) {
+                if (operand.isNegative()) {
+                    group.without(operand.toFilter());
+                    continue;
+                };
+                group.with(operand.toFilter());
+            };
+
+            return group;
+        };
+
+        /**
+         * Serialize the filter of this group.
+         *
+         * @return the string form of the group filter, or an empty
+         *         string if the group has no operands
+         */
+        public String toString () {
+            // toFilter() yields null for a group without operands,
+            // so it must not be dereferenced unconditionally
+            Filter groupFilter = this.toFilter();
+            if (groupFilter == null)
+                return "";
+            return groupFilter.toString();
+        };
+
+ public CollectionBuilderInterface not () {
+ this.isNegative = true;
+ return this;
+ };
};
+ public class CollectionBuilderRange implements CollectionBuilderInterface {
+ private boolean isNegative = false;
+ private String field;
+ private int start, end;
- public BooleanFilter getBooleanFilter () {
- return this.filter;
- };
+ public CollectionBuilderRange (String field, int start, int end) {
+ this.field = field;
+ this.start = start;
+ this.end = end;
+ };
+ public boolean isNegative () {
+ return this.isNegative;
+ };
- public void setBooleanFilter (BooleanFilter bf) {
- this.filter = bf;
- };
+ public String toString () {
+ return this.toFilter().toString();
+ };
+ public Filter toFilter () {
+ return NumericRangeFilter.newIntRange(this.field,
+ this.start,
+ this.end,
+ true,
+ true);
+ };
- public Query toQuery () {
- return this.filter.toQuery();
- };
-
-
- public String toString () {
- return this.filter.toQuery().toString();
+ public CollectionBuilderInterface not () {
+ this.isNegative = true;
+ return this;
+ };
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java
new file mode 100644
index 0000000..37fa50f
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java
@@ -0,0 +1,128 @@
+package de.ids_mannheim.korap.collection;
+
+import de.ids_mannheim.korap.collection.BooleanFilter;
+import de.ids_mannheim.korap.collection.RegexFilter;
+import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.KrillDate;
+
+import org.apache.lucene.search.Query;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * CollectionBuilderLegacy implements a simple API for creating queries
+ * constituting Virtual Collections.
+ *
+ * <strong>Warning</strong>: The API is likely to change.
+ *
+ * @author diewald
+ */
+/*
+ * Todo: WildCardFilter!
+ * Todo: Support delete boolean etc.
+ * Todo: Support foundries
+ */
+public class CollectionBuilderLegacy {
+    private BooleanFilter filter;
+    private String field = "tokens";
+
+    // Logger
+    private final static Logger log = LoggerFactory
+            .getLogger(CollectionBuilderLegacy.class);
+
+    // This advises the Java compiler to ignore all logging
+    public static final boolean DEBUG = false;
+
+
+    /**
+     * Construct a new CollectionBuilderLegacy object holding a new BooleanFilter.
+     */
+    public CollectionBuilderLegacy () {
+        filter = new BooleanFilter();
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.and(type, terms)}. */
+    public BooleanFilter and (String type, String ... terms) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.and(type, terms);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.or(type, terms)}. */
+    public BooleanFilter or (String type, String ... terms) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.or(type, terms);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.and(type, re)}. */
+    public BooleanFilter and (String type, RegexFilter re) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.and(type, re);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.or(type, re)}. */
+    public BooleanFilter or (String type, RegexFilter re) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.or(type, re);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.since(date)}. */
+    public BooleanFilter since (String date) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.since(date);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.till(date)}. */
+    public BooleanFilter till (String date) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.till(date);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.date(date)}. */
+    public BooleanFilter date (String date) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.date(date);
+        return bf;
+    };
+
+    /** Return a fresh filter populated via {@code BooleanFilter.between(date1, date2)}. */
+    public BooleanFilter between (String date1, String date2) {
+        BooleanFilter bf = new BooleanFilter();
+        bf.between(date1, date2);
+        return bf;
+    };
+
+    /** Wrap the given regular expression string in a new RegexFilter. */
+    public RegexFilter re (String regex) {
+        return new RegexFilter(regex);
+    };
+
+    /** Return the filter currently held by this builder. */
+    public BooleanFilter getBooleanFilter () {
+        return this.filter;
+    };
+
+    /** Replace the filter held by this builder. */
+    public void setBooleanFilter (BooleanFilter bf) {
+        this.filter = bf;
+    };
+
+    /** Convert the filter held by this builder into a Lucene query. */
+    public Query toQuery () {
+        return this.filter.toQuery();
+    };
+
+    /** Return the string form of the query of the filter held by this builder. */
+    public String toString () {
+        return this.filter.toQuery().toString();
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
deleted file mode 100644
index 5bb9968..0000000
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
+++ /dev/null
@@ -1,232 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import java.util.*;
-import java.io.IOException;
-// TEMPORARY:
-import org.apache.lucene.queries.BooleanFilter;
-import org.apache.lucene.search.BooleanClause;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queries.TermsFilter;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.NumericRangeFilter;
-import de.ids_mannheim.korap.util.KrillDate;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import de.ids_mannheim.korap.KrillCollection;
-import de.ids_mannheim.korap.collection.BooleanGroupFilter;
-
-public class CollectionBuilderNew {
-
- // Logger
- private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
-
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
- public CollectionBuilderInterface term (String field, String term) {
- return new CollectionBuilderTerm(field, term);
- };
-
- public CollectionBuilderInterface re (String field, String term) {
- return new CollectionBuilderTerm(field, term, true);
- };
-
- public CollectionBuilderInterface since (String field, String date) {
- int since = new KrillDate(date).floor();
-
- if (since == 0 || since == KrillDate.BEGINNING)
- return null;
-
- return new CollectionBuilderRange(field, since, KrillDate.END);
- };
-
- public CollectionBuilderInterface till (String field, String date) {
- try {
- int till = new KrillDate(date).ceil();
- if (till == 0 || till == KrillDate.END)
- return null;
-
- return new CollectionBuilderRange(field, KrillDate.BEGINNING, till);
- }
- catch (NumberFormatException e) {
- log.warn("Parameter of till(date) is invalid");
- };
- return null;
- };
-
- public CollectionBuilderInterface date (String field, String date) {
- KrillDate dateDF = new KrillDate(date);
-
- if (dateDF.year == 0)
- return null;
-
- if (dateDF.day == 0 || dateDF.month == 0) {
- int begin = dateDF.floor();
- int end = dateDF.ceil();
-
- if (end == 0
- || (begin == KrillDate.BEGINNING && end == KrillDate.END))
- return null;
-
- return new CollectionBuilderRange(field, begin, end);
- };
-
- return new CollectionBuilderRange(field, dateDF.floor(), dateDF.ceil());
- };
-
-
- public CollectionBuilderGroup andGroup (CollectionBuilderInterface cb) {
- return new CollectionBuilderGroup(false).with(cb);
- };
-
- public CollectionBuilderGroup orGroup (CollectionBuilderInterface cb) {
- return new CollectionBuilderGroup(true).with(cb);
- };
-
- public interface CollectionBuilderInterface {
- public String toString ();
- public Filter toFilter ();
- public boolean isNegative ();
- public CollectionBuilderInterface not ();
- };
-
- public class CollectionBuilderTerm implements CollectionBuilderInterface {
- private boolean isNegative = false;
- private boolean regex = false;
- private String field;
- private String term;
-
- public CollectionBuilderTerm (String field, String term) {
- this.field = field;
- this.term = term;
- };
-
- public CollectionBuilderTerm (String field, String term, boolean regex) {
- this.field = field;
- this.term = term;
- this.regex = regex;
- };
-
- public Filter toFilter () {
- // Regular expression
- if (this.regex)
- return new QueryWrapperFilter(
- new RegexpQuery(new Term(this.field, this.term))
- );
-
- // Simple term
- return new TermsFilter(new Term(this.field, this.term));
- };
-
- public String toString () {
- return this.toFilter().toString();
- };
-
- public boolean isNegative () {
- return this.isNegative;
- };
-
-
- public CollectionBuilderInterface not () {
- this.isNegative = true;
- return this;
- };
- };
-
- public class CollectionBuilderGroup implements CollectionBuilderInterface {
- private boolean isOptional = false;
- private boolean isNegative = true;
-
- public boolean isNegative () {
- return this.isNegative;
- };
-
- public boolean isOptional () {
- return this.isOptional;
- };
-
- private ArrayList<CollectionBuilderInterface> operands;
-
- public CollectionBuilderGroup (boolean optional) {
- this.isOptional = optional;
- this.operands = new ArrayList<CollectionBuilderInterface>(3);
- };
-
- public CollectionBuilderGroup with (CollectionBuilderInterface cb) {
- if (!cb.isNegative())
- this.isNegative = false;
- this.operands.add(cb);
- return this;
- };
-
-
- public Filter toFilter () {
- if (this.operands == null || this.operands.isEmpty())
- return null;
-
- if (this.operands.size() == 1)
- return this.operands.get(0).toFilter();
-
- // BooleanFilter bool = new BooleanFilter();
- BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional);
-
- Iterator<CollectionBuilderInterface> i = this.operands.iterator();
- while (i.hasNext()) {
- CollectionBuilderInterface cb = i.next();
- if (cb.isNegative()) {
- bool.without(cb.toFilter());
- }
- else {
- bool.with(cb.toFilter());
- };
- };
-
- return bool;
- };
-
- public String toString () {
- return this.toFilter().toString();
- };
-
- public CollectionBuilderInterface not () {
- this.isNegative = true;
- return this;
- };
- };
-
- public class CollectionBuilderRange implements CollectionBuilderInterface {
- private boolean isNegative = false;
- private String field;
- private int start, end;
-
- public CollectionBuilderRange (String field, int start, int end) {
- this.field = field;
- this.start = start;
- this.end = end;
- };
-
- public boolean isNegative () {
- return this.isNegative;
- };
-
- public String toString () {
- return this.toFilter().toString();
- };
-
- public Filter toFilter () {
- return NumericRangeFilter.newIntRange(this.field,
- this.start,
- this.end,
- true,
- true);
- };
-
- public CollectionBuilderInterface not () {
- this.isNegative = true;
- return this;
- };
- };
-};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilderLegacy.java
similarity index 91%
rename from src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
rename to src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilderLegacy.java
index 417de65..b8f0019 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilderLegacy.java
@@ -13,14 +13,14 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import de.ids_mannheim.korap.collection.CollectionBuilder;
+import de.ids_mannheim.korap.collection.CollectionBuilderLegacy;
@RunWith(JUnit4.class)
-public class TestCollectionBuilder {
+public class TestCollectionBuilderLegacy {
@Test
public void filterExample () throws IOException {
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
assertEquals("+textClass:tree", kf.and("textClass", "tree").toString());
assertEquals("+textClass:tree +textClass:sport",
@@ -40,7 +40,7 @@
@Test
public void rangeExample () throws IOException {
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
assertEquals("+pubDate:[20030604 TO 20030899]",
kf.between("2003-06-04", "2003-08-99").toString());
assertEquals("+pubDate:[0 TO 20030604]", kf.till("2003-06-04")
@@ -53,7 +53,7 @@
@Test
public void rangeLimited () throws IOException {
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
assertEquals("+pubDate:[20050000 TO 20099999]",
kf.between("2005", "2009").toString());
assertEquals("+pubDate:[20051000 TO 20090899]",
@@ -97,7 +97,7 @@
@Test
public void rangeFailure () throws IOException {
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
assertEquals("", kf.between("aaaa-bb-cc", "aaaabbcc").toString());
assertEquals("", kf.till("aaaa-bb-cc").toString());
assertEquals("", kf.since("aaaa-bb-cc").toString());
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index 7cd11f0..a41bc59 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -2,8 +2,8 @@
import java.io.IOException;
import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.KrillCollectionNew;
-import de.ids_mannheim.korap.collection.CollectionBuilderNew;
+import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.TextAnalyzer;
@@ -28,8 +28,8 @@
ki.addDoc(createDoc2());
ki.addDoc(createDoc3());
ki.commit();
- CollectionBuilderNew cb = new CollectionBuilderNew();
- KrillCollectionNew kcn = new KrillCollectionNew(ki);
+ CollectionBuilder cb = new CollectionBuilder();
+ KrillCollection kcn = new KrillCollection(ki);
// Simple string tests
kcn.fromBuilder(cb.term("author", "Frank"));
@@ -54,47 +54,47 @@
assertEquals(1, kcn.docCount());
// Simple orGroup tests
- kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
+ kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
assertEquals(1, kcn.docCount());
- kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank")).with(cb.term("author", "Sebastian")));
+ kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank")).with(cb.term("author", "Sebastian")));
assertEquals(2, kcn.docCount());
- kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank"))
+ kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Frank"))
.with(cb.term("author", "Sebastian"))
.with(cb.term("author", "Peter")));
assertEquals(3, kcn.docCount());
- kcn.fromBuilder(cb.orGroup(cb.term("author", "Huhu"))
+ kcn.fromBuilder(cb.orGroup().with(cb.term("author", "Huhu"))
.with(cb.term("author", "Haha"))
.with(cb.term("author", "Hehe")));
assertEquals(0, kcn.docCount());
// Multi field orGroup tests
- kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Peter")));
+ kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1")).with(cb.term("author", "Peter")));
assertEquals(2, kcn.docCount());
- kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
+ kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
assertEquals(1, kcn.docCount());
- kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Michael")));
+ kcn.fromBuilder(cb.orGroup().with(cb.term("ID", "doc-1")).with(cb.term("author", "Michael")));
assertEquals(1, kcn.docCount());
// Simple andGroup tests
- kcn.fromBuilder(cb.andGroup(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
+ kcn.fromBuilder(cb.andGroup().with(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
assertEquals(0, kcn.docCount());
- kcn.fromBuilder(cb.andGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
+ kcn.fromBuilder(cb.andGroup().with(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
assertEquals(1, kcn.docCount());
// andGroup in keyword field test
- kcn.fromBuilder(cb.andGroup(cb.term("textClass", "reisen")).with(cb.term("textClass", "finanzen")));
+ kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "reisen")).with(cb.term("textClass", "finanzen")));
assertEquals(1, kcn.docCount());
- kcn.fromBuilder(cb.andGroup(cb.term("textClass", "reisen")).with(cb.term("textClass", "kultur")));
+ kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "reisen")).with(cb.term("textClass", "kultur")));
assertEquals(2, kcn.docCount());
- kcn.fromBuilder(cb.andGroup(cb.term("textClass", "finanzen")).with(cb.term("textClass", "kultur")));
+ kcn.fromBuilder(cb.andGroup().with(cb.term("textClass", "finanzen")).with(cb.term("textClass", "kultur")));
assertEquals(0, kcn.docCount());
kcn.fromBuilder(cb.term("text", "mann"));
@@ -111,8 +111,8 @@
ki.addDoc(createDoc2());
ki.addDoc(createDoc3());
ki.commit();
- CollectionBuilderNew cb = new CollectionBuilderNew();
- KrillCollectionNew kcn = new KrillCollectionNew(ki);
+ CollectionBuilder cb = new CollectionBuilder();
+ KrillCollection kcn = new KrillCollection(ki);
// Simple negation tests
kcn.fromBuilder(cb.term("author", "Frank").not());
@@ -126,12 +126,12 @@
// orGroup with simple Negation
kcn.fromBuilder(
- cb.orGroup(cb.term("textClass", "kultur").not()).with(cb.term("author", "Peter"))
+ cb.orGroup().with(cb.term("textClass", "kultur").not()).with(cb.term("author", "Peter"))
);
assertEquals(2, kcn.docCount());
kcn.fromBuilder(
- cb.orGroup(cb.term("textClass", "kultur").not()).with(cb.term("author", "Sebastian"))
+ cb.orGroup().with(cb.term("textClass", "kultur").not()).with(cb.term("author", "Sebastian"))
);
assertEquals(1, kcn.docCount());
@@ -143,8 +143,8 @@
ki.addDoc(createDoc1());
ki.addDoc(createDoc2());
ki.commit();
- CollectionBuilderNew cb = new CollectionBuilderNew();
- KrillCollectionNew kcn = new KrillCollectionNew(ki);
+ CollectionBuilder cb = new CollectionBuilder();
+ KrillCollection kcn = new KrillCollection(ki);
kcn.fromBuilder(cb.term("author", "Frank"));
assertEquals(1, kcn.docCount());
@@ -231,8 +231,8 @@
ki.addDoc(createDoc2());
ki.addDoc(createDoc3());
ki.commit();
- CollectionBuilderNew cb = new CollectionBuilderNew();
- KrillCollectionNew kcn = new KrillCollectionNew(ki);
+ CollectionBuilder cb = new CollectionBuilder();
+ KrillCollection kcn = new KrillCollection(ki);
kcn.fromBuilder(cb.date("pubDate", "2005"));
assertEquals(3, kcn.docCount());
@@ -294,8 +294,8 @@
ki.addDoc(createDoc3());
ki.commit();
- CollectionBuilderNew cb = new CollectionBuilderNew();
- KrillCollectionNew kcn = new KrillCollectionNew(ki);
+ CollectionBuilder cb = new CollectionBuilder();
+ KrillCollection kcn = new KrillCollection(ki);
kcn.fromBuilder(cb.re("author", "Fran.*"));
assertEquals(1, kcn.docCount());
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSON.java
index cbc60bc..46e59cd 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSON.java
@@ -30,7 +30,7 @@
String metaQuery = _getJSONString("collection_1.jsonld");
KrillCollection kc = new KrillCollection(metaQuery);
assertEquals(kc.toString(),
- "filter with QueryWrapperFilter(+pubDate:20000101); ");
+ "pubDate:[20000101 TO 20000101]");
};
@@ -39,8 +39,7 @@
String metaQuery = _getJSONString("collection_2.jsonld");
KrillCollection kc = new KrillCollection(metaQuery);
assertEquals(kc.toString(),
- "filter with QueryWrapperFilter(+(+pubDate:"
- + "[19900000 TO 99999999] +pubDate:[0 TO 20061099])); ");
+ "AndGroup(pubDate:[19900000 TO 99999999] pubDate:[0 TO 20061099])");
};
@@ -56,8 +55,7 @@
public void collection5 () {
String metaQuery = _getJSONString("collection_5.jsonld");
KrillCollection kc = new KrillCollection(metaQuery);
- assertEquals(kc.toString(), "filter with QueryWrapperFilter(+(pubDate:"
- + "[19900000 TO 99999999] title:Mannheim)); ");
+ assertEquals(kc.toString(), "OrGroup(pubDate:[19900000 TO 99999999] title:Mannheim)");
};
@@ -68,7 +66,7 @@
assertFalse(ks.hasErrors());
assertFalse(ks.hasWarnings());
assertFalse(ks.hasMessages());
- assertEquals("filter with QueryWrapperFilter(+author:/Goethe/); ", ks
+ assertEquals("author:/Goethe/", ks
.getCollection().toString());
};
@@ -80,7 +78,7 @@
assertFalse(ks.hasErrors());
assertFalse(ks.hasWarnings());
assertFalse(ks.hasMessages());
- assertEquals("filter with QueryWrapperFilter(-author:/Goethe/); ", ks
+ assertEquals("-author:/Goethe/", ks
.getCollection().toString());
};
@@ -92,12 +90,12 @@
assertFalse(ks.hasErrors());
assertFalse(ks.hasWarnings());
assertFalse(ks.hasMessages());
- assertEquals("filter with QueryWrapperFilter(-author:Goethe); ", ks
+ assertEquals("-author:Goethe", ks
.getCollection().toString());
};
- @Ignore
+ @Test
public void nocollectiontypegiven () {
String metaQuery = _getJSONString("multiterm_rewrite_collection.jsonld");
KrillCollection kc = new KrillCollection(metaQuery);
@@ -109,8 +107,8 @@
public void noCollection () {
String metaQuery = _getJSONString("no_collection.jsonld");
KrillCollection kc = new KrillCollection(metaQuery);
- assertEquals("filter with QueryWrapperFilter(+corpusID:WPD); ",
- kc.toString());
+ assertTrue(kc.hasErrors());
+ assertEquals("", kc.toString());
};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSONLegacy.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSONLegacy.java
index f83804f..2c668c3 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSONLegacy.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionJSONLegacy.java
@@ -3,7 +3,7 @@
import java.util.*;
import java.io.*;
-import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.KrillCollectionLegacy;
import static de.ids_mannheim.korap.TestSimple.*;
@@ -20,7 +20,7 @@
public void metaQuery1 () {
String metaQuery = getString(getClass().getResource(
"/queries/metaquery.jsonld").getFile());
- KrillCollection kc = new KrillCollection(metaQuery);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(metaQuery);
assertEquals("filter with QueryWrapperFilter(+textClass:wissenschaft)",
kc.getFilter(0).toString());
@@ -38,7 +38,7 @@
public void metaQuery2 () {
String metaQuery = getString(getClass().getResource(
"/queries/metaquery2.jsonld").getFile());
- KrillCollection kc = new KrillCollection(metaQuery);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(metaQuery);
assertEquals(1, kc.getCount());
assertEquals(
"filter with QueryWrapperFilter(+(+author:Hesse +pubDate:[0 TO 20131205]))",
@@ -50,7 +50,7 @@
public void metaQuery3 () {
String metaQuery = getString(getClass().getResource(
"/queries/metaquery4.jsonld").getFile());
- KrillCollection kc = new KrillCollection(metaQuery);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(metaQuery);
assertEquals(1, kc.getCount());
assertEquals(
"filter with QueryWrapperFilter(+pubDate:[20000101 TO 20131231])",
@@ -62,7 +62,7 @@
public void metaQuery7 () {
String metaQuery = getString(getClass().getResource(
"/queries/metaquery7.jsonld").getFile());
- KrillCollection kc = new KrillCollection(metaQuery);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(metaQuery);
assertEquals(2, kc.getCount());
assertEquals(
"filter with QueryWrapperFilter(+(corpusID:c-1 corpusID:c-2))",
@@ -77,7 +77,7 @@
public void metaQuery9 () {
String metaQuery = getString(getClass().getResource(
"/queries/metaquery9.jsonld").getFile());
- KrillCollection kc = new KrillCollection(metaQuery);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(metaQuery);
assertEquals(1, kc.getCount());
assertEquals("filter with QueryWrapperFilter(+corpusID:WPD)", kc
.getFilter(0).toString());
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
index 8341c1c..287ceba 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
@@ -4,7 +4,7 @@
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.KrillCollectionLegacy;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.KrillQuery;
import de.ids_mannheim.korap.query.QueryBuilder;
@@ -38,10 +38,10 @@
};
ki.commit();
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
// Create Virtual collections:
- KrillCollection kc = new KrillCollection(ki);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
assertEquals("Documents", 7, kc.numberOf("documents"));
@@ -107,10 +107,10 @@
ki.commit();
};
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
// Create Virtual collections:
- KrillCollection kc = new KrillCollection(ki);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
assertEquals("Documents", 7, kc.numberOf("documents"));
@@ -184,10 +184,10 @@
ki.commit();
- CollectionBuilder kf = new CollectionBuilder();
+ CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
// Create Virtual collections:
- KrillCollection kc = new KrillCollection(ki);
+ KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
kc.filter(kf.and("textClass", "reisen").and("textClass",
"freizeit-unterhaltung"));
assertEquals("Documents", 5, kc.numberOf("documents"));
@@ -237,7 +237,7 @@
assertEquals(86, kr.getTotalResults());
// Create Virtual collections:
- KrillCollection kc = new KrillCollection();
+ KrillCollectionLegacy kc = new KrillCollectionLegacy();
kc.filterUIDs(new String[] { "2", "3", "4" });
kc.setIndex(ki);
assertEquals("Documents", 3, kc.numberOf("documents"));
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
index 8408176..52dc59d 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
@@ -3,7 +3,7 @@
import java.util.*;
import java.io.*;
-import de.ids_mannheim.korap.collection.CollectionBuilderNew;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import static org.junit.Assert.*;
import org.junit.Test;
@@ -17,42 +17,42 @@
@Test
public void builderTerm () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("author:tree",
kc.term("author", "tree").toString());
};
@Test
public void builderRegex () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("QueryWrapperFilter(author:/tre*?/)",
kc.re("author", "tre*?").toString());
};
@Test
public void builderDateYear () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[20050000 TO 20059999]",
kc.date("pubDate", "2005").toString());
};
@Test
public void builderDateMonth () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[20051000 TO 20051099]",
kc.date("pubDate", "2005-10").toString());
};
@Test
public void builderDateDay () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[20051011 TO 20051011]",
kc.date("pubDate", "2005-10-11").toString());
};
@Test
public void builderDateBorders () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
// CollectionBuilderNew.CollectionBuilderInterface kbi = ;
assertNull(kc.date("pubDate", ""));
@@ -66,7 +66,7 @@
@Test
public void builderSince () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[20050000 TO 99999999]",
kc.since("pubDate", "2005").toString());
@@ -80,7 +80,7 @@
@Test
public void builderTill () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[0 TO 20059999]",
kc.till("pubDate", "2005").toString());
@@ -94,71 +94,71 @@
@Test
public void builderAndSimple () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("author:tree", kc.andGroup(kc.term("author", "tree")).toString());
+ CollectionBuilder kc = new CollectionBuilder();
+ assertEquals("author:tree", kc.andGroup().with(kc.term("author", "tree")).toString());
};
@Test
public void builderOrSimple () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("author:tree", kc.orGroup(kc.term("author", "tree")).toString());
+ CollectionBuilder kc = new CollectionBuilder();
+ assertEquals("author:tree", kc.orGroup().with(kc.term("author", "tree")).toString());
};
@Test
public void builderAndCombined () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("AndGroup(author:tree title:name)",
- kc.andGroup(kc.term("author", "tree"))
+ kc.andGroup().with(kc.term("author", "tree"))
.with(kc.term("title", "name")).toString());
};
@Test
public void builderAndNestedSimple () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("AndGroup(author:tree title:name)",
- kc.andGroup(kc.andGroup(kc.term("author", "tree")).with(kc.term("title", "name"))).toString());
+ kc.andGroup().with(kc.andGroup().with(kc.term("author", "tree")).with(kc.term("title", "name"))).toString());
};
@Test
public void builderOrCombined () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("OrGroup(author:tree title:name)",
- kc.orGroup(kc.term("author", "tree"))
+ kc.orGroup().with(kc.term("author", "tree"))
.with(kc.term("title", "name")).toString());
};
@Test
public void builderOrNestedSimple () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
+ CollectionBuilder kc = new CollectionBuilder();
assertEquals("OrGroup(author:tree title:name)",
- kc.orGroup(kc.orGroup(kc.term("author", "tree"))
+ kc.orGroup().with(kc.orGroup().with(kc.term("author", "tree"))
.with(kc.term("title", "name"))).toString()
);
};
@Test
public void builderGroups () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
- String g = kc.orGroup(
- kc.orGroup(kc.term("author", "tree1")).with(kc.term("title", "name1"))
+ CollectionBuilder kc = new CollectionBuilder();
+ String g = kc.orGroup().with(
+ kc.orGroup().with(kc.term("author", "tree1")).with(kc.term("title", "name1"))
).with(
- kc.andGroup(kc.term("author", "tree2")).with(kc.term("title", "name2"))
+ kc.andGroup().with(kc.term("author", "tree2")).with(kc.term("title", "name2"))
).toString();
assertEquals("OrGroup(OrGroup(author:tree1 title:name1) AndGroup(author:tree2 title:name2))", g);
};
@Test
public void builderNegationRoot () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
- CollectionBuilderNew.CollectionBuilderInterface kbi = kc.orGroup(kc.term("author", "tree1")).with(kc.term("title", "name1"));
+ CollectionBuilder kc = new CollectionBuilder();
+ CollectionBuilder.CollectionBuilderInterface kbi = kc.orGroup().with(kc.term("author", "tree1")).with(kc.term("title", "name1"));
assertEquals(
"OrGroup(author:tree1 title:name1)",
kbi.toString());
assertFalse(kbi.isNegative());
- kbi = kc.andGroup(
- kc.orGroup(kc.term("author", "tree1")).with(kc.term("title", "name1"))
+ kbi = kc.andGroup().with(
+ kc.orGroup().with(kc.term("author", "tree1")).with(kc.term("title", "name1"))
).not();
assertEquals("OrGroup(author:tree1 title:name1)", kbi.toString());
assertTrue(kbi.isNegative());
@@ -167,17 +167,17 @@
@Test
public void builderNegation () throws IOException {
- CollectionBuilderNew kc = new CollectionBuilderNew();
- CollectionBuilderNew.CollectionBuilderInterface kbi =
+ CollectionBuilder kc = new CollectionBuilder();
+ CollectionBuilder.CollectionBuilderInterface kbi =
kc.term("author", "tree").not();
assertEquals("author:tree", kbi.toString());
assertTrue(kbi.isNegative());
- kbi = kc.andGroup(kc.term("author", "tree").not());
+ kbi = kc.andGroup().with(kc.term("author", "tree").not());
assertEquals("author:tree", kbi.toString());
assertTrue(kbi.isNegative());
- kbi = kc.orGroup(kc.term("author", "tree").not());
+ kbi = kc.orGroup().with(kc.term("author", "tree").not());
assertEquals("author:tree", kbi.toString());
assertTrue(kbi.isNegative());
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
index 1b1ef4d..603cbbc 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
@@ -14,6 +14,7 @@
import org.junit.Test;
import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
@@ -167,10 +168,17 @@
//0.8s
// Check if it includes some results
- BooleanFilter bf = new BooleanFilter();
+
+ /*
+BooleanFilter bf = new BooleanFilter();
bf.or("ID", "WPD_BBB.04463", "WPD_III.00758");
+ */
+
KrillCollection kc = new KrillCollection();
- kc.filter(bf);
+ CollectionBuilder cb = new CollectionBuilder();
+ kc.fromBuilder(cb.orGroup().with(cb.term("ID", "WPD_BBB.04463")).with(cb.term("ID", "WPD_III.00758")));
+
+ // kc.filter(bf);
ks.setCollection(kc);
kr = ks.apply(ki);
assertEquals(1094, kr.getMatch(0).getStartPos());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 87eeea3..2f23d35 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -10,6 +10,7 @@
import java.util.HashMap;
import org.junit.Test;
+import org.junit.Ignore;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -90,9 +91,9 @@
Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
- // Todo: This is not an acceptable collection, but sigh
- ks.getCollection().filter(
- new CollectionBuilder().and("textClass", "reisen"));
+ CollectionBuilder cb = new CollectionBuilder();
+
+ ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));
KrillMeta meta = ks.getMeta();
meta.setCount(3);
@@ -808,7 +809,12 @@
// Index was set but vc restricted to WPD
assertEquals(0, kc.numberOf("documents"));
+ /*
kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
+ */
+ CollectionBuilder cb = new CollectionBuilder();
+ kc.fromBuilder(cb.orGroup().with(kc.getBuilder()).with(cb.term("corpusSigle", "BZK")));
+
ks.setCollection(kc);
assertEquals(1, kc.numberOf("documents"));
@@ -894,16 +900,20 @@
assertEquals(0, kr.getStartIndex());
assertEquals(10, kr.getItemsPerPage());
+
json = getString(getClass().getResource(
"/queries/metaquery8-filtered-nested.jsonld").getFile());
ks = new Krill(json);
kr = ks.apply(ki);
+ /*
assertEquals("filter with QueryWrapperFilter("
+ "+(ID:WPD_AAA.00003 (+tokens:s:die"
+ " +tokens:s:Schriftzeichen)))",
ks.getCollection().getFilter(1).toString());
+ */
+ assertEquals("AndGroup(OrGroup(ID:WPD_AAA.00001 ID:WPD_AAA.00002) OrGroup(ID:WPD_AAA.00003 AndGroup(tokens:s:die tokens:s:Schriftzeichen)))", ks.getCollection().toString());
assertEquals(kr.getTotalResults(), 119);
assertEquals(0, kr.getStartIndex());
@@ -1062,6 +1072,7 @@
This test will crash soon - it's just here for nostalgic reasons!
*/
@Test
+ @Ignore
public void getFoundryDistribution () throws Exception {
// Construct index
KrillIndex ki = new KrillIndex();
@@ -1078,16 +1089,19 @@
assertEquals(7, kc.numberOf("documents"));
+ /*
HashMap map = kc.getTermRelation("foundries");
assertEquals((long) 7, map.get("-docs"));
assertEquals((long) 7, map.get("treetagger"));
assertEquals((long) 6, map.get("opennlp/morpho"));
assertEquals((long) 6, map.get("#__opennlp/morpho:###:treetagger"));
assertEquals((long) 7, map.get("#__opennlp:###:treetagger"));
+ */
};
@Test
+ @Ignore
public void getTextClassDistribution () throws Exception {
KrillIndex ki = new KrillIndex();
ki.addDoc("{" + " \"fields\" : ["
@@ -1117,7 +1131,7 @@
KrillCollection kc = new KrillCollection(ki);
assertEquals(3, kc.numberOf("documents"));
-
+ /*
HashMap map = kc.getTermRelation("textClass");
assertEquals((long) 1, map.get("singing"));
assertEquals((long) 1, map.get("jumping"));
@@ -1131,12 +1145,13 @@
assertEquals((long) 1, map.get("#__jumping:###:music"));
assertEquals((long) 1, map.get("#__music:###:singing"));
assertEquals(11, map.size());
-
+ */
// System.err.println(kc.getTermRelationJSON("textClass"));
};
@Test
+ @Ignore
public void getTextClassDistribution2 () throws Exception {
KrillIndex ki = new KrillIndex();
ki.addDoc("{" + " \"fields\" : ["
@@ -1175,7 +1190,7 @@
KrillCollection kc = new KrillCollection(ki);
assertEquals(4, kc.numberOf("documents"));
-
+ /*
HashMap map = kc.getTermRelation("textClass");
assertEquals((long) 1, map.get("singing"));
assertEquals((long) 1, map.get("jumping"));
@@ -1189,5 +1204,6 @@
assertEquals((long) 1, map.get("#__jumping:###:music"));
assertEquals((long) 1, map.get("#__music:###:singing"));
assertEquals(11, map.size());
+ */
};
};
diff --git a/src/test/resources/queries/metaquery8-filtered-further.jsonld b/src/test/resources/queries/metaquery8-filtered-further.jsonld
index 5a497cf..06d3766 100644
--- a/src/test/resources/queries/metaquery8-filtered-further.jsonld
+++ b/src/test/resources/queries/metaquery8-filtered-further.jsonld
@@ -18,6 +18,72 @@
"right":["char",90]
}
},
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00001",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
+ ]
+ },
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00003",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
+ ]
+ },
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00001",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00005",
+ "type": "type:string"
+ }
+ ]
+ }
+ ]
+ },
"collections": [
{
"@type": "koral:meta-filter",
diff --git a/src/test/resources/queries/metaquery8-filtered-nested.jsonld b/src/test/resources/queries/metaquery8-filtered-nested.jsonld
index 7c8cf43..fd14b53 100644
--- a/src/test/resources/queries/metaquery8-filtered-nested.jsonld
+++ b/src/test/resources/queries/metaquery8-filtered-nested.jsonld
@@ -1,73 +1,132 @@
{
- "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
- "query": {
- "@type": "koral:token",
- "wrap": {
- "@type": "koral:term",
- "foundry": "mate",
- "layer" : "lemma",
- "key":"der",
- "match": "match:eq"
- }
- },
- "meta":{
- "startPage":1,
- "count": 10,
- "context":{
- "left":["char",90],
- "right":["char",90]
- }
- },
- "collections": [
- {
- "@type": "koral:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "koral:group",
- "relation": "or",
- "@field": "koral:field#ID",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00001"
- },
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00002"
- }
- ]
- }
- },
- {
- "@type": "koral:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "koral:group",
- "relation": "or",
- "@field": "koral:field#ID",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00003"
- },
- {
- "@type": "koral:group",
- "relation": "and",
- "@field": "koral:field#tokens",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "s:die"
- },
- {
- "@type": "koral:term",
- "@value": "s:Schriftzeichen"
- }
-
- ]
- }
- ]
- }
- }
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00001",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
+ ]
+ },
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00003",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "tokens",
+ "match": "match:eq",
+ "value": "s:die",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "tokens",
+ "match": "match:eq",
+ "value": "s:Schriftzeichen",
+ "type": "type:string"
+ }
+ ]
+ }
+ ]
+ }
]
+ },
+ "collections": [
+ {
+ "@type": "koral:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "koral:group",
+ "relation": "or",
+ "@field": "koral:field#ID",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "koral:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "koral:group",
+ "relation": "or",
+ "@field": "koral:field#ID",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00003"
+ },
+ {
+ "@type": "koral:group",
+ "relation": "and",
+ "@field": "koral:field#tokens",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "@value": "s:die"
+ },
+ {
+ "@type": "koral:term",
+ "@value": "s:Schriftzeichen"
+ }
+
+ ]
+ }
+ ]
+ }
+ }
+ ]
}
diff --git a/src/test/resources/queries/metaquery8-filtered.jsonld b/src/test/resources/queries/metaquery8-filtered.jsonld
index 9e7ee8a..1a3ed3c 100644
--- a/src/test/resources/queries/metaquery8-filtered.jsonld
+++ b/src/test/resources/queries/metaquery8-filtered.jsonld
@@ -1,61 +1,107 @@
{
- "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
- "query": {
- "@type": "koral:token",
- "wrap": {
- "@type": "koral:term",
- "foundry": "mate",
- "layer" : "lemma",
- "key":"der",
- "match": "match:eq"
- }
- },
- "meta":{
- "startPage":1,
- "count": 10,
- "context":{
- "left":["char",90],
- "right":["char",90]
- }
- },
- "collections": [
- {
- "@type": "koral:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "koral:group",
- "relation": "or",
- "@field": "koral:field#ID",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00001"
- },
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00002"
- }
- ]
- }
- },
- {
- "@type": "koral:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "koral:group",
- "relation": "or",
- "@field": "koral:field#ID",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00003"
- },
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00002"
- }
- ]
- }
- }
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00001",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
+ ]
+ },
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00003",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
+ ]
+ }
]
+ },
+ "collections": [
+ {
+ "@type": "koral:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "koral:group",
+ "relation": "or",
+ "@field": "koral:field#ID",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "koral:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "koral:group",
+ "relation": "or",
+ "@field": "koral:field#ID",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00003"
+ },
+ {
+ "@type": "koral:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ }
+ ]
}
diff --git a/src/test/resources/queries/metaquery8.jsonld b/src/test/resources/queries/metaquery8.jsonld
index f2417f0..e3a2a57 100644
--- a/src/test/resources/queries/metaquery8.jsonld
+++ b/src/test/resources/queries/metaquery8.jsonld
@@ -1,42 +1,41 @@
{
- "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
- "query": {
- "@type": "koral:token",
- "wrap": {
- "@type": "koral:term",
- "foundry": "mate",
- "layer" : "lemma",
- "key":"der",
- "match": "match:eq"
- }
- },
- "meta":{
- "startPage":1,
- "count": 10,
- "context":{
- "left":["char",90],
- "right":["char",90]
- }
- },
- "collections": [
- {
- "@type": "koral:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "koral:group",
- "relation": "or",
- "@field": "koral:field#ID",
- "operands": [
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00001"
- },
- {
- "@type": "koral:term",
- "@value": "WPD_AAA.00002"
- }
- ]
- }
- }
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collection" : {
+ "@type" : "koral:docGroup",
+ "operation": "operation:or",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00001",
+ "type": "type:string"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "ID",
+ "match": "match:eq",
+ "value": "WPD_AAA.00002",
+ "type": "type:string"
+ }
]
+ }
}