Rename and refactor KorapFilter (1)
diff --git a/Changes b/Changes
index bac3b68..ed0ba02 100644
--- a/Changes
+++ b/Changes
@@ -17,7 +17,8 @@
renamed /analysis to /model,
renamed shrink() to focus(),
removed KorapPrimaryData,
- fixed a lot of wrong tests for WithinSpans (diewald)
+ fixed a lot of wrong tests for WithinSpans,
+ renamed KorapFilter to /collection/CollectionBuilder (diewald)
- [feature] Improved deserialization of SpanSubSpanQueries
(margaretha)
- [feature] Introducing the potential need for resorting queries
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index 6e4ad8e..232b5bb 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -8,6 +8,7 @@
import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.collection.BooleanFilter;
import de.ids_mannheim.korap.collection.FilterOperation;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.response.Notifications;
import org.apache.lucene.search.spans.SpanQuery;
@@ -26,28 +27,32 @@
import org.slf4j.LoggerFactory;
/**
- * That's a pretty ugly API to
- * create virtual collections.
- * It works - so I got that going for
- * me, which is nice.
+ * Create a Virtual Collection of documents by means of a KoralQuery
+ * or by applying manual filters and extensions on Lucene fields.
+ *
+ * <blockquote><pre>
+ * KorapCollection kc = new KorapCollection(json);
+ * kc.filterUIDs("a1", "a2", "a3");
+ * </pre></blockquote>
+ *
+ * <strong>Warning</strong>: This API is deprecated and will
+ * be replaced in future versions. It supports legacy versions of
+ * KoralQuery.
*
* @author diewald
*/
-
-
-// TODO: Make a cache for the bits!!! DELETE IT IN CASE OF AN EXTENSION OR A FILTER!
-// TODO: Maybe use randomaccessfilterstrategy
-// TODO: Maybe a constantScoreQuery can make things faster?
-
-// THIS MAY CHANGE for stuff like combining virtual collections
-// See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
-// 200805.mbox/%3C17080852.post@talk.nabble.com%3E
-
+/*
+ * TODO: Make a cache for the bits
+ * Delete it in case of an extension or a filter
+ * TODO: Maybe use randomaccessfilterstrategy
+ * TODO: Maybe a constantScoreQuery can make things faster?
+ * See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
+ * 200805.mbox/%3C17080852.post@talk.nabble.com%3E
+ */
public class KorapCollection extends Notifications {
private KorapIndex index;
private KorapDate created;
private String id;
- // private String error;
private ArrayList<FilterOperation> filter;
private int filterCount = 0;
@@ -62,10 +67,13 @@
this.filter = new ArrayList<FilterOperation>(5);
};
+
/**
- * Construct a new KorapCollection by passing a JSON query.
- * This supports collections with key "collection" and
+ * Construct a new KorapCollection by passing a KoralQuery.
+ * This supports collections with the key "collection" and
* legacy collections with the key "collections".
+ *
+ * @param jsonString The virtual collection as a KoralQuery.
*/
public KorapCollection (String jsonString) {
ObjectMapper mapper = new ObjectMapper();
@@ -73,7 +81,8 @@
try {
JsonNode json = mapper.readTree(jsonString);
-
+
+ // Deserialize from recent collections
if (json.has("collection")) {
this.fromJSON(json.get("collection"));
}
@@ -90,8 +99,9 @@
};
};
}
+ // Some exceptions ...
catch (QueryException qe) {
- this.addError(qe.getErrorCode(),qe.getMessage());
+ this.addError(qe.getErrorCode(), qe.getMessage());
}
catch (IOException e) {
this.addError(
@@ -104,11 +114,20 @@
};
+ /**
+ * Construct a new KorapCollection.
+ */
public KorapCollection () {
this.filter = new ArrayList<FilterOperation>(5);
};
+ /**
+ * Import the "collection" part of a KoralQuery.
+ *
+ * @param jsonString The "collection" part of a KoralQuery.
+ * @throws QueryException
+ */
public void fromJSON (String jsonString) throws QueryException {
ObjectMapper mapper = new ObjectMapper();
try {
@@ -120,14 +139,26 @@
};
+ /**
+ * Import the "collection" part of a KoralQuery.
+ *
+ * @param json The "collection" part of a KoralQuery
+ * as a {@link JsonNode} object.
+ * @throws QueryException
+ */
public void fromJSON (JsonNode json) throws QueryException {
- this.filter(new KorapFilter(json));
+ this.filter(new CollectionBuilder(json));
};
/**
- * Legacy API for collection filters.
+ * Import the "collections" part of a KoralQuery.
+ * This method is deprecated and will vanish in future versions.
+ *
+ * @param jsonString The "collections" part of a KoralQuery.
+ * @throws QueryException
*/
+ @Deprecated
public void fromJSONLegacy (String jsonString) throws QueryException {
ObjectMapper mapper = new ObjectMapper();
try {
@@ -140,7 +171,12 @@
/**
- * Legacy API for collection filters.
+ * Import the "collections" part of a KoralQuery.
+ * This method is deprecated and will vanish in future versions.
+ *
+ * @param json The "collections" part of a KoralQuery
+ * as a {@link JsonNode} object.
+ * @throws QueryException
*/
public void fromJSONLegacy (JsonNode json) throws QueryException {
if (!json.has("@type"))
@@ -151,14 +187,17 @@
String type = json.get("@type").asText();
- KorapFilter kf = new KorapFilter();
+ CollectionBuilder kf = new CollectionBuilder();
kf.setBooleanFilter(kf.fromJSONLegacy(json.get("@value"), "tokens"));
+
+ // Filter the collection
if (type.equals("korap:meta-filter")) {
if (DEBUG)
log.trace("Add Filter LEGACY");
this.filter(kf);
}
-
+
+ // Extend the collection
else if (type.equals("korap:meta-extend")) {
if (DEBUG)
log.trace("Add Extend LEGACY");
@@ -166,15 +205,27 @@
};
};
- public int getCount() {
- return this.filterCount;
+
+ /**
+ * Set the {@link KorapIndex} the virtual collection refers to.
+ *
+ * @param index The {@link KorapIndex} the virtual collection refers to.
+ */
+ public void setIndex (KorapIndex index) {
+ this.index = index;
};
- public void setIndex (KorapIndex ki) {
- this.index = ki;
- };
- // The checks asre not necessary
+ /**
+ * Add a filter by means of a {@link BooleanFilter}.
+ *
+ * <strong>Warning</strong>: Filters are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param filter The filter to add to the collection.
+ * @return The {@link KorapCollection} object for chaining.
+ */
+ // TODO: The checks may not be necessary
public KorapCollection filter (BooleanFilter filter) {
if (DEBUG)
log.trace("Added filter: {}", filter.toString());
@@ -199,7 +250,68 @@
return this;
};
- // Filter based on UIDs
+
+ /**
+ * Add a filter by means of a {@link CollectionBuilder} object.
+ *
+ * <strong>Warning</strong>: Filters are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param filter The filter to add to the collection.
+ * @return The {@link KorapCollection} object for chaining.
+ */
+ public KorapCollection filter (CollectionBuilder filter) {
+ return this.filter(filter.getBooleanFilter());
+ };
+
+
+ /**
+ * Add an extension by means of a {@link BooleanFilter}.
+ *
+ * <strong>Warning</strong>: Extensions are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param extension The extension to add to the collection.
+ * @return The {@link KorapCollection} object for chaining.
+ */
+ public KorapCollection extend (BooleanFilter extension) {
+ if (DEBUG)
+ log.trace("Added extension: {}", extension.toString());
+
+ this.filter.add(
+ new FilterOperation(
+ (Filter) new QueryWrapperFilter(extension.toQuery()),
+ true
+ )
+ );
+ this.filterCount++;
+ return this;
+ };
+
+
+ /**
+ * Add an extension by means of a {@link CollectionBuilder} object.
+ *
+ * <strong>Warning</strong>: Extensions are part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param extension The extension to add to the collection.
+ * @return The {@link KorapCollection} object for chaining.
+ */
+ public KorapCollection extend (CollectionBuilder extension) {
+ return this.extend(extension.getBooleanFilter());
+ };
+
+
+ /**
+ * Add a filter based on a list of unique document identifiers.
+ * UIDs may be indexed in the field "UID".
+ *
+ * This filter is not part of the legacy API!
+ *
+ * @param uids The list of unique document identifiers.
+ * @return The {@link KorapCollection} object for chaining.
+ */
public KorapCollection filterUIDs (String ... uids) {
BooleanFilter filter = new BooleanFilter();
filter.or("UID", uids);
@@ -209,38 +321,55 @@
};
- public KorapCollection filter (KorapFilter filter) {
- return this.filter(filter.getBooleanFilter());
- };
-
-
- public KorapCollection extend (BooleanFilter filter) {
- if (DEBUG)
- log.trace("Added extension: {}", filter.toString());
- this.filter.add(
- new FilterOperation(
- (Filter) new QueryWrapperFilter(filter.toQuery()),
- true
- )
- );
- this.filterCount++;
- return this;
- };
-
- public KorapCollection extend (KorapFilter filter) {
- return this.extend(filter.getBooleanFilter());
- };
-
-
- public ArrayList<FilterOperation> getFilters () {
+ /**
+ * Get the list of filters constructing the collection.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @return The list of filters.
+ */
+ public List<FilterOperation> getFilters () {
return this.filter;
};
- public FilterOperation getFilter (int i) {
- return this.filter.get(i);
+
+ /**
+ * Get a certain {@link FilterOperation} from the list of filters
+ * constructing the collection by its numerical index.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @param index The index position of the requested {@link FilterOperation}.
+ * @return The {@link FilterOperation} at the certain list position.
+ */
+ public FilterOperation getFilter (int index) {
+ return this.filter.get(index);
};
+ /**
+ * Get the number of filter operations constructing this collection.
+ *
+ * <strong>Warning</strong>: This is part of the collections
+ * legacy API and may vanish without warning.
+ *
+ * @return The number of filter operations constructing this collection.
+ */
+ public int getCount() {
+ return this.filterCount;
+ };
+
+
+ /**
+ * Generate a string representation of the virtual collection.
+ *
+ * <strong>Warning</strong>: This currently does not generate a valid
+ * KoralQuery string, so this may change in a future version.
+ *
+ * @return A string representation of the virtual collection.
+ */
public String toString () {
StringBuilder sb = new StringBuilder();
for (FilterOperation fo : this.filter) {
@@ -249,9 +378,19 @@
return sb.toString();
};
+
/**
- * Search in the virtual collection. This is just used for
- * testing purposes and not recommended for serious usage.
+ * Search in the virtual collection.
+ * This is mostly used for testing purposes
+ * and <strong>is not recommended</strong>
+ * as a common search API.
+ *
+ * Please use {@link KorapQuery#run} instead.
+ *
+ * @param query a {@link SpanQuery} to apply on the
+ * virtual collection.
+ * @return A {@link KorapResult} object representing the search's
+ * result.
*/
public KorapResult search (SpanQuery query) {
return this.index.search(
@@ -264,37 +403,47 @@
);
};
+
+ /**
+ * Create a bit vector representing the live documents of the
+ * virtual collection to be used in searches.
+ *
+ * @param atomic The {@link AtomicReaderContext} to search in.
+ * @return A bit vector representing the live documents of the
+ * virtual collection.
+ * @throws IOException
+ */
public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
- /*
- Use Bits.MatchAllBits(int len)
- */
+ // TODO: Probably use Bits.MatchAllBits(int len)
boolean noDoc = true;
FixedBitSet bitset;
-
+
+ // There are filters set
if (this.filterCount > 0) {
bitset = new FixedBitSet(atomic.reader().maxDoc());
- ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter.clone();
+ ArrayList<FilterOperation> filters =
+ (ArrayList<FilterOperation>) this.filter.clone();
FilterOperation kcInit = filters.remove(0);
if (DEBUG)
log.trace("FILTER: {}", kcInit);
-
+
// Init vector
DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null);
DocIdSetIterator filterIter = docids.iterator();
-
+
+ // The filter has an effect
if (filterIter != null) {
- if (DEBUG)
- log.trace("InitFilter has effect");
+ if (DEBUG) log.trace("InitFilter has effect");
bitset.or(filterIter);
noDoc = false;
};
-
+
+ // Apply all filters sequentially
for (FilterOperation kc : filters) {
- if (DEBUG)
- log.trace("FILTER: {}", kc);
+ if (DEBUG) log.trace("FILTER: {}", kc);
// TODO: BUG???
docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset);
@@ -310,22 +459,16 @@
};
continue;
};
- if (kc.isExtension()) {
- // System.err.println("Term found!");
- // System.err.println("Old Card:" + bitset.cardinality());
+ if (kc.isExtension())
bitset.or(filterIter);
- // System.err.println("New Card:" + bitset.cardinality());
- }
- else {
+ else
bitset.and(filterIter);
- };
};
if (!noDoc) {
FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
- if (livedocs != null) {
+ if (livedocs != null)
bitset.and(livedocs);
- };
};
}
else {
@@ -335,13 +478,39 @@
return bitset;
};
- public long numberOf (String foundry, String type) throws IOException {
+
+ /**
+ * Search for the number of occurrences of different types,
+ * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+ * collection.
+ *
+ * @param field The field containing the textual data and the
+ * annotations as a string.
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
+ * @throws IOException
+ * @see KorapIndex#numberOf
+ */
+ public long numberOf (String field, String type) throws IOException {
if (this.index == null)
return (long) -1;
- return this.index.numberOf(this, foundry, type);
+ return this.index.numberOf(this, field, type);
};
+
+ /**
+ * Search for the number of occurrences of different types,
+ * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+ * collection, in the <i>base</i> foundry.
+ *
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
+ * @throws IOException
+ * @see KorapIndex#numberOf
+ */
public long numberOf (String type) throws IOException {
if (this.index == null)
return (long) -1;
@@ -349,9 +518,9 @@
return this.index.numberOf(this, "tokens", type);
};
- // This is only for testing purposes!
+
@Deprecated
- public HashMap getTermRelation(String field) throws Exception {
+ public HashMap getTermRelation (String field) throws Exception {
if (this.index == null) {
HashMap<String,Long> map = new HashMap<>(1);
map.put("-docs", (long) 0);
@@ -361,8 +530,9 @@
return this.index.getTermRelation(this, field);
};
+
@Deprecated
- public String getTermRelationJSON(String field) throws IOException {
+ public String getTermRelationJSON (String field) throws IOException {
ObjectMapper mapper = new ObjectMapper();
StringWriter sw = new StringWriter();
sw.append("{\"field\":");
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 17ffb03..d64fd7f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -36,6 +36,24 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+/**
+ * KorapIndex implements a simple API for searching in and writing to a
+ * Lucene index and requesting information about the index's nature.
+ * <br />
+ *
+ * <blockquote><pre>
+ * KorapIndex ki = new KorapIndex(
+ * new MMapDirectory(new File("/myindex"))
+ * );
+ * </pre></blockquote>
+ *
+ * Properties can be stored in a properties file called 'index.properties'.
+ * Relevant properties are <code>lucene.version</code> and
+ * <code>lucene.name</code>.
+ *
+ * @author diewald
+ */
+
/*
TODO: Add word count as a meta data field!
TODO: Validate document import!
@@ -61,25 +79,6 @@
-> search for frequencies of VVFIN/gehen
-> c:VVFIN:[^:]*?:gehen:past:...
*/
-
-/**
- *
- * KorapIndex implements a simple API for searching in and writing to a
- * Lucene index and requesting several information about the index' nature.
- * <br />
- *
- * <pre>
- * KorapIndex ki = new KorapIndex(
- * new MMapDirectory(new File("/myindex"))
- * );
- * </pre>
- *
- * Properties can be stored in a properies file called 'index.properties'.
- * Relevant properties are <code>lucene.version</code> and
- * <code>lucene.name</code>.
- *
- * @author diewald
- */
public class KorapIndex {
// Todo: Use configuration
@@ -468,6 +467,7 @@
* @param type The type of meta information,
* e.g. <i>documents</i> or <i>sentences</i> as a string.
* @return The number of the occurrences.
+ * @see KorapCollection#numberOf
*/
public long numberOf (KorapCollection collection,
String field,
@@ -524,7 +524,17 @@
};
-
+ /**
+ * Search for the number of occurrences of different types,
+ * e.g. <i>documents</i>, <i>sentences</i> etc.
+ *
+ * @param field The field containing the textual data and the
+ * annotations as a string.
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
+ * @see KorapCollection#numberOf
+ */
public long numberOf (String field, String type) {
return this.numberOf(new KorapCollection(this), field, type);
};
@@ -538,6 +548,7 @@
* @param type The type of meta information,
* e.g. <i>documents</i> or <i>sentences</i> as a string.
* @return The number of the occurrences.
+ * @see KorapCollection#numberOf
*/
public long numberOf (String type) {
return this.numberOf("tokens", type);
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java
index 1d755dd..2388729 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java
@@ -12,8 +12,7 @@
import org.apache.lucene.search.NumericRangeQuery;
import de.ids_mannheim.korap.util.KorapDate;
-import de.ids_mannheim.korap.KorapFilter;
-
+import de.ids_mannheim.korap.KorapCollection;
import de.ids_mannheim.korap.util.QueryException;
import org.slf4j.Logger;
@@ -35,7 +34,7 @@
private String type;
// Logger
- private final static Logger log = LoggerFactory.getLogger(KorapFilter.class);
+ private final static Logger log = LoggerFactory.getLogger(KorapCollection.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
diff --git a/src/main/java/de/ids_mannheim/korap/KorapFilter.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
similarity index 87%
rename from src/main/java/de/ids_mannheim/korap/KorapFilter.java
rename to src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index b567741..10a6b65 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -1,4 +1,4 @@
-package de.ids_mannheim.korap;
+package de.ids_mannheim.korap.collection;
import de.ids_mannheim.korap.collection.BooleanFilter;
import de.ids_mannheim.korap.collection.RegexFilter;
@@ -13,37 +13,36 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
-/*
- Todo: WildCardFilter!
- Support: delete boolean etc.
- Support: supports foundries
-*/
-
/**
- * @author diewald
- *
- * KorapFilter implements a simple API for creating meta queries
+ * CollectionBuilder implements a simple API for creating queries
* constituing Virtual Collections.
+ *
+ * @author diewald
*/
-public class KorapFilter {
+/*
+ * Todo: WildCardFilter!
+ * Todo: Support delete boolean etc.
+ * Todo: Supports foundries
+ */
+public class CollectionBuilder {
private BooleanFilter filter;
// Logger
- private final static Logger log = LoggerFactory.getLogger(KorapFilter.class);
+ private final static Logger log = LoggerFactory.getLogger(CollectionBuilder.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- public KorapFilter () {
+ public CollectionBuilder () {
filter = new BooleanFilter();
};
- public KorapFilter (JsonNode json) throws QueryException {
+ public CollectionBuilder (JsonNode json) throws QueryException {
filter = this.fromJSON(json, "tokens");
};
- protected BooleanFilter fromJSON (JsonNode json, String field) throws QueryException {
+
+ public BooleanFilter fromJSON (JsonNode json, String field) throws QueryException {
BooleanFilter bfilter = new BooleanFilter();
// TODO: THIS UNFORTUNATELY BREAKS TESTS
@@ -70,7 +69,7 @@
if (!json.has("value"))
throw new QueryException(612, "Dates require value fields");
-//-
+
String dateStr = json.get("value").asText();
if (json.has("match"))
match = json.get("match").asText();
@@ -87,18 +86,17 @@
bfilter.till(dateStr);
break;
};
- /*
- No good reason for gt or lt
- */
+ // No good reason for gt or lt
return bfilter;
}
+
else if (valtype.equals("type:string")) {
if (json.has("match"))
match = json.get("match").asText();
- if (match.equals("match:eq")) {
+ if (match.equals("match:eq"))
bfilter.and(key, json.get("value").asText());
- };
+
return bfilter;
};
}
@@ -106,7 +104,6 @@
// nested group
else if (type.equals("korap:docGroup")) {
if (!json.has("operands") || !json.get("operands").isArray())
-//-
throw new QueryException(612, "Groups need operands");
String operation = "operation:and";
@@ -116,30 +113,27 @@
BooleanFilter group = new BooleanFilter();
for (JsonNode operand : json.get("operands")) {
- if (operation.equals("operation:and")) {
+ if (operation.equals("operation:and"))
group.and(this.fromJSON(operand, field));
- }
- else if (operation.equals("operation:or")) {
+
+ else if (operation.equals("operation:or"))
group.or(this.fromJSON(operand, field));
- }
- else {
+
+ else
throw new QueryException(613, "Unknown document group operation");
- };
};
bfilter.and(group);
return bfilter;
}
// Unknown type
- else {
-// -
- throw new QueryException(613, "Collection query type has to be doc or docGroup");
- };
-
+ else throw new QueryException(613, "Collection query type has to be doc or docGroup");
+
return new BooleanFilter();
};
-
- /*
+
+
+ /*
String type = json.get("@type").asText();
String field = _getField(json);
@@ -153,13 +147,11 @@
};
};
*/
- // };
- protected BooleanFilter fromJSONLegacy (JsonNode json, String field)
+ public BooleanFilter fromJSONLegacy (JsonNode json, String field)
throws QueryException {
BooleanFilter bfilter = new BooleanFilter();
-//-
if (!json.has("@type"))
throw new QueryException(612, "JSON-LD group has no @type attribute");
@@ -177,21 +169,16 @@
return bfilter;
}
else if (type.equals("korap:group")) {
-//-
if (!json.has("relation"))
throw new QueryException(612, "Group needs relation");
if (!json.has("operands"))
-//-
throw new QueryException(612, "Group needs operand list");
- //return bfilter;
-
String dateStr, till;
JsonNode operands = json.get("operands");
if (!operands.isArray())
-//-
throw new QueryException(612, "Group needs operand list");
if (DEBUG)
@@ -226,7 +213,6 @@
break;
case "and":
-//-
if (operands.size() < 1)
throw new QueryException(612, "Operation needs at least two operands");
@@ -237,7 +223,6 @@
break;
case "or":
-//-
if (operands.size() < 1)
throw new QueryException(612, "Operation needs at least two operands");
@@ -247,7 +232,6 @@
bfilter.and(group);
break;
-//-
default:
throw new QueryException(613, "Relation is not supported");
};
@@ -295,8 +279,6 @@
};
public BooleanFilter or (String type, String ... terms) {
- if (DEBUG)
- log.debug("Got some terms here");
BooleanFilter bf = new BooleanFilter();
bf.or(type, terms);
return bf;
@@ -342,7 +324,7 @@
return new RegexFilter(regex);
};
- public BooleanFilter getBooleanFilter() {
+ public BooleanFilter getBooleanFilter () {
return this.filter;
};
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index 213d11d..17dea33 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -6,7 +6,6 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.KorapCollection;
-import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapSearch;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.KorapQuery;
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
new file mode 100644
index 0000000..18c8e57
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
@@ -0,0 +1,83 @@
+package de.ids_mannheim.korap.collection;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.collection.CollectionBuilder;
+
+@RunWith(JUnit4.class)
+public class TestCollectionBuilder {
+
+ @Test
+ public void filterExample () throws IOException {
+ CollectionBuilder kf = new CollectionBuilder();
+
+ assertEquals("+textClass:tree", kf.and("textClass","tree").toString());
+ assertEquals("+textClass:tree +textClass:sport", kf.and("textClass","tree").and("textClass","sport").toString());
+ assertEquals("+textClass:tree +textClass:sport textClass:news", kf.and("textClass","tree").and("textClass","sport").or("textClass","news").toString());
+ assertEquals("+textClass:tree +textClass:sport +textClass:news", kf.and("textClass", "tree", "sport", "news").toString());
+
+ assertEquals("corpusID:c-1 corpusID:c-2 corpusID:c-3", kf.or("corpusID", "c-1", "c-2", "c-3").toString());
+ };
+
+
+ @Test
+ public void rangeExample () throws IOException {
+ CollectionBuilder kf = new CollectionBuilder();
+ assertEquals("+pubDate:[20030604 TO 20030899]", kf.between("2003-06-04", "2003-08-99").toString());
+ assertEquals("+pubDate:[0 TO 20030604]", kf.till("2003-06-04").toString());
+ assertEquals("+pubDate:[20030604 TO 99999999]", kf.since("2003-06-04").toString());
+ assertEquals("+pubDate:20030604", kf.date("2003-06-04").toString());
+ };
+
+
+ @Test
+ public void rangeLimited () throws IOException {
+ CollectionBuilder kf = new CollectionBuilder();
+ assertEquals("+pubDate:[20050000 TO 20099999]", kf.between("2005", "2009").toString());
+ assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("200510", "200908").toString());
+ assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("2005-10", "2009-08").toString());
+ assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-1006", "2009-0803").toString());
+ assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-10-06", "2009-08-03").toString());
+
+ assertEquals("+pubDate:[0 TO 20059999]", kf.till("2005").toString());
+ assertEquals("+pubDate:[0 TO 20051099]", kf.till("200510").toString());
+ assertEquals("+pubDate:[0 TO 20051099]", kf.till("2005-10").toString());
+ assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-1006").toString());
+ assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-10-06").toString());
+
+ assertEquals("+pubDate:[20050000 TO 99999999]", kf.since("2005").toString());
+ assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("200510").toString());
+ assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("2005-10").toString());
+ assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-1006").toString());
+ assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-10-06").toString());
+
+ assertEquals("+pubDate:[20050000 TO 20059999]", kf.date("2005").toString());
+ assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("200510").toString());
+ assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("2005-10").toString());
+ assertEquals("+pubDate:20051006", kf.date("2005-1006").toString());
+ assertEquals("+pubDate:20051006", kf.date("2005-10-06").toString());
+ };
+
+ @Test
+ public void rangeFailure () throws IOException {
+ CollectionBuilder kf = new CollectionBuilder();
+ assertEquals("", kf.between("aaaa-bb-cc", "aaaabbcc").toString());
+ assertEquals("", kf.till("aaaa-bb-cc").toString());
+ assertEquals("", kf.since("aaaa-bb-cc").toString());
+ assertEquals("", kf.date("aaaa-bb-cc").toString());
+ };
+
+
+ // TODO: More extensive testing!
+};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
index eb09d8a..514cb28 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
@@ -5,7 +5,6 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.KorapCollection;
-import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.KorapQuery;
import de.ids_mannheim.korap.collection.BooleanFilter;
@@ -43,7 +42,7 @@
};
ki.commit();
- KorapFilter kf = new KorapFilter();
+ CollectionBuilder kf = new CollectionBuilder();
// Create Virtual collections:
KorapCollection kc = new KorapCollection(ki);
@@ -118,7 +117,7 @@
ki.commit();
};
- KorapFilter kf = new KorapFilter();
+ CollectionBuilder kf = new CollectionBuilder();
// Create Virtual collections:
KorapCollection kc = new KorapCollection(ki);
@@ -198,7 +197,7 @@
ki.commit();
- KorapFilter kf = new KorapFilter();
+ CollectionBuilder kf = new CollectionBuilder();
// Create Virtual collections:
KorapCollection kc = new KorapCollection(ki);
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java
deleted file mode 100644
index f89145f..0000000
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java
+++ /dev/null
@@ -1,88 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import java.util.*;
-import java.io.*;
-
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Bits;
-
-import static org.junit.Assert.*;
-import org.junit.Test;
-import org.junit.Ignore;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import de.ids_mannheim.korap.KorapFilter;
-
-@RunWith(JUnit4.class)
-public class TestKorapFilter {
-
- @Test
- public void filterExample () throws IOException {
-
- KorapFilter kf = new KorapFilter();
-
- assertEquals("+textClass:tree", kf.and("textClass","tree").toString());
- assertEquals("+textClass:tree +textClass:sport", kf.and("textClass","tree").and("textClass","sport").toString());
- assertEquals("+textClass:tree +textClass:sport textClass:news", kf.and("textClass","tree").and("textClass","sport").or("textClass","news").toString());
- assertEquals("+textClass:tree +textClass:sport +textClass:news", kf.and("textClass", "tree", "sport", "news").toString());
-
- assertEquals("corpusID:c-1 corpusID:c-2 corpusID:c-3", kf.or("corpusID", "c-1", "c-2", "c-3").toString());
- };
-
- @Test
- public void rangeExample () throws IOException {
-
- KorapFilter kf = new KorapFilter();
-
- assertEquals("+pubDate:[20030604 TO 20030899]", kf.between("2003-06-04", "2003-08-99").toString());
- assertEquals("+pubDate:[0 TO 20030604]", kf.till("2003-06-04").toString());
- assertEquals("+pubDate:[20030604 TO 99999999]", kf.since("2003-06-04").toString());
- assertEquals("+pubDate:20030604", kf.date("2003-06-04").toString());
- };
-
- @Test
- public void rangeLimited () throws IOException {
-
- KorapFilter kf = new KorapFilter();
-
- assertEquals("+pubDate:[20050000 TO 20099999]", kf.between("2005", "2009").toString());
- assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("200510", "200908").toString());
- assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("2005-10", "2009-08").toString());
- assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-1006", "2009-0803").toString());
- assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-10-06", "2009-08-03").toString());
-
- assertEquals("+pubDate:[0 TO 20059999]", kf.till("2005").toString());
- assertEquals("+pubDate:[0 TO 20051099]", kf.till("200510").toString());
- assertEquals("+pubDate:[0 TO 20051099]", kf.till("2005-10").toString());
- assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-1006").toString());
- assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-10-06").toString());
-
- assertEquals("+pubDate:[20050000 TO 99999999]", kf.since("2005").toString());
- assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("200510").toString());
- assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("2005-10").toString());
- assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-1006").toString());
- assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-10-06").toString());
-
- assertEquals("+pubDate:[20050000 TO 20059999]", kf.date("2005").toString());
- assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("200510").toString());
- assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("2005-10").toString());
- assertEquals("+pubDate:20051006", kf.date("2005-1006").toString());
- assertEquals("+pubDate:20051006", kf.date("2005-10-06").toString());
- };
-
- @Test
- public void rangeFailure () throws IOException {
-
- KorapFilter kf = new KorapFilter();
- assertEquals("", kf.between("aaaa-bb-cc", "aaaabbcc").toString());
- assertEquals("", kf.till("aaaa-bb-cc").toString());
- assertEquals("", kf.since("aaaa-bb-cc").toString());
- assertEquals("", kf.date("aaaa-bb-cc").toString());
- };
-
-
- // TODO: More extensive testing!
-
-};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index e1aae5e..b5f3dcf 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -11,7 +11,7 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.SearchContext;
-import de.ids_mannheim.korap.KorapFilter;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.KorapResult;
import java.nio.file.Files;
import java.nio.file.FileSystem;
@@ -97,7 +97,7 @@
new KorapQuery("tokens").seg("s:Buchstaben")
);
ks.getCollection().filter(
- new KorapFilter().and("textClass", "reisen")
+ new CollectionBuilder().and("textClass", "reisen")
);
ks.setCount(3);
ks.setStartIndex(5);
@@ -913,7 +913,7 @@
assertEquals(0, kc.numberOf("documents"));
kc.extend(
- new KorapFilter().or("corpusSigle", "BZK")
+ new CollectionBuilder().or("corpusSigle", "BZK")
);
ks.setCollection(kc);
assertEquals(1, kc.numberOf("documents"));
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 34b1f29..387b2cb 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -11,7 +11,6 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.SearchContext;
-import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapResult;
import java.nio.file.Files;
import java.nio.file.FileSystem;