Finally removed legacy collection classes
Change-Id: I05948702b76d14a9024cefa3c7c5882e00cb718a
diff --git a/src/main/java/de/ids_mannheim/korap/Krill.java b/src/main/java/de/ids_mannheim/korap/Krill.java
index e026682..0d02b79 100644
--- a/src/main/java/de/ids_mannheim/korap/Krill.java
+++ b/src/main/java/de/ids_mannheim/korap/Krill.java
@@ -217,7 +217,8 @@
}
else if (json.has("collections")) {
- this.addError(899, "Collections are not supported anymore in favour of a single collection");
+ this.addError(899,
+ "Collections are not supported anymore in favour of a single collection");
};
}
catch (QueryException q) {
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index ceb6671..14cbec1 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -93,7 +93,8 @@
this.fromJson(json.get("collection"));
else if (json.has("collections"))
- this.addError(899, "Collections are not supported anymore in favour of a single collection");
+ this.addError(899,
+ "Collections are not supported anymore in favour of a single collection");
}
// Query Exception
@@ -155,7 +156,8 @@
};
- private CollectionBuilder.Interface _fromJson (JsonNode json) throws QueryException {
+ private CollectionBuilder.Interface _fromJson (JsonNode json)
+ throws QueryException {
if (!json.has("@type")) {
throw new QueryException(701,
@@ -189,14 +191,14 @@
// TODO: This isn't stable yet
switch (match) {
- case "match:eq":
- return this.cb.date(key, dateStr);
- case "match:ne":
- return this.cb.date(key, dateStr).not();
- case "match:geq":
- return this.cb.since(key, dateStr);
- case "match:leq":
- return this.cb.till(key, dateStr);
+ case "match:eq":
+ return this.cb.date(key, dateStr);
+ case "match:ne":
+ return this.cb.date(key, dateStr).not();
+ case "match:geq":
+ return this.cb.since(key, dateStr);
+ case "match:leq":
+ return this.cb.till(key, dateStr);
};
throw new QueryException(841, "Match relation unknown for type");
@@ -209,22 +211,26 @@
switch (match) {
- case "match:eq":
- return this.cb.term(key, json.get("value").asText());
- case "match:ne":
- return this.cb.term(key, json.get("value").asText()).not();
+ case "match:eq":
+ return this.cb.term(key, json.get("value").asText());
+ case "match:ne":
+ return this.cb.term(key, json.get("value").asText())
+ .not();
- // This may change - but for now it means the elements are lowercased
- case "match:contains":
- return this.cb.term(key, json.get("value").asText().toLowerCase());
+ // This may change - but for now it means the elements are lowercased
+ case "match:contains":
+ return this.cb.term(key, json.get("value").asText()
+ .toLowerCase());
- case "match:containsnot":
- return this.cb.term(key, json.get("value").asText().toLowerCase()).not();
+ case "match:containsnot":
+ return this.cb.term(key,
+ json.get("value").asText().toLowerCase()).not();
- // <LEGACY>
- case "match:excludes":
- return this.cb.term(key, json.get("value").asText().toLowerCase()).not();
- // </LEGACY>
+ // <LEGACY>
+ case "match:excludes":
+ return this.cb.term(key,
+ json.get("value").asText().toLowerCase()).not();
+ // </LEGACY>
};
throw new QueryException(841, "Match relation unknown for type");
@@ -259,21 +265,23 @@
else if (type.equals("koral:docGroup")) {
if (!json.has("operands") || !json.get("operands").isArray())
- throw new QueryException(842, "Document group needs operand list");
+ throw new QueryException(842,
+ "Document group needs operand list");
CollectionBuilder.Group group;
String operation = "operation:and";
if (json.has("operation"))
- operation = json.get("operation").asText();
+ operation = json.get("operation").asText();
if (operation.equals("operation:or"))
group = this.cb.orGroup();
else if (operation.equals("operation:and"))
group = this.cb.andGroup();
else
- throw new QueryException(810, "Unknown document group operation");
-
+ throw new QueryException(810,
+ "Unknown document group operation");
+
for (JsonNode operand : json.get("operands")) {
group.with(this._fromJson(operand));
};
@@ -282,7 +290,8 @@
// Unknown type
throw new QueryException(813, "Collection type is not supported");
- };
+ };
+
// Returns the number of filters - always one!
@Deprecated
@@ -294,13 +303,15 @@
/**
* Set the collection from a {@link CollectionBuilder} object.
*
- * @param cb The CollectionBuilder object.
+ * @param cbi
+ * The {@link CollectionBuilder.Interface} object.
*/
public KrillCollection fromBuilder (CollectionBuilder.Interface cbi) {
this.cbi = cbi;
return this;
};
+
public CollectionBuilder.Interface getBuilder () {
return this.cbi;
};
@@ -310,12 +321,15 @@
return this.cb;
};
+
public KrillCollection filter (CollectionBuilder.Interface filter) {
return this.fromBuilder(this.cb.andGroup().with(this.cbi).with(filter));
};
+
public KrillCollection extend (CollectionBuilder.Interface extension) {
- return this.fromBuilder(this.cb.orGroup().with(this.cbi).with(extension));
+ return this.fromBuilder(this.cb.orGroup().with(this.cbi)
+ .with(extension));
};
@@ -434,12 +448,13 @@
* This will respect deleted documents.
*
* @param atomic
- * The {@link AtomicReaderContext} to search in.
+ * The {@link AtomicReaderContext} to search in.
* @param accepted
* {@link Bits} vector of accepted documents.
* @throws IOException
*/
- public DocIdSet getDocIdSet (AtomicReaderContext atomic, Bits acceptDocs) throws IOException {
+ public DocIdSet getDocIdSet (AtomicReaderContext atomic, Bits acceptDocs)
+ throws IOException {
int maxDoc = atomic.reader().maxDoc();
FixedBitSet bitset = new FixedBitSet(maxDoc);
@@ -455,7 +470,8 @@
// Init vector
DocIdSet docids = filter.getDocIdSet(atomic, null);
- DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+ DocIdSetIterator filterIter = (docids == null) ? null : docids
+ .iterator();
if (filterIter == null) {
if (!this.cbi.isNegative())
@@ -479,16 +495,16 @@
};
// Remove deleted docs
- return (DocIdSet) BitsFilteredDocIdSet.wrap(
- (DocIdSet) bitset,
- acceptDocs
- );
+ return (DocIdSet) BitsFilteredDocIdSet.wrap((DocIdSet) bitset,
+ acceptDocs);
};
+
public long numberOf (String type) throws IOException {
return this.numberOf("tokens", type);
};
+
/**
* Search for the number of occurrences of different types,
* e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
@@ -525,7 +541,7 @@
else
return this.docCount();
};
-
+
// Create search term
// This may be prefixed by foundries
Term term = new Term(field, "-:" + type);
@@ -544,10 +560,11 @@
occurrences += this._numberOfAtomic(bits, atomic, term);
if (DEBUG)
- log.debug("Added up to {} for {}/{}", occurrences, field, type);
+ log.debug("Added up to {} for {}/{}", occurrences, field,
+ type);
};
}
-
+
// Something went wrong
catch (IOException e) {
log.warn(e.getMessage());
@@ -577,11 +594,8 @@
// TODO: Reuse a DocsAndPositionsEnum!!
// Start an iterator to fetch all payloads of the term
- DocsAndPositionsEnum docs = termsEnum.docsAndPositions(
- docvec,
- null,
- DocsAndPositionsEnum.FLAG_PAYLOADS
- );
+ DocsAndPositionsEnum docs = termsEnum.docsAndPositions(docvec,
+ null, DocsAndPositionsEnum.FLAG_PAYLOADS);
// The iterator is empty
@@ -605,17 +619,17 @@
// Copy payload with the offset of the BytesRef
payload = docs.getPayload();
if (payload != null) {
- System.arraycopy(payload.bytes, payload.offset, pl, 0, 4);
+ System.arraycopy(payload.bytes, payload.offset, pl, 0,
+ 4);
// Add payload as integer
occurrences += bb.wrap(pl).getInt();
if (DEBUG)
- log.debug("Value for {} incremented by {} to {} in {}",
- term,
- bb.wrap(pl).getInt(),
- occurrences,
- docs.docID());
+ log.debug(
+ "Value for {} incremented by {} to {} in {}",
+ term, bb.wrap(pl).getInt(), occurrences,
+ docs.docID());
};
};
@@ -626,7 +640,7 @@
// Nothing found
return 0;
- };
+ };
/**
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java
deleted file mode 100644
index 17fa1fe..0000000
--- a/src/main/java/de/ids_mannheim/korap/KrillCollectionLegacy.java
+++ /dev/null
@@ -1,937 +0,0 @@
-package de.ids_mannheim.korap;
-
-import java.util.*;
-import java.io.IOException;
-
-import de.ids_mannheim.korap.*;
-import de.ids_mannheim.korap.util.KrillDate;
-import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.collection.BooleanFilter;
-import de.ids_mannheim.korap.collection.RegexFilter;
-import de.ids_mannheim.korap.collection.FilterOperation;
-import de.ids_mannheim.korap.collection.CollectionBuilderLegacy;
-import de.ids_mannheim.korap.response.Notifications;
-import de.ids_mannheim.korap.response.Result;
-
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.Bits;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
-import java.io.StringWriter;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Create a Virtual Collection of documents by means of a KoralQuery
- * collection object.
- * Alternatively by applying manual filters and extensions on Lucene
- * fields.
- *
- * <blockquote><pre>
- * KrillCollectionLegacy kc = new KrillCollectionLegacy(json);
- * kc.filterUIDS("a1", "a2", "a3");
- * </pre></blockquote>
- *
- * <strong>Warning</strong>: This API is deprecated and will
- * be replaced in future versions. It supports legacy versions of
- * KoralQuery that will be disabled.
- *
- * @author diewald
- */
-/*
- * TODO: Clean up for new KoralQuery
- * TODO: Make a cache for the bits
- * Delete it in case of an extension or a filter
- * TODO: Maybe use randomaccessfilterstrategy
- * TODO: Maybe a constantScoreQuery can make things faster?
- * See http://mail-archives.apache.org/mod_mbox/lucene-java-user/
- * 200805.mbox/%3C17080852.post@talk.nabble.com%3E
- */
-public class KrillCollectionLegacy extends Notifications {
- private KrillIndex index;
- private KrillDate created;
- private String id;
- private ArrayList<FilterOperation> filter;
- private int filterCount = 0;
- private JsonNode json;
-
- // Logger
- private final static Logger log = LoggerFactory
- .getLogger(KrillCollection.class);
-
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
-
- /**
- * Construct a new KrillCollectionLegacy by passing a KrillIndex.
- *
- * @param index
- * The {@link KrillIndex} object.
- */
- public KrillCollectionLegacy (KrillIndex index) {
- this.index = index;
- this.filter = new ArrayList<FilterOperation>(5);
- };
-
-
- /**
- * Construct a new KrillCollectionLegacy by passing a KoralQuery.
- * This supports collections with the key "collection" and
- * legacy collections with the key "collections".
- *
- * @param jsonString
- * The virtual collection as a KoralQuery.
- */
- public KrillCollectionLegacy (String jsonString) {
- ObjectMapper mapper = new ObjectMapper();
- this.filter = new ArrayList<FilterOperation>(5);
-
- try {
- JsonNode json = mapper.readTree(jsonString);
-
- // Deserialize from recent collections
- if (json.has("collection")) {
- this.fromJson(json.get("collection"));
- }
-
- // Legacy collection serialization
- // This will be removed!
- else if (json.has("collections")) {
- this.addMessage(850,
- "Collections are deprecated in favour of a single collection");
- for (JsonNode collection : json.get("collections")) {
- this.fromJsonLegacy(collection);
- };
- };
- }
- // Some exceptions ...
- catch (QueryException qe) {
- this.addError(qe.getErrorCode(), qe.getMessage());
- }
- catch (IOException e) {
- this.addError(621, "Unable to parse JSON", "KrillCollectionLegacy",
- e.getLocalizedMessage());
- };
- };
-
-
- /**
- * Construct a new KrillCollectionLegacy.
- */
- public KrillCollectionLegacy () {
- this.filter = new ArrayList<FilterOperation>(5);
- };
-
-
- /**
- * Import the "collection" part of a KoralQuery.
- *
- * @param jsonString
- * The "collection" part of a KoralQuery.
- * @throws QueryException
- */
- public KrillCollectionLegacy fromJson (String jsonString) throws QueryException {
- ObjectMapper mapper = new ObjectMapper();
- try {
- this.fromJson((JsonNode) mapper.readTree(jsonString));
- }
- catch (Exception e) {
- this.addError(621, "Unable to parse JSON", "KrillCollection");
- };
-
- return this;
- };
-
-
- /**
- * Import the "collection" part of a KoralQuery.
- *
- * @param json
- * The "collection" part of a KoralQuery
- * as a {@link JsonNode} object.
- * @throws QueryException
- */
- public KrillCollectionLegacy fromJson (JsonNode json) throws QueryException {
- this.json = json;
- this.filter(this._fromJson(json));
- return this;
- };
-
-
- // Create a boolean filter from JSON
- private BooleanFilter _fromJson (JsonNode json) throws QueryException {
- return this._fromJson(json, "tokens");
- };
-
-
- // Create a booleanfilter from JSON
- private BooleanFilter _fromJson (JsonNode json, String field)
- throws QueryException {
- BooleanFilter bfilter = new BooleanFilter();
-
- if (!json.has("@type")) {
- throw new QueryException(701,
- "JSON-LD group has no @type attribute");
- };
-
- String type = json.get("@type").asText();
-
- // Single filter
- if (type.equals("koral:doc")) {
-
- String key = "tokens";
- String valtype = "type:string";
- String match = "match:eq";
-
- if (json.has("key"))
- key = json.get("key").asText();
-
- if (json.has("type"))
- valtype = json.get("type").asText();
-
- // Filter based on date
- if (valtype.equals("type:date")) {
-
- if (!json.has("value"))
- throw new QueryException(612, "Dates require value fields");
-
- String dateStr = json.get("value").asText();
- if (json.has("match"))
- match = json.get("match").asText();
-
- // TODO: This isn't stable yet
- switch (match) {
- case "match:eq":
- bfilter.date(dateStr);
- break;
- case "match:geq":
- bfilter.since(dateStr);
- break;
- case "match:leq":
- bfilter.till(dateStr);
- break;
- };
-
- // No good reason for gt or lt
- return bfilter;
- }
-
- // Filter based on string
- else if (valtype.equals("type:string")) {
- if (json.has("match"))
- match = json.get("match").asText();
-
- if (match.equals("match:eq")) {
- bfilter.and(key, json.get("value").asText());
- }
- else if (match.equals("match:ne")) {
- bfilter.andNot(key, json.get("value").asText());
- }
- // This may change - but for now it means the elements are lowercased
- else if (match.equals("match:contains")) {
- bfilter.and(key, json.get("value").asText().toLowerCase());
- }
- else if (match.equals("match:containsnot")) {
- bfilter.andNot(key, json.get("value").asText().toLowerCase());
- }
- // <LEGACY>
- else if (match.equals("match:excludes")) {
- bfilter.andNot(key, json.get("value").asText().toLowerCase());
- }
- // </LEGACY>
- else {
- throw new QueryException(0, "Unknown match type");
- };
-
- return bfilter;
- }
-
- // Filter based on regex
- else if (valtype.equals("type:regex")) {
- if (json.has("match"))
- match = json.get("match").asText();
-
- if (match.equals("match:eq")) {
- return bfilter.and(key, new RegexFilter(json.get("value")
- .asText()));
- }
- else if (match.equals("match:ne")) {
- return bfilter.andNot(key, new RegexFilter(json
- .get("value").asText()));
- };
-
- // TODO! for excludes and contains
- throw new QueryException(0, "Unknown document type");
- };
-
- // TODO!
- throw new QueryException(0, "Unknown document operation");
- }
-
- // nested group
- else if (type.equals("koral:docGroup")) {
- if (!json.has("operands") || !json.get("operands").isArray())
- throw new QueryException(612, "Groups need operands");
-
- String operation = "operation:and";
- if (json.has("operation"))
- operation = json.get("operation").asText();
-
- BooleanFilter group = new BooleanFilter();
-
- for (JsonNode operand : json.get("operands")) {
- if (operation.equals("operation:and"))
- group.and(this._fromJson(operand, field));
-
- else if (operation.equals("operation:or"))
- group.or(this._fromJson(operand, field));
-
- else
- throw new QueryException(613,
- "Unknown document group operation");
- };
- bfilter.and(group);
- return bfilter;
- }
-
- // Unknown type
- else
- throw new QueryException(613,
- "Collection query type has to be doc or docGroup");
-
- // return new BooleanFilter();
- };
-
-
- /**
- * Import the "collections" part of a KoralQuery.
- * This method is deprecated and will vanish in future versions.
- *
- * @param jsonString
- * The "collections" part of a KoralQuery.
- * @throws QueryException
- */
- @Deprecated
- public KrillCollectionLegacy fromJsonLegacy (String jsonString)
- throws QueryException {
- ObjectMapper mapper = new ObjectMapper();
- try {
- this.fromJsonLegacy((JsonNode) mapper.readValue(jsonString,
- JsonNode.class));
- }
- catch (Exception e) {
- this.addError(621, "Unable to parse JSON", "KrillCollection");
- };
- return this;
- };
-
-
- /**
- * Import the "collections" part of a KoralQuery.
- * This method is deprecated and will vanish in future versions.
- *
- * @param json
- * The "collections" part of a KoralQuery
- * as a {@link JsonNode} object.
- * @throws QueryException
- */
- @Deprecated
- public KrillCollectionLegacy fromJsonLegacy (JsonNode json) throws QueryException {
- if (!json.has("@type"))
- throw new QueryException(701,
- "JSON-LD group has no @type attribute");
-
- if (!json.has("@value"))
- throw new QueryException(851, "Legacy filter need @value fields");
-
- BooleanFilter bf = this._fromJsonLegacy(json.get("@value"), "tokens");
- String type = json.get("@type").asText();
-
- // Filter the collection
- if (type.equals("koral:meta-filter")) {
- if (DEBUG)
- log.trace("Add Filter LEGACY");
- this.filter(bf);
- }
-
- // Extend the collection
- else if (type.equals("koral:meta-extend")) {
- if (DEBUG)
- log.trace("Add Extend LEGACY");
- this.extend(bf);
- };
-
- return this;
- };
-
-
- // Create a boolean filter from a Json string
- @Deprecated
- private BooleanFilter _fromJsonLegacy (JsonNode json, String field)
- throws QueryException {
- BooleanFilter bfilter = new BooleanFilter();
-
- if (!json.has("@type"))
- throw new QueryException(612,
- "JSON-LD group has no @type attribute");
-
- String type = json.get("@type").asText();
-
- if (DEBUG)
- log.trace("@type: " + type);
-
- if (json.has("@field"))
- field = _getFieldLegacy(json);
-
- if (type.equals("koral:term")) {
- if (field != null && json.has("@value"))
- bfilter.and(field, json.get("@value").asText());
- return bfilter;
- }
- else if (type.equals("koral:group")) {
- if (!json.has("relation"))
- throw new QueryException(612, "Group needs relation");
-
- if (!json.has("operands"))
- throw new QueryException(612, "Group needs operand list");
-
- String dateStr, till;
- JsonNode operands = json.get("operands");
-
- if (!operands.isArray())
- throw new QueryException(612, "Group needs operand list");
-
- if (DEBUG)
- log.trace("relation found {}", json.get("relation").asText());
-
- BooleanFilter group = new BooleanFilter();
-
- switch (json.get("relation").asText()) {
- case "between":
- dateStr = _getDateLegacy(json, 0);
- till = _getDateLegacy(json, 1);
- if (dateStr != null && till != null)
- bfilter.between(dateStr, till);
- break;
-
- case "until":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.till(dateStr);
- break;
-
- case "since":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.since(dateStr);
- break;
-
- case "equals":
- dateStr = _getDateLegacy(json, 0);
- if (dateStr != null)
- bfilter.date(dateStr);
- break;
-
- case "and":
- if (operands.size() < 1)
- throw new QueryException(612,
- "Operation needs at least two operands");
-
- for (JsonNode operand : operands) {
- group.and(this._fromJsonLegacy(operand, field));
- }
- ;
- bfilter.and(group);
- break;
-
- case "or":
- if (operands.size() < 1)
- throw new QueryException(612,
- "Operation needs at least two operands");
-
- for (JsonNode operand : operands) {
- group.or(this._fromJsonLegacy(operand, field));
- }
- ;
- bfilter.and(group);
- break;
-
- default:
- throw new QueryException(613, "Relation is not supported");
- };
- }
- else {
- throw new QueryException(613,
- "Filter type is not a supported group");
- };
- return bfilter;
- };
-
-
- /**
- * Set the {@link KrillIndex} the virtual collection refers to.
- *
- * @param index
- * The {@link KrillIndex} the virtual collection refers
- * to.
- */
- public void setIndex (KrillIndex index) {
- this.index = index;
- };
-
-
- /**
- * Add a filter by means of a {@link BooleanFilter}.
- *
- * <strong>Warning</strong>: Filters are part of the collections
- * legacy API and may vanish without warning.
- *
- * @param filter
- * The filter to add to the collection.
- * @return The {@link KrillCollectionLegacy} object for chaining.
- */
- // TODO: The checks may not be necessary
- public KrillCollectionLegacy filter (BooleanFilter filter) {
- if (DEBUG)
- log.trace("Added filter: {}", filter.toString());
-
- if (filter == null) {
- this.addWarning(830, "Filter was empty");
- return this;
- };
-
- Filter f = (Filter) new QueryWrapperFilter(filter.toQuery());
- if (f == null) {
- this.addWarning(831, "Filter is not wrappable");
- return this;
- };
- FilterOperation fo = new FilterOperation(f, false);
- if (fo == null) {
- this.addWarning(832, "Filter operation is invalid");
- return this;
- };
- this.filter.add(fo);
- this.filterCount++;
- return this;
- };
-
-
- /**
- * Add a filter by means of a {@link CollectionBuilderLegacy} object.
- *
- * <strong>Warning</strong>: Filters are part of the collections
- * legacy API and may vanish without warning.
- *
- * @param filter
- * The filter to add to the collection.
- * @return The {@link KrillCollectionLegacy} object for chaining.
- */
- public KrillCollectionLegacy filter (CollectionBuilderLegacy filter) {
- return this.filter(filter.getBooleanFilter());
- };
-
-
- /**
- * Add an extension by means of a {@link BooleanFilter}.
- *
- * <strong>Warning</strong>: Extensions are part of the
- * collections
- * legacy API and may vanish without warning.
- *
- * @param extension
- * The extension to add to the collection.
- * @return The {@link KrillCollectionLegacy} object for chaining.
- */
- public KrillCollectionLegacy extend (BooleanFilter extension) {
- if (DEBUG)
- log.trace("Added extension: {}", extension.toString());
-
- this.filter.add(new FilterOperation((Filter) new QueryWrapperFilter(
- extension.toQuery()), true));
- this.filterCount++;
- return this;
- };
-
-
- /**
- * Add an extension by means of a {@link CollectionBuilderLegacy}
- * object.
- *
- * <strong>Warning</strong>: Extensions are part of the
- * collections
- * legacy API and may vanish without warning.
- *
- * @param extension
- * The extension to add to the collection.
- * @return The {@link KrillCollectionLegacy} object for chaining.
- */
- public KrillCollectionLegacy extend (CollectionBuilderLegacy extension) {
- return this.extend(extension.getBooleanFilter());
- };
-
-
- /**
- * Add a filter based on a list of unique document identifiers.
- * UIDs may be indexed in the field "UID".
- *
- * This filter is not part of the legacy API!
- *
- * @param uids
- * The list of unique document identifier.
- * @return The {@link KrillCollectionLegacy} object for chaining.
- */
- public KrillCollectionLegacy filterUIDs (String ... uids) {
- BooleanFilter filter = new BooleanFilter();
- filter.or("UID", uids);
- if (DEBUG)
- log.debug("UID based filter: {}", filter.toString());
- return this.filter(filter);
- };
-
-
- /**
- * Get the list of filters constructing the collection.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @return The list of filters.
- */
- public List<FilterOperation> getFilters () {
- return this.filter;
- };
-
-
- /**
- * Get a certain {@link FilterOperation} from the list of filters
- * constructing the collection by its numerical index.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @param index
- * The index position of the requested
- * {@link FilterOperation}.
- * @return The {@link FilterOperation} at the certain list
- * position.
- */
- public FilterOperation getFilter (int index) {
- return this.filter.get(index);
- };
-
-
- /**
- * Get the number of filter operations constructing this
- * collection.
- *
- * <strong>Warning</strong>: This is part of the collections
- * legacy API and may vanish without warning.
- *
- * @return The number of filter operations constructing this
- * collection.
- */
- public int getCount () {
- return this.filterCount;
- };
-
-
- /**
- * Generate a string representatio of the virtual collection.
- *
- * <strong>Warning</strong>: This currently does not generate a
- * valid
- * KoralQuery string, so this may change in a future version.
- *
- * @return A string representation of the virtual collection.
- */
- public String toString () {
- StringBuilder sb = new StringBuilder();
- for (FilterOperation fo : this.filter) {
- sb.append(fo.toString()).append("; ");
- };
- return sb.toString();
- };
-
-
- /**
- * Return the associated KoralQuery collection object
- * as a {@link JsonNode}. This won't work,
- * if the object was build using a CollectionBuilderLegacy,
- * therefore it is limited to mirror a deserialized KoralQuery
- * object.
- *
- * @return The {@link JsonNode} representing the collection object
- * of a deserialized KoralQuery object.
- */
- public JsonNode toJsonNode () {
- return this.json;
- };
-
-
-
- /**
- * Search in the virtual collection.
- * This is mostly used for testing purposes
- * and <strong>is not recommended</strong>
- * as a common search API.
- *
- * Please use {@link KrillQuery#run} instead.
- *
- * @param query
- * a {@link SpanQuery} to apply on the
- * virtual collection.
- * @return A {@link Result} object representing the search's
- * result.
- */
- public Result search (SpanQuery query) {
- /*
-return this.index.search(this, query, 0, (short) 20, true, (short) 5,
- true, (short) 5);
- */
- return null;
- };
-
-
- /**
- * Create a bit vector representing the live documents of the
- * virtual collection to be used in searches.
- *
- * @param The
- * {@link AtomicReaderContext} to search in.
- * @return A bit vector representing the live documents of the
- * virtual collection.
- * @throws IOException
- */
- public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
- // TODO: Probably use Bits.MatchAllBits(int len)
- boolean noDoc = true;
- FixedBitSet bitset;
-
- // There are filters set
- if (this.filterCount > 0) {
- bitset = new FixedBitSet(atomic.reader().maxDoc());
-
- ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter
- .clone();
-
- FilterOperation kcInit = filters.remove(0);
- if (DEBUG)
- log.trace("FILTER: {}", kcInit);
-
- // Init vector
- DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null);
-
- DocIdSetIterator filterIter = docids.iterator();
-
- // The filter has an effect
- if (filterIter != null) {
- if (DEBUG)
- log.trace("InitFilter has effect");
- bitset.or(filterIter);
- noDoc = false;
- };
-
- // Apply all filters sequentially
- for (FilterOperation kc : filters) {
- if (DEBUG)
- log.trace("FILTER: {}", kc);
-
- // TODO: BUG???
- docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null
- : bitset);
- filterIter = docids.iterator();
-
- if (filterIter == null) {
- // There must be a better way ...
- if (kc.isFilter()) {
- // TODO: Check if this is really correct!
- // Maybe here is the bug
- bitset.clear(0, bitset.length());
- noDoc = true;
- };
- continue;
- };
- if (kc.isExtension())
- bitset.or(filterIter);
- else
- bitset.and(filterIter);
- };
-
- if (!noDoc) {
- FixedBitSet livedocs = (FixedBitSet) atomic.reader()
- .getLiveDocs();
- if (livedocs != null)
- bitset.and(livedocs);
- };
- }
- else {
- bitset = (FixedBitSet) atomic.reader().getLiveDocs();
- };
-
- return bitset;
- };
-
-
- /**
- * Search for the number of occurrences of different types,
- * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
- * collection.
- *
- * @param field
- * The field containing the textual data and the
- * annotations as a string.
- * @param type
- * The type of meta information,
- * e.g. <i>documents</i> or <i>sentences</i> as a
- * string.
- * @return The number of the occurrences.
- * @throws IOException
- * @see KrillIndex#numberOf
- */
- public long numberOf (String field, String type) throws IOException {
- if (this.index == null)
- return (long) -1;
-
- // return this.index.numberOf(this, field, type);
- return (long) 0;
- };
-
-
- /**
- * Search for the number of occurrences of different types,
- * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
- * collection, in the <i>base</i> foundry.
- *
- * @param type
- * The type of meta information,
- * e.g. <i>documents</i> or <i>sentences</i> as a
- * string.
- * @return The number of the occurrences.
- * @throws IOException
- * @see KrillIndex#numberOf
- */
- public long numberOf (String type) throws IOException {
- if (this.index == null)
- return (long) -1;
-
- // return this.index.numberOf(this, "tokens", type);
- return (long) 0;
- };
-
-
- // Term relation API is not in use anymore
- /*
- @Deprecated
- public HashMap getTermRelation (String field) throws Exception {
- if (this.index == null) {
- HashMap<String, Long> map = new HashMap<>(1);
- map.put("-docs", (long) 0);
- return map;
- };
-
- return this.index.getTermRelation(this, field);
- };
- */
-
-
- // Term relation API is not in use anymore
- /*
- @Deprecated
- public String getTermRelationJSON (String field) throws IOException {
- ObjectMapper mapper = new ObjectMapper();
- StringWriter sw = new StringWriter();
- sw.append("{\"field\":");
- mapper.writeValue(sw, field);
- sw.append(",");
-
- try {
- HashMap<String, Long> map = this.getTermRelation(field);
-
- sw.append("\"documents\":");
- mapper.writeValue(sw, map.remove("-docs"));
- sw.append(",");
-
- String[] keys = map.keySet().toArray(new String[map.size()]);
-
- HashMap<String, Integer> setHash = new HashMap<>(20);
- ArrayList<HashMap<String, Long>> set = new ArrayList<>(20);
- ArrayList<Long[]> overlap = new ArrayList<>(100);
-
- int count = 0;
- for (String key : keys) {
- if (!key.startsWith("#__")) {
- HashMap<String, Long> simpleMap = new HashMap<>();
- simpleMap.put(key, map.remove(key));
- set.add(simpleMap);
- setHash.put(key, count++);
- };
- };
-
- keys = map.keySet().toArray(new String[map.size()]);
- for (String key : keys) {
- String[] comb = key.substring(3).split(":###:");
- Long[] l = new Long[3];
- l[0] = (long) setHash.get(comb[0]);
- l[1] = (long) setHash.get(comb[1]);
- l[2] = map.remove(key);
- overlap.add(l);
- };
-
- sw.append("\"sets\":");
- mapper.writeValue(sw, (Object) set);
- sw.append(",\"overlaps\":");
- mapper.writeValue(sw, (Object) overlap);
- sw.append(",\"error\":null");
- }
- catch (Exception e) {
- sw.append("\"error\":");
- mapper.writeValue(sw, e.getMessage());
- };
-
- sw.append("}");
- return sw.getBuffer().toString();
- };
- */
-
- // Get legacy field
- @Deprecated
- private static String _getFieldLegacy (JsonNode json) {
- if (!json.has("@field"))
- return (String) null;
-
- String field = json.get("@field").asText();
- return field.replaceFirst("koral:field#", "");
- };
-
-
- // Get legacy date
- @Deprecated
- private static String _getDateLegacy (JsonNode json, int index) {
- if (!json.has("operands"))
- return (String) null;
-
- if (!json.get("operands").has(index))
- return (String) null;
-
- JsonNode date = json.get("operands").get(index);
-
- if (!date.has("@type"))
- return (String) null;
-
- if (!date.get("@type").asText().equals("koral:date"))
- return (String) null;
-
- if (!date.has("@value"))
- return (String) null;
-
- return date.get("@value").asText();
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index c1c0c00..7dc25ee 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -218,8 +218,7 @@
analyzerPerField.put("keywords", new KeywordAnalyzer());
analyzerPerField.put("foundries", new KeywordAnalyzer());
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
- new TextAnalyzer(), analyzerPerField
- );
+ new TextAnalyzer(), analyzerPerField);
// Create configuration with base analyzer
this.config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
@@ -430,9 +429,7 @@
if (field == null || term == null)
return false;
try {
- this.writer().deleteDocuments(
- new Term(field, term)
- );
+ this.writer().deleteDocuments(new Term(field, term));
if (++commitCounter > autoCommit) {
this.commit();
commitCounter = 0;
@@ -1011,7 +1008,7 @@
field);
HashSet<String> fields = (HashSet<String>) new Krill()
- .getMeta().getFields().clone();
+ .getMeta().getFields().clone();
fields.add(field);
@@ -1025,8 +1022,7 @@
match.populateDocument(doc, field, fields);
if (DEBUG)
log.trace("The document has the id '{}' or the sigle '{}'",
- match.getDocID(),
- match.getTextSigle());
+ match.getDocID(), match.getTextSigle());
// Todo:
SearchContext context = match.getContext();
@@ -1034,7 +1030,8 @@
// Search for minimal surrounding sentences
if (extendToSentence) {
- String element = (match.getTextSigle() == null ? "s" : "base/s:s");
+ String element = (match.getTextSigle() == null ? "s"
+ : "base/s:s");
// SUPPORT FOR LEGACY ANNOTATIONS
int[] spanContext = match.expandContextToSpan(element);
@@ -1043,8 +1040,7 @@
log.trace("Extend to sentence element '{}'", element);
// </legacy>
- if (spanContext[0] >= 0 &&
- spanContext[0] < spanContext[1]) {
+ if (spanContext[0] >= 0 && spanContext[0] < spanContext[1]) {
match.setStartPos(spanContext[0]);
match.setEndPos(spanContext[1]);
match.startMore = false;
@@ -1222,7 +1218,7 @@
// Todo: Make kr subclassing ks - so ks has a method for a new Result!
Result kr = new Result(query.toString(), meta.getStartIndex(),
meta.getCount(), meta.getContext());
-
+
// Set version info to result
if (this.getVersion() != null)
kr.setVersion(this.getVersion());
@@ -1350,8 +1346,8 @@
int docID = atomic.docBase + localDocID;
// Do not load all of this, in case the doc is the same!
- Document doc = (fields != null) ? lreader.document(localDocID, fields) :
- lreader.document(localDocID);
+ Document doc = (fields != null) ? lreader.document(
+ localDocID, fields) : lreader.document(localDocID);
// Create new Match
Match match = new Match(pto, localDocID, spans.start(),
@@ -1451,6 +1447,7 @@
return kr;
};
+
public void getFields () {
/*
* Return a map of key, value pairs:
@@ -1460,10 +1457,12 @@
*/
};
+
public void getValues (String field) {
};
+
// Collect matches
public MatchCollector collect (Krill ks, MatchCollector mc) {
if (DEBUG)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index 72fc389..07a3c3b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -399,39 +399,39 @@
// Branch on operation
switch (operation) {
- case "operation:junction":
- return this._operationJunctionFromJson(operands);
+ case "operation:junction":
+ return this._operationJunctionFromJson(operands);
- case "operation:position":
- return this._operationPositionFromJson(json, operands);
+ case "operation:position":
+ return this._operationPositionFromJson(json, operands);
- case "operation:sequence":
- return this._operationSequenceFromJson(json, operands);
+ case "operation:sequence":
+ return this._operationSequenceFromJson(json, operands);
- case "operation:class":
- return this._operationClassFromJson(json, operands);
+ case "operation:class":
+ return this._operationClassFromJson(json, operands);
- case "operation:repetition":
- return this._operationRepetitionFromJson(json, operands);
+ case "operation:repetition":
+ return this._operationRepetitionFromJson(json, operands);
- case "operation:relation":
- if (!json.has("relation")) {
- throw new QueryException(717, "Missing relation node");
- }
+ case "operation:relation":
+ if (!json.has("relation")) {
+ throw new QueryException(717, "Missing relation node");
+ }
- return _operationRelationFromJson(operands,
- json.get("relation"));
- /*throw new QueryException(765,
- "Relations are currently not supported");*/
+ return _operationRelationFromJson(operands,
+ json.get("relation"));
+ /*throw new QueryException(765,
+ "Relations are currently not supported");*/
- case "operation:or": // Deprecated in favor of operation:junction
- return this._operationJunctionFromJson(operands);
- /*
- case "operation:submatch": // Deprecated in favor of koral:reference
- return this._operationSubmatchFromJson(json, operands);
- */
- case "operation:disjunction":
- return this._operationJunctionFromJson(operands);
+ case "operation:or": // Deprecated in favor of operation:junction
+ return this._operationJunctionFromJson(operands);
+ /*
+ case "operation:submatch": // Deprecated in favor of koral:reference
+ return this._operationSubmatchFromJson(json, operands);
+ */
+ case "operation:disjunction":
+ return this._operationJunctionFromJson(operands);
};
// Unknown
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java
deleted file mode 100644
index 19693c6..0000000
--- a/src/main/java/de/ids_mannheim/korap/collection/BooleanFilter.java
+++ /dev/null
@@ -1,229 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import java.util.*;
-
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.RegexpQuery;
-import org.apache.lucene.search.NumericRangeQuery;
-
-import de.ids_mannheim.korap.util.KrillDate;
-import de.ids_mannheim.korap.KrillCollection;
-import de.ids_mannheim.korap.util.QueryException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/*
- THIS IS LIMITED TO PUBDATE AT THE MOMENT AND COMPLETELY LEGACY!
-*/
-
-/**
- * @author Nils Diewald
- *
- * BooleanFilter implements a simple API for boolean
- * operations
- * on constraints for KorapFilter.
- */
-public class BooleanFilter {
- private String type;
-
- // Logger
- private final static Logger log = LoggerFactory
- .getLogger(KrillCollection.class);
-
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
- private BooleanQuery bool;
- private String error;
-
-
- public BooleanFilter () {
- bool = new BooleanQuery();
- };
-
-
- public BooleanFilter or (String type, String ... terms) {
- for (String term : terms) {
-
- if (DEBUG)
- log.trace("Filter: OR {}={}", type, term);
-
- bool.add(new TermQuery(new Term(type, term)),
- BooleanClause.Occur.SHOULD);
- };
- return this;
- };
-
-
- public BooleanFilter or (String type, RegexFilter value) {
- bool.add(value.toQuery(type), BooleanClause.Occur.SHOULD);
- return this;
- };
-
-
- public BooleanFilter or (BooleanFilter bf) {
- if (bf.bool.clauses().size() == 1) {
- BooleanClause bc = bf.bool.getClauses()[0];
- bc.setOccur(BooleanClause.Occur.SHOULD);
- bool.add(bc);
- return this;
- }
- bool.add(bf.toQuery(), BooleanClause.Occur.SHOULD);
- return this;
- };
-
-
- public BooleanFilter or (NumericRangeQuery<Integer> nrq) {
- bool.add(nrq, BooleanClause.Occur.SHOULD);
- return this;
- };
-
-
- public BooleanFilter and (String type, String ... terms) {
- for (String term : terms) {
- bool.add(new TermQuery(new Term(type, term)),
- BooleanClause.Occur.MUST);
- };
- return this;
- };
-
-
- public BooleanFilter and (String type, RegexFilter value) {
- bool.add(value.toQuery(type), BooleanClause.Occur.MUST);
- return this;
- };
-
-
- public BooleanFilter and (BooleanFilter bf) {
- if (bf.bool.clauses().size() == 1) {
- BooleanClause bc = bf.bool.getClauses()[0];
- bc.setOccur(BooleanClause.Occur.MUST);
- bool.add(bc);
- return this;
- }
- bool.add(bf.toQuery(), BooleanClause.Occur.MUST);
- return this;
- };
-
-
- public BooleanFilter andNot (String type, String ... terms) {
- for (String term : terms) {
- bool.add(new TermQuery(new Term(type, term)),
- BooleanClause.Occur.MUST_NOT);
- };
- return this;
- };
-
-
- public BooleanFilter andNot (String type, RegexFilter value) {
- bool.add(value.toQuery(type), BooleanClause.Occur.MUST_NOT);
- return this;
- };
-
-
- public BooleanFilter andNot (BooleanFilter bf) {
- if (bf.bool.clauses().size() == 1) {
- BooleanClause bc = bf.bool.getClauses()[0];
- bc.setOccur(BooleanClause.Occur.MUST_NOT);
- bool.add(bc);
- return this;
- }
- bool.add(bf.toQuery(), BooleanClause.Occur.MUST_NOT);
- return this;
- };
-
-
- public BooleanFilter since (String dateStr) {
- int since = new KrillDate(dateStr).floor();
-
- if (since == 0 || since == KrillDate.BEGINNING)
- return this;
-
- bool.add(NumericRangeQuery.newIntRange("pubDate", since, KrillDate.END,
- true, true), BooleanClause.Occur.MUST);
-
- return this;
- };
-
-
- public BooleanFilter till (String dateStr) {
- try {
- int till = new KrillDate(dateStr).ceil();
- if (till == 0 || till == KrillDate.END)
- return this;
-
- bool.add(NumericRangeQuery.newIntRange("pubDate",
- KrillDate.BEGINNING, till, true, true),
- BooleanClause.Occur.MUST);
- }
- catch (NumberFormatException e) {
- log.warn("Parameter of till(date) is invalid");
- };
- return this;
- };
-
-
- public BooleanFilter between (String beginStr, String endStr) {
- KrillDate beginDF = new KrillDate(beginStr);
-
- int begin = beginDF.floor();
-
- int end = new KrillDate(endStr).ceil();
-
- if (end == 0)
- return this;
-
- if (begin == KrillDate.BEGINNING && end == KrillDate.END)
- return this;
-
- if (begin == end) {
- this.and("pubDate", beginDF.toString());
- return this;
- };
-
- this.bool.add(NumericRangeQuery.newIntRange("pubDate", begin, end,
- true, true), BooleanClause.Occur.MUST);
- return this;
- };
-
-
- public BooleanFilter date (String dateStr) {
- KrillDate dateDF = new KrillDate(dateStr);
-
- if (dateDF.year == 0)
- return this;
-
- if (dateDF.day == 0 || dateDF.month == 0) {
- int begin = dateDF.floor();
- int end = dateDF.ceil();
-
- if (end == 0
- || (begin == KrillDate.BEGINNING && end == KrillDate.END))
- return this;
-
- this.bool.add(NumericRangeQuery.newIntRange("pubDate", begin, end,
- true, true), BooleanClause.Occur.MUST);
- return this;
- };
-
- this.and("pubDate", dateDF.toString());
- return this;
- };
-
-
- public Query toQuery () {
- return this.bool;
- };
-
-
- public String toString () {
- return this.bool.toString();
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
index 60c4ebb..53b0b66 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
@@ -20,17 +20,19 @@
/**
* A container Filter that allows Boolean composition of Filters
* in groups (either or-groups or and-groups).
- *
+ *
* @author Nils Diewald
- *
- * This filter is roughly based on org.apache.lucene.queries.BooleanFilter.
+ *
+ * This filter is roughly based on
+ * org.apache.lucene.queries.BooleanFilter.
*/
public class BooleanGroupFilter extends Filter {
// Group is either an or- or an and-Group
private boolean isOptional;
// Logger
- private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
+ private final static Logger log = LoggerFactory
+ .getLogger(KrillCollection.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
@@ -43,6 +45,7 @@
public Filter filter;
public boolean isNegative;
+
// Operand has filter and negativity information
public GroupFilterOperand (Filter filter, boolean negative) {
this.filter = filter;
@@ -50,6 +53,7 @@
};
};
+
/**
* Create a new BooleanGroupFilter.
* Accepts a boolean parameter to make it an or-Group
@@ -82,7 +86,7 @@
public boolean equals (Object obj) {
if (this == obj)
return true;
-
+
if ((obj == null) || (obj.getClass() != this.getClass()))
return false;
@@ -92,16 +96,15 @@
@Override
- public int hashCode() {
+ public int hashCode () {
return 657153719 ^ operands.hashCode();
};
-
+
@Override
public String toString () {
- StringBuilder buffer = new StringBuilder(
- this.isOptional ? "OrGroup(" : "AndGroup("
- );
+ StringBuilder buffer = new StringBuilder(this.isOptional ? "OrGroup("
+ : "AndGroup(");
boolean first = true;
for (final GroupFilterOperand operand : this.operands) {
if (first)
@@ -117,12 +120,13 @@
return buffer.append(')').toString();
};
-
+
@Override
- public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs)
+ throws IOException {
final AtomicReader reader = context.reader();
int maxDoc = reader.maxDoc();
- FixedBitSet bitset = new FixedBitSet(maxDoc);
+ FixedBitSet bitset = new FixedBitSet(maxDoc);
FixedBitSet combinator = new FixedBitSet(maxDoc);
boolean init = true;
@@ -131,17 +135,18 @@
for (final GroupFilterOperand operand : this.operands) {
final DocIdSet docids = operand.filter.getDocIdSet(context, null);
- final DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+ final DocIdSetIterator filterIter = (docids == null) ? null
+ : docids.iterator();
if (DEBUG)
log.debug("> Filter to bitset of {} ({} negative)",
- operand.filter.toString(),
- operand.isNegative);
+ operand.filter.toString(), operand.isNegative);
// Filter resulted in no docs
if (filterIter == null) {
- if (DEBUG) log.debug("- Filter is null");
+ if (DEBUG)
+ log.debug("- Filter is null");
// Filter matches
if (operand.isNegative) {
@@ -150,25 +155,29 @@
if (this.isOptional) {
// Everything is allowed
- if (DEBUG) log.debug("- Filter to allow all documents");
+ if (DEBUG)
+ log.debug("- Filter to allow all documents");
bitset.set(0, maxDoc);
return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
};
// There is no possible match
- if (DEBUG) log.debug("- Filter to allow no documents (1)");
+ if (DEBUG)
+ log.debug("- Filter to allow no documents (1)");
return null;
}
// The result is unimportant
else if (this.isOptional) {
- if (DEBUG) log.debug("- Filter is ignorable");
+ if (DEBUG)
+ log.debug("- Filter is ignorable");
continue;
};
// There is no possible match
- if (DEBUG) log.debug("- Filter to allow no documents (2)");
+ if (DEBUG)
+ log.debug("- Filter to allow no documents (2)");
return null;
}
@@ -177,30 +186,40 @@
bitset.or(filterIter);
- if (DEBUG) log.debug("- Filter is inial with card {}", bitset.cardinality());
+ if (DEBUG)
+ log.debug("- Filter is inial with card {}",
+ bitset.cardinality());
// Flip the matching documents
if (operand.isNegative) {
bitset.flip(0, maxDoc);
- if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (1)", bitset.cardinality());
+ if (DEBUG)
+ log.debug(
+ "- Filter is negative - so flipped to card {} (1)",
+ bitset.cardinality());
};
init = false;
}
else {
- if (DEBUG) log.debug("- Filter is fine and operating");
+ if (DEBUG)
+ log.debug("- Filter is fine and operating");
// Operator is negative and needs to be flipped
if (operand.isNegative) {
if (this.isOptional) {
- if (DEBUG) log.debug("- Filter is negative optional");
+ if (DEBUG)
+ log.debug("- Filter is negative optional");
// Negative or ... may be slow
combinator.or(filterIter);
combinator.flip(0, maxDoc);
- if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (2)", combinator.cardinality());
+ if (DEBUG)
+ log.debug(
+ "- Filter is negative - so flipped to card {} (2)",
+ combinator.cardinality());
bitset.or(combinator);
combinator.clear(0, maxDoc);
@@ -208,22 +227,27 @@
// Negative and
else {
- if (DEBUG) log.debug("- Filter is negative not optional");
+ if (DEBUG)
+ log.debug("- Filter is negative not optional");
bitset.andNot(filterIter);
- if (DEBUG) log.debug("- Filter is negative - so andNotted");
+ if (DEBUG)
+ log.debug("- Filter is negative - so andNotted");
}
}
else if (this.isOptional) {
- if (DEBUG) log.debug("- Filter is simply optional");
+ if (DEBUG)
+ log.debug("- Filter is simply optional");
bitset.or(filterIter);
}
else {
- if (DEBUG) log.debug("- Filter is simply not optional");
+ if (DEBUG)
+ log.debug("- Filter is simply not optional");
bitset.and(filterIter);
// TODO: Check with nextSetBit() if the filter is not applicable
};
- if (DEBUG) log.debug("- Subresult has card {} ", bitset.cardinality());
+ if (DEBUG)
+ log.debug("- Subresult has card {} ", bitset.cardinality());
};
};
return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index 836aaa1..350d852 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -25,19 +25,23 @@
public class CollectionBuilder {
// Logger
- private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
+ private final static Logger log = LoggerFactory
+ .getLogger(KrillCollection.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
+
public CollectionBuilder.Interface term (String field, String term) {
return new CollectionBuilder.Term(field, term);
};
+
public CollectionBuilder.Interface re (String field, String term) {
return new CollectionBuilder.Term(field, term, true);
};
+
public CollectionBuilder.Interface since (String field, String date) {
int since = new KrillDate(date).floor();
@@ -47,6 +51,7 @@
return new CollectionBuilder.Range(field, since, KrillDate.END);
};
+
public CollectionBuilder.Interface till (String field, String date) {
try {
int till = new KrillDate(date).ceil();
@@ -61,8 +66,10 @@
return null;
};
+
// This will be optimized away in future versions
- public CollectionBuilder.Interface between (String field, String start, String end) {
+ public CollectionBuilder.Interface between (String field, String start,
+ String end) {
CollectionBuilder.Interface startObj = this.since(field, start);
if (startObj == null)
return null;
@@ -74,6 +81,7 @@
return this.andGroup().with(startObj).with(endObj);
};
+
public CollectionBuilder.Interface date (String field, String date) {
KrillDate dateDF = new KrillDate(date);
@@ -85,7 +93,7 @@
int end = dateDF.ceil();
if (end == 0
- || (begin == KrillDate.BEGINNING && end == KrillDate.END))
+ || (begin == KrillDate.BEGINNING && end == KrillDate.END))
return null;
return new CollectionBuilder.Range(field, begin, end);
@@ -94,18 +102,26 @@
return new CollectionBuilder.Range(field, dateDF.floor(), dateDF.ceil());
};
+
public CollectionBuilder.Group andGroup () {
return new CollectionBuilder.Group(false);
};
+
public CollectionBuilder.Group orGroup () {
return new CollectionBuilder.Group(true);
};
public interface Interface {
public String toString ();
+
+
public Filter toFilter ();
+
+
public boolean isNegative ();
+
+
public CollectionBuilder.Interface not ();
};
@@ -115,28 +131,33 @@
private String field;
private String term;
+
public Term (String field, String term) {
this.field = field;
this.term = term;
};
+
public Term (String field, String term, boolean regex) {
this.field = field;
this.term = term;
this.regex = regex;
};
+
public Filter toFilter () {
// Regular expression
if (this.regex)
return new QueryWrapperFilter(
- new RegexpQuery(new org.apache.lucene.index.Term(this.field, this.term))
- );
-
+ new RegexpQuery(new org.apache.lucene.index.Term(
+ this.field, this.term)));
+
// Simple term
- return new TermsFilter(new org.apache.lucene.index.Term(this.field, this.term));
+ return new TermsFilter(new org.apache.lucene.index.Term(this.field,
+ this.term));
};
+
public String toString () {
Filter filter = this.toFilter();
if (filter == null)
@@ -144,6 +165,7 @@
return filter.toString();
};
+
public boolean isNegative () {
return this.isNegative;
};
@@ -159,21 +181,25 @@
private boolean isOptional = false;
private boolean isNegative = true;
+
public boolean isNegative () {
return this.isNegative;
};
+
public boolean isOptional () {
return this.isOptional;
};
private ArrayList<CollectionBuilder.Interface> operands;
+
public Group (boolean optional) {
this.isOptional = optional;
this.operands = new ArrayList<CollectionBuilder.Interface>(3);
};
+
public Group with (CollectionBuilder.Interface cb) {
if (cb == null)
return this;
@@ -184,12 +210,14 @@
return this;
};
+
public Group with (String field, String term) {
if (field == null || term == null)
return this;
return this.with(new CollectionBuilder.Term(field, term));
};
+
public Filter toFilter () {
if (this.operands == null || this.operands.isEmpty())
return null;
@@ -214,6 +242,7 @@
return bool;
};
+
public String toString () {
Filter filter = this.toFilter();
if (filter == null)
@@ -221,6 +250,7 @@
return filter.toString();
};
+
public CollectionBuilder.Interface not () {
this.isNegative = true;
return this;
@@ -232,16 +262,19 @@
private String field;
private int start, end;
+
public Range (String field, int start, int end) {
this.field = field;
this.start = start;
this.end = end;
};
+
public boolean isNegative () {
return this.isNegative;
};
+
public String toString () {
Filter filter = this.toFilter();
if (filter == null)
@@ -249,14 +282,13 @@
return filter.toString();
};
+
public Filter toFilter () {
- return NumericRangeFilter.newIntRange(this.field,
- this.start,
- this.end,
- true,
- true);
+ return NumericRangeFilter.newIntRange(this.field, this.start,
+ this.end, true, true);
};
+
public CollectionBuilder.Interface not () {
this.isNegative = true;
return this;
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java
deleted file mode 100644
index 37fa50f..0000000
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderLegacy.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import de.ids_mannheim.korap.collection.BooleanFilter;
-import de.ids_mannheim.korap.collection.RegexFilter;
-import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.util.KrillDate;
-
-import org.apache.lucene.search.Query;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * CollectionBuilderLegacy implements a simple API for creating queries
- * constituing Virtual Collections.
- *
- * <strong>Warning</strong>: The API is likely to change.
- *
- * @author diewald
- */
-/*
- * Todo: WildCardFilter!
- * Todo: Support delete boolean etc.
- * Todo: Supports foundries
- */
-public class CollectionBuilderLegacy {
- private BooleanFilter filter;
- private String field = "tokens";
-
- // Logger
- private final static Logger log = LoggerFactory
- .getLogger(CollectionBuilder.class);
-
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
-
- /**
- * Construct a new CollectionBuilderLegacy object.
- */
- public CollectionBuilderLegacy () {
- filter = new BooleanFilter();
- };
-
-
- public BooleanFilter and (String type, String ... terms) {
- BooleanFilter bf = new BooleanFilter();
- bf.and(type, terms);
- return bf;
- };
-
-
- public BooleanFilter or (String type, String ... terms) {
- BooleanFilter bf = new BooleanFilter();
- bf.or(type, terms);
- return bf;
- };
-
-
- public BooleanFilter and (String type, RegexFilter re) {
- BooleanFilter bf = new BooleanFilter();
- bf.and(type, re);
- return bf;
- };
-
-
- public BooleanFilter or (String type, RegexFilter re) {
- BooleanFilter bf = new BooleanFilter();
- bf.or(type, re);
- return bf;
- };
-
-
- public BooleanFilter since (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.since(date);
- return bf;
- };
-
-
- public BooleanFilter till (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.till(date);
- return bf;
- };
-
-
- public BooleanFilter date (String date) {
- BooleanFilter bf = new BooleanFilter();
- bf.date(date);
- return bf;
- };
-
-
- public BooleanFilter between (String date1, String date2) {
- BooleanFilter bf = new BooleanFilter();
- bf.between(date1, date2);
- return bf;
- };
-
-
- public RegexFilter re (String regex) {
- return new RegexFilter(regex);
- };
-
-
- public BooleanFilter getBooleanFilter () {
- return this.filter;
- };
-
-
- public void setBooleanFilter (BooleanFilter bf) {
- this.filter = bf;
- };
-
-
- public Query toQuery () {
- return this.filter.toQuery();
- };
-
-
- public String toString () {
- return this.filter.toQuery().toString();
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/FilterOperation.java b/src/main/java/de/ids_mannheim/korap/collection/FilterOperation.java
deleted file mode 100644
index 5d3be07..0000000
--- a/src/main/java/de/ids_mannheim/korap/collection/FilterOperation.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import org.apache.lucene.search.Filter;
-
-public class FilterOperation {
- private boolean extension;
- public Filter filter;
-
-
- public FilterOperation (Filter filter, boolean extension) {
- this.extension = extension;
- this.filter = filter;
- };
-
-
- public boolean isExtension () {
- return this.extension;
- };
-
-
- public boolean isFilter () {
- return !(this.extension);
- };
-
-
- @Override
- public Object clone () throws CloneNotSupportedException {
- return (Object) new FilterOperation(this.filter, this.extension);
- };
-
-
- @Override
- public String toString () {
- StringBuilder sb = new StringBuilder();
- if (this.extension) {
- sb.append("extend with ");
- }
- else {
- sb.append("filter with ");
- };
- sb.append(this.filter.toString());
- return sb.toString();
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/RegexFilter.java b/src/main/java/de/ids_mannheim/korap/collection/RegexFilter.java
deleted file mode 100644
index c948dab..0000000
--- a/src/main/java/de/ids_mannheim/korap/collection/RegexFilter.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import java.util.*;
-
-import org.apache.lucene.search.RegexpQuery;
-import org.apache.lucene.index.Term;
-
-/**
- * @author Nils Diewald
- *
- * RegexFilter implements a helper object for
- * regular expressions used in KrillCollection
- * constraints.
- */
-
-public class RegexFilter {
- String regex;
-
-
- public RegexFilter (String regex) {
- this.regex = regex;
- };
-
-
- public RegexpQuery toQuery (String field) {
- return new RegexpQuery(new Term(field, this.regex));
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/index/KeywordAnalyzer.java b/src/main/java/de/ids_mannheim/korap/index/KeywordAnalyzer.java
index e20af6f..0725778 100644
--- a/src/main/java/de/ids_mannheim/korap/index/KeywordAnalyzer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/KeywordAnalyzer.java
@@ -10,10 +10,11 @@
public class KeywordAnalyzer extends Analyzer {
- @Override
- protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
- final Tokenizer source = new WhitespaceTokenizer(reader);
- TokenStream sink = new LowerCaseFilter(source);
- return new TokenStreamComponents(source, sink);
- };
+ @Override
+ protected TokenStreamComponents createComponents (final String fieldName,
+ final Reader reader) {
+ final Tokenizer source = new WhitespaceTokenizer(reader);
+ TokenStream sink = new LowerCaseFilter(source);
+ return new TokenStreamComponents(source, sink);
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
index bca39b5..771c6c1 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
@@ -10,10 +10,11 @@
public class TextAnalyzer extends Analyzer {
- @Override
- protected TokenStreamComponents createComponents (final String fieldName, final Reader reader) {
- final Tokenizer source = new StandardTokenizer(reader);
- TokenStream sink = new LowerCaseFilter(source);
- return new TokenStreamComponents(source, sink);
- };
+ @Override
+ protected TokenStreamComponents createComponents (final String fieldName,
+ final Reader reader) {
+ final Tokenizer source = new StandardTokenizer(reader);
+ TokenStream sink = new LowerCaseFilter(source);
+ return new TokenStreamComponents(source, sink);
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index b324ffc..617f07a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -374,7 +374,7 @@
this.addHighlight(new Highlight(target, target, id));
};
-
+
/**
* Populate document meta information with information coming from
* the index.
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/DocIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/DocIdentifier.java
index b384222..53280c7 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/DocIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/DocIdentifier.java
@@ -8,8 +8,8 @@
public class DocIdentifier {
protected String textSigle, // fine
- corpusID, // LEGACY
- docID; // LEGACY
+ corpusID, // LEGACY
+ docID; // LEGACY
// Legacy
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index f9fdfbe..7ebf161 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -9,11 +9,9 @@
private ArrayList<int[]> pos = new ArrayList<>(8);
// TODO: "contains" is necessary for a compatibility bug in Kustvakt
- Pattern idRegex = Pattern
- .compile("^(?:match-|contains-)"
- + "(?:([^!]+?)[!\\.])?"
- + "([^!]+)-p([0-9]+)-([0-9]+)"
- + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
+ Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
+ + "(?:([^!]+?)[!\\.])?" + "([^!]+)-p([0-9]+)-([0-9]+)"
+ + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");