Fixed KorapResponse deserialization and collection extension bug
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index e7b8d02..1e0d173 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -31,7 +31,7 @@
* It works - so I got that going for
* me, which is nice.
*
- * @author Nils Diewald
+ * @author diewald
*/
@@ -292,48 +292,40 @@
noDoc = false;
};
+ for (FilterOperation kc : filters) {
+ if (DEBUG)
+ log.trace("FILTER: {}", kc);
+
+ // TODO: BUG???
+ docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset);
+ filterIter = docids.iterator();
+
+ if (filterIter == null) {
+ // There must be a better way ...
+ if (kc.isFilter()) {
+ // TODO: Check if this is really correct!
+ // Maybe here is the bug
+ bitset.clear(0, bitset.length());
+ noDoc = true;
+ };
+ continue;
+ };
+ if (kc.isExtension()) {
+ // System.err.println("Term found!");
+ // System.err.println("Old Card:" + bitset.cardinality());
+ bitset.or(filterIter);
+ // System.err.println("New Card:" + bitset.cardinality());
+ }
+ else {
+ bitset.and(filterIter);
+ };
+ };
+
if (!noDoc) {
- for (FilterOperation kc : filters) {
- if (DEBUG)
- log.trace("FILTER: {}", kc);
-
- // TODO: BUG!!!!!!!!!!
- docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset);
- filterIter = docids.iterator();
-
- if (filterIter == null) {
- // There must be a better way ...
- if (kc.isFilter()) {
- // TODO: Check if this is really correct!
- // Maybe here is the bug
- bitset.clear(0, bitset.length());
- noDoc = true;
- }
- else {
- // System.err.println("No term found");
- };
- continue;
- };
- if (kc.isExtension()) {
- // System.err.println("Term found!");
- // System.err.println("Old Card:" + bitset.cardinality());
- bitset.or(filterIter);
- // System.err.println("New Card:" + bitset.cardinality());
- }
- else {
- bitset.and(filterIter);
- };
+ FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
+ if (livedocs != null) {
+ bitset.and(livedocs);
};
-
- if (!noDoc) {
- FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
- if (livedocs != null) {
- bitset.and(livedocs);
- };
- };
- }
- else {
- return bitset;
};
}
else {
@@ -353,7 +345,7 @@
public long numberOf (String type) throws IOException {
if (this.index == null)
return (long) -1;
-
+
return this.index.numberOf(this, "tokens", type);
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 9781476..17ffb03 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1266,12 +1266,18 @@
// Do not load all of this, in case the doc is the same!
Document doc = lreader.document(localDocID, fields);
- KorapMatch match = kr.addMatch(
+
+ // Create new KorapMatch
+ KorapMatch match = new KorapMatch(
pto,
localDocID,
spans.start(),
spans.end()
);
+ match.setContext(kr.getContext());
+
+ // Add match to KorapResult
+ kr.add(match);
if (spans.isPayloadAvailable())
match.addPayload((List<byte[]>) spans.getPayload());
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 6b5516e..524f54f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -19,42 +19,56 @@
import java.util.List;
/*
-TODO: Reuse the KorapSearch code for data serialization!
+ TODO: Reuse the KorapSearch code for data serialization!
*/
-
+/**
+ * Response class for search results.
+ *
+ * @author diewald
+ * @see KorapResponse
+ */
@JsonInclude(Include.NON_NULL)
@JsonIgnoreProperties(ignoreUnknown = true)
public class KorapResult extends KorapResponse {
ObjectMapper mapper = new ObjectMapper();
@JsonIgnore
- public static final short ITEMS_PER_PAGE = 25;
+ public static final short ITEMS_PER_PAGE = 25;
+ public static final short ITEMS_PER_PAGE_MAX = 100;
private int startIndex = 0;
- private long totalTexts, totalResults;
-
private String query;
private List<KorapMatch> matches;
-
private SearchContext context;
private short itemsPerPage = ITEMS_PER_PAGE,
- itemsPerResource = 0;
+ itemsPerResource = 0;
private JsonNode request;
-
// Logger
// This is KorapMatch instead of KorapResult!
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
- // Empty result
+
+ /**
+ * Construct a new KorapResult object.
+ */
public KorapResult() {
mapper.enable(SerializationFeature.INDENT_OUTPUT);
};
+
+ /**
+ * Construct a new KorapResult object.
+ *
+ * @param query Query representation as a string.
+ * @param startIndex Offset position in match array.
+ * @param itemsPerPage Number of matches per page.
+ * @param context Requested {@link SearchContext}
+ */
public KorapResult(String query,
int startIndex,
short itemsPerPage,
@@ -67,71 +81,66 @@
this.matches = new ArrayList<>(itemsPerPage);
this.query = query;
this.startIndex = startIndex;
- this.itemsPerPage = (itemsPerPage > 50 || itemsPerPage < 1) ?
- ITEMS_PER_PAGE : itemsPerPage;
+ this.itemsPerPage =
+ (itemsPerPage > ITEMS_PER_PAGE_MAX || itemsPerPage < 1) ?
+ ITEMS_PER_PAGE : itemsPerPage;
this.context = context;
};
+ /**
+ * Add a new match to the result set.
+ *
+ * @param km A {@link KorapMatch} to add.
+ */
public void add (KorapMatch km) {
this.matches.add(km);
};
- public KorapMatch addMatch (PositionsToOffset pto,
- int localDocID,
- int startPos,
- int endPos) {
- KorapMatch km = new KorapMatch(pto, localDocID, startPos, endPos);
- // Temporary - should use the same interface like results
- // in the future:
- km.setContext(this.context);
- this.add(km);
- return km;
- };
-
- public short getItemsPerPage() {
+ /**
+ * Get number of items shown per page.
+ *
+ * @return Number of items shown per page.
+ */
+ public short getItemsPerPage () {
return this.itemsPerPage;
};
- public void setRequest(JsonNode request) {
- this.request = request;
+
+ /**
+ * Set number of items shown per page.
+ *
+ * @param count Number of items shown per page.
+ * @return {@link KorapResult} object for chaining.
+ */
+ public KorapResult setItemsPerPage (short count) {
+ this.itemsPerPage = count;
+ return this;
};
- public JsonNode getRequest() {
+
+ /**
+ * Get serialized query as a {@link JsonNode}.
+ *
+ * @return {@link JsonNode} representation of the query object.
+ */
+ public JsonNode getRequest () {
return this.request;
};
- // Make this working in a KorapResult class
- // that is independent from search and collection
- public KorapResult setTotalTexts (long i) {
- this.totalTexts = i;
- return this;
+
+ /**
+ * Set serialized query as a {@link JsonNode}.
+ *
+ * @param request {@link JsonNode} representation of the query object.
+ * @return {@link KorapResult} object for chaining.
+ */
+ public KorapResult setRequest (JsonNode request) {
+ this.request = request;
+ return this;
};
- public KorapResult incrTotalTexts (int i) {
- this.totalTexts += i;
- return this;
- };
-
- public long getTotalTexts() {
- return this.totalTexts;
- };
-
-
- public KorapResult setTotalResults (long i) {
- this.totalResults = i;
- return this;
- };
-
- public KorapResult incrTotalResults (int i) {
- this.totalResults += i;
- return this;
- };
-
- public long getTotalResults() {
- return this.totalResults;
- };
@JsonIgnore
public void setItemsPerResource (short value) {
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index bc5d205..08c2da1 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -15,29 +15,33 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
-// Todo: Use configuration file
-
/*
- Todo: Let this class extend KorapResult!
- KorapResult = new KorapSearch(String json).run(KorapIndex ki);
-*/
+ * Todo: Use configuration file
+ * Todo: Let this class extend KorapResult!
+ * KorapResult = new KorapSearch(String json).run(KorapIndex ki);
+ * Todo: Set timeout default value per config file
+ */
/**
- * @author Nils Diewald
+ * KorapSearch is the central class for parameterized searches
+ * in the index, including the query, the collection,
+ * and result parameters.
*
- * KorapSearch implements an object for all search relevant parameters.
+ * @author diewald
+ *
*/
public class KorapSearch extends Notifications {
- private int startIndex = 0,
- limit = 0;
- private short count = 25,
- countMax = 50;
+ private int
+ startIndex = 0,
+ limit = 0;
+ private short
+ count = 25,
+ countMax = 50;
private boolean cutOff = false;
private short itemsPerResource = 0;
private SpanQuery query;
private KorapCollection collection;
private KorapIndex index;
- // private String error, warning;
// Timeout search after milliseconds
private long timeout = (long) 120_000;
@@ -52,135 +56,143 @@
private long timeoutStart = Long.MIN_VALUE;
{
- context = new SearchContext();
+ context = new SearchContext();
- // Lift legacy fields per default
- fields = new HashSet<String>(16);
- for (String field : new String[]{
- "ID",
- "UID",
- "textSigle",
- "corpusID",
- "author",
- "title",
- "subTitle",
- "textClass",
- "pubPlace",
- "pubDate",
- "foundries",
- "layerInfo",
- "tokenization"}) {
- fields.add(field);
- };
+ // Lift legacy fields per default
+ fields = new HashSet<String>(16);
+ for (String field : new String[]{
+ "ID",
+ "UID",
+ "textSigle",
+ "corpusID",
+ "author",
+ "title",
+ "subTitle",
+ "textClass",
+ "pubPlace",
+ "pubDate",
+ "foundries",
+ "layerInfo",
+ "tokenization"}) {
+ fields.add(field);
+ };
};
public KorapSearch (String jsonString) {
- ObjectMapper mapper = new ObjectMapper();
+ ObjectMapper mapper = new ObjectMapper();
- try {
- // Todo - use correct method!
- this.request = mapper.readTree(jsonString);
+ try {
+ // Todo - use correct method!
+ this.request = mapper.readTree(jsonString);
+
+ // "query" value
+ if (this.request.has("query")) {
+ try {
+ KorapQuery kq = new KorapQuery("tokens");
+ SpanQueryWrapper qw = kq.fromJson(this.request.get("query"));
+
+ if (qw.isEmpty()) {
+
+ // Unable to process result
+ this.addError(780, "This query matches everywhere");
+ }
+ else {
+ this.query = qw.toQuery();
+ if (qw.isOptional())
+ this.addWarning(781, "Optionality of query is ignored");
+ if (qw.isNegative())
+ this.addWarning(782, "Exclusivity of query is ignored");
+
+ };
+ // Copy notifications from query
+ this.copyNotificationsFrom(kq);
+ kq.clearNotifications();
+ }
+ catch (QueryException q) {
+ this.addError(q.getErrorCode(), q.getMessage());
+ };
+ }
+ else {
+ this.addError(700, "No query given");
+ };
+
+ // <legacycode>
+ if (this.request.has("warning") &&
+ this.request.get("warning").asText().length() > 0) {
+ this.addWarning(
+ 799,
+ this.request.get("warning").asText()
+ );
+ };
+ // </legacycode>
+
+ // <legacycode>
+ if (this.request.has("warnings")) {
+ JsonNode warnings = this.request.get("warnings");
+ for (JsonNode node : warnings)
+ if (node.asText().length() > 0)
+ this.addWarning(799, node.asText());
+ };
+ // </legacycode>
+
+ // Copy notifications from request
+ this.copyNotificationsFrom(this.request);
- // "query" value
- if (this.request.has("query")) {
- try {
- KorapQuery kq = new KorapQuery("tokens");
- SpanQueryWrapper qw = kq.fromJson(this.request.get("query"));
+ // virtual collections
+ if (this.request.has("collection") ||
+ // <legacycode>
+ this.request.has("collections")
+ // </legacycode>
+ ) {
+ this.setCollection(new KorapCollection(jsonString));
+ };
- if (qw.isEmpty()) {
+ // No errors - go on with parsing
+ if (!this.hasErrors()) {
+ if (this.request.has("meta")) {
+ JsonNode meta = this.request.get("meta");
- // Unable to process result
- this.addError(780, "This query matches everywhere");
- }
- else {
- this.query = qw.toQuery();
- if (qw.isOptional())
- this.addWarning(781, "Optionality of query is ignored");
- if (qw.isNegative())
- this.addWarning(782, "Exclusivity of query is ignored");
+ // Defined count
+ if (meta.has("count"))
+ this.setCount(meta.get("count").asInt());
- };
- // Copy notifications from query
- this.copyNotificationsFrom(kq);
- kq.clearNotifications();
- }
- catch (QueryException q) {
- this.addError(q.getErrorCode(), q.getMessage());
- };
- }
- else {
- this.addError(700, "No query given");
- };
+ // Defined startIndex
+ if (meta.has("startIndex"))
+ this.setStartIndex(meta.get("startIndex").asInt());
- // <legacycode>
- if (this.request.has("warning") &&
- this.request.get("warning").asText().length() > 0)
- this.addWarning(
- 799,
- this.request.get("warning").asText()
- );
- // </legacycode>
- // <legacycode>
- if (this.request.has("warnings")) {
- JsonNode warnings = this.request.get("warnings");
- for (JsonNode node : warnings)
- if (node.asText().length() > 0)
- this.addWarning(799, node.asText());
- };
- // </legacycode>
+ // Defined startPage
+ if (meta.has("startPage"))
+ this.setStartPage(meta.get("startPage").asInt());
- // Copy notifications from request
- this.copyNotificationsFrom(this.request);
-
- // virtual collections
- if (this.request.has("collection") ||
- // Legacy collections
- this.request.has("collections"))
- this.setCollection(new KorapCollection(jsonString));
+ // Defined cutOff
+ if (meta.has("cutOff"))
+ this.setCutOff(meta.get("cutOff").asBoolean());
- if (!this.hasErrors()) {
- if (this.request.has("meta")) {
- JsonNode meta = this.request.get("meta");
+ // Defined contexts
+ if (meta.has("context"))
+ this.context.fromJson(meta.get("context"));
- // Defined count
- if (meta.has("count"))
- this.setCount(meta.get("count").asInt());
+ // Defined timeout
+ if (meta.has("timeout"))
+ this.setTimeOut(meta.get("timeout").asLong());
- // Defined startIndex
- if (meta.has("startIndex"))
- this.setStartIndex(meta.get("startIndex").asInt());
+ // Defined resource count
+ if (meta.has("itemsPerResource"))
+ this.setItemsPerResource(
+ meta.get("itemsPerResource").asInt()
+ );
- // Defined startPage
- if (meta.has("startPage"))
- this.setStartPage(meta.get("startPage").asInt());
+ // Only lift a limited amount of fields from the metadata
+ if (meta.has("fields")) {
+
+ // Remove legacy default fields
+ this.fields.clear();
- // Defined cutOff
- if (meta.has("cutOff"))
- this.setCutOff(meta.get("cutOff").asBoolean());
-
- // Defined contexts
- if (meta.has("context"))
- this.context.fromJson(meta.get("context"));
-
- // Defined resource count
- if (meta.has("timeout"))
- this.setTimeOut(meta.get("timeout").asLong());
-
- // Defined resource count
- if (meta.has("itemsPerResource"))
- this.setItemsPerResource(meta.get("itemsPerResource").asInt());
-
- // Only lift a limited amount of fields from the metadata
- if (meta.has("fields")) {
-
- // Remove legacy default fields
- this.fields.clear();
-
- // Add fields
- if (meta.get("fields").isArray()) {
- for (JsonNode field : (JsonNode) meta.get("fields")) {
- this.addField(field.asText());
- };
+ // Add fields
+ if (meta.get("fields").isArray()) {
+ for (JsonNode field : (JsonNode) meta.get("fields")) {
+ this.addField(field.asText());
+ };
}
else
this.addField(meta.get("fields").asText());
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 8c86a24..4b7b9f8 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -129,6 +129,7 @@
this.setPrimaryData((String) node.get("text"));
String fieldName = (String) node.get("name");
+
MultiTermTokenStream mtts = this.newMultiTermTokenStream();
// Iterate over all tokens in stream
@@ -138,8 +139,9 @@
MultiTermToken mtt = new MultiTermToken(token.remove(0));
// Add rest of the list
- for (String term : token)
+ for (String term : token) {
mtt.add(term);
+ };
// Add MultiTermToken to stream
mtts.addMultiTermToken(mtt);
@@ -164,7 +166,7 @@
/**
* Deserialize token stream data (LEGACY).
*/
- public void setFields (ArrayList<Map<String,Object>> fields) {
+ public void setFields (ArrayList<Map<String, Object>> fields) {
Map<String,Object> primary = fields.remove(0);
this.setPrimaryData((String) primary.get("primaryData"));
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
index 114adac..982002f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
@@ -5,55 +5,33 @@
public class MatchCollector extends KorapResponse {
public int totalResultDocs = 0;
- private int totalResults;
- private long totalTexts;
+ /*
+ private int totalResults;
+ private long totalTexts;
+ */
public void add (int uniqueDocID, int matchcount) {
- this.totalResultDocs++;
- this.incrTotalResults(matchcount);
+ this.totalResultDocs++;
+ this.incrTotalResults(matchcount);
};
public MatchCollector setTotalResultDocs (int i) {
- this.totalResultDocs = i;
- return this;
+ this.totalResultDocs = i;
+ return this;
};
public MatchCollector incrTotalResultDocs (int i) {
- this.totalResultDocs += i;
- return this;
+ this.totalResultDocs += i;
+ return this;
};
public int getTotalResultDocs () {
- return totalResultDocs;
+ return totalResultDocs;
};
- // Make this working in a KorapResult class
- // that is independent from search and collection
- public MatchCollector setTotalTexts (long i) {
- this.totalTexts = i;
- return this;
- };
+ public void commit () {};
- public long getTotalTexts() {
- return this.totalTexts;
- };
-
- public MatchCollector setTotalResults (int i) {
- this.totalResults = i;
- return this;
- };
-
- public MatchCollector incrTotalResults (int i) {
- this.totalResults += i;
- return this;
- };
-
- public int getTotalResults() {
- return this.totalResults;
- };
-
- public void commit() {};
- public void close() {};
+ public void close () {};
/*
* The following methods are shared and should be used from KorapResult
diff --git a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
index 7ec1e10..806d54c 100644
--- a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
+++ b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
@@ -31,6 +31,9 @@
ObjectMapper mapper = new ObjectMapper();
private String version, name, node, listener;
+ private long
+ totalTexts = -2, // Not set
+ totalResults = -2; // Not set
private String benchmark;
private boolean timeExceeded = false;
@@ -223,6 +226,92 @@
/**
+ * Get the total number of results.
+ *
+ * @return The total number of results.
+ */
+ public long getTotalResults () {
+ if (this.totalResults == -2)
+ return (long) 0;
+ return this.totalResults;
+ };
+
+
+ /**
+ * Set the total number of results.
+ *
+ * @param results The total number of results.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse setTotalResults (long results) {
+ this.totalResults = results;
+ return this;
+ };
+
+
+ /**
+ * Increment the total number of results by a certain value.
+ *
+ * @param incr The number of results the total number should
+ * be incremented by.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse incrTotalResults (int incr) {
+ if (this.totalResults < 0)
+ this.totalResults = incr;
+ else
+ this.totalResults += incr;
+ return this;
+ };
+
+
+ /**
+ * Get the total number of texts the total number of
+ * results occur in.
+ *
+ * @return The total number of texts the total number of
+ * results occur in.
+ */
+ public long getTotalTexts () {
+ if (this.totalTexts == -2)
+ return (long) 0;
+ return this.totalTexts;
+ };
+
+
+ /**
+ * Set the total number of texts the total number of
+ * results occur in.
+ *
+ * @param texts The total number of texts the total
+ * number of results occur in.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse setTotalTexts (long texts) {
+ this.totalTexts = texts;
+ return this;
+ };
+
+
+ /**
+ * Increment the total number of texts the total number
+ * of results occur in by a certain value.
+ *
+ * @param i The number of texts the total number of
+ * results occur in should be incremented by.
+ * (I don't care that this isn't English!)
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse incrTotalTexts (int i) {
+ if (this.totalTexts < 0)
+ this.totalTexts = i;
+ else
+ this.totalTexts += i;
+ return this;
+ };
+
+
+ /**
* Serialize response as a {@link JsonNode}.
*
* @return {@link JsonNode} representation of the response
@@ -260,6 +349,14 @@
if (this.getBenchmark() != null)
json.put("benchmark", this.getBenchmark());
+ // totalTexts is set
+ if (this.totalTexts != -2)
+ json.put("totalTexts", this.totalTexts);
+
+ // totalResults is set
+ if (this.totalResults != -2)
+ json.put("totalResults", this.totalResults);
+
return (JsonNode) json;
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index 68bae83..c3d7edd 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -74,7 +74,7 @@
* Set warnings by means of a {@link JsonNode}.
*
* @param msgs JSON array of warnings.
- * @return Notifications object for chaining.
+ * @return {@link Notifications} object for chaining.
*/
public Notifications setWarnings (JsonNode msgs) {
for (JsonNode msg : msgs)
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 98b2947..71a8d03 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -21,7 +21,7 @@
# Collections
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
# Results:
#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout