Fixed KorapResponse deserialization and collection extension bug
diff --git a/Changes b/Changes
index a2f7ddf..3e98af2 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,9 @@
-0.49.3 2014-01-15
+0.49.3 2015-01-26
- [documentation] Improved documentation for API classes (diewald)
- [documentation] Improved documentation for various queries (margaretha)
+ - [feature] Added deserialization of SpanSubSpanQueries (margaretha)
+ - [bugfix] Null filters are now correctly extended (diewald)
+ - [cleanup] Refactoring of KorapResult and KorapResponse (diewald)
0.49.2 2014-12-05
- [documentation] Improved documentation for various queries (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index e7b8d02..1e0d173 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -31,7 +31,7 @@
* It works - so I got that going for
* me, which is nice.
*
- * @author Nils Diewald
+ * @author diewald
*/
@@ -292,48 +292,40 @@
noDoc = false;
};
+ for (FilterOperation kc : filters) {
+ if (DEBUG)
+ log.trace("FILTER: {}", kc);
+
+ // TODO: BUG???
+ docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset);
+ filterIter = docids.iterator();
+
+ if (filterIter == null) {
+ // There must be a better way ...
+ if (kc.isFilter()) {
+ // TODO: Check if this is really correct!
+ // Maybe here is the bug
+ bitset.clear(0, bitset.length());
+ noDoc = true;
+ };
+ continue;
+ };
+ if (kc.isExtension()) {
+ // System.err.println("Term found!");
+ // System.err.println("Old Card:" + bitset.cardinality());
+ bitset.or(filterIter);
+ // System.err.println("New Card:" + bitset.cardinality());
+ }
+ else {
+ bitset.and(filterIter);
+ };
+ };
+
if (!noDoc) {
- for (FilterOperation kc : filters) {
- if (DEBUG)
- log.trace("FILTER: {}", kc);
-
- // TODO: BUG!!!!!!!!!!
- docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset);
- filterIter = docids.iterator();
-
- if (filterIter == null) {
- // There must be a better way ...
- if (kc.isFilter()) {
- // TODO: Check if this is really correct!
- // Maybe here is the bug
- bitset.clear(0, bitset.length());
- noDoc = true;
- }
- else {
- // System.err.println("No term found");
- };
- continue;
- };
- if (kc.isExtension()) {
- // System.err.println("Term found!");
- // System.err.println("Old Card:" + bitset.cardinality());
- bitset.or(filterIter);
- // System.err.println("New Card:" + bitset.cardinality());
- }
- else {
- bitset.and(filterIter);
- };
+ FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
+ if (livedocs != null) {
+ bitset.and(livedocs);
};
-
- if (!noDoc) {
- FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
- if (livedocs != null) {
- bitset.and(livedocs);
- };
- };
- }
- else {
- return bitset;
};
}
else {
@@ -353,7 +345,7 @@
public long numberOf (String type) throws IOException {
if (this.index == null)
return (long) -1;
-
+
return this.index.numberOf(this, "tokens", type);
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 9781476..17ffb03 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1266,12 +1266,18 @@
// Do not load all of this, in case the doc is the same!
Document doc = lreader.document(localDocID, fields);
- KorapMatch match = kr.addMatch(
+
+ // Create new KorapMatch
+ KorapMatch match = new KorapMatch(
pto,
localDocID,
spans.start(),
spans.end()
);
+ match.setContext(kr.getContext());
+
+ // Add match to KorapResult
+ kr.add(match);
if (spans.isPayloadAvailable())
match.addPayload((List<byte[]>) spans.getPayload());
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 6b5516e..524f54f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -19,42 +19,56 @@
import java.util.List;
/*
-TODO: Reuse the KorapSearch code for data serialization!
+ TODO: Reuse the KorapSearch code for data serialization!
*/
-
+/**
+ * Response class for search results.
+ *
+ * @author diewald
+ * @see KorapResponse
+ */
@JsonInclude(Include.NON_NULL)
@JsonIgnoreProperties(ignoreUnknown = true)
public class KorapResult extends KorapResponse {
ObjectMapper mapper = new ObjectMapper();
@JsonIgnore
- public static final short ITEMS_PER_PAGE = 25;
+ public static final short ITEMS_PER_PAGE = 25;
+ public static final short ITEMS_PER_PAGE_MAX = 100;
private int startIndex = 0;
- private long totalTexts, totalResults;
-
private String query;
private List<KorapMatch> matches;
-
private SearchContext context;
private short itemsPerPage = ITEMS_PER_PAGE,
- itemsPerResource = 0;
+ itemsPerResource = 0;
private JsonNode request;
-
// Logger
// This is KorapMatch instead of KorapResult!
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
- // Empty result
+
+ /**
+ * Construct a new KorapResult object.
+ */
public KorapResult() {
mapper.enable(SerializationFeature.INDENT_OUTPUT);
};
+
+ /**
+ * Construct a new KorapResult object.
+ *
+ * @param query Query representation as a string.
+ * @param startIndex Offset position in match array.
+ * @param itemsPerPage Number of matches per page.
+ * @param context Requested {@link SearchContext}
+ */
public KorapResult(String query,
int startIndex,
short itemsPerPage,
@@ -67,71 +81,66 @@
this.matches = new ArrayList<>(itemsPerPage);
this.query = query;
this.startIndex = startIndex;
- this.itemsPerPage = (itemsPerPage > 50 || itemsPerPage < 1) ?
- ITEMS_PER_PAGE : itemsPerPage;
+ this.itemsPerPage =
+ (itemsPerPage > ITEMS_PER_PAGE_MAX || itemsPerPage < 1) ?
+ ITEMS_PER_PAGE : itemsPerPage;
this.context = context;
};
+ /**
+ * Add a new match to the result set.
+ *
+ * @param km A {@link KorapMatch} to add.
+ */
public void add (KorapMatch km) {
this.matches.add(km);
};
- public KorapMatch addMatch (PositionsToOffset pto,
- int localDocID,
- int startPos,
- int endPos) {
- KorapMatch km = new KorapMatch(pto, localDocID, startPos, endPos);
- // Temporary - should use the same interface like results
- // in the future:
- km.setContext(this.context);
- this.add(km);
- return km;
- };
-
- public short getItemsPerPage() {
+ /**
+ * Get number of items shown per page.
+ *
+ * @return Number of items shown per page.
+ */
+ public short getItemsPerPage () {
return this.itemsPerPage;
};
- public void setRequest(JsonNode request) {
- this.request = request;
+
+ /**
+ * Set number of items shown per page.
+ *
+ * @param count Number of items shown per page.
+ * @return {@link KorapResult} object for chaining.
+ */
+ public KorapResult setItemsPerPage (short count) {
+ this.itemsPerPage = count;
+ return this;
};
- public JsonNode getRequest() {
+
+ /**
+ * Get serialized query as a {@link JsonNode}.
+ *
+ * @return {@link JsonNode} representation of the query object.
+ */
+ public JsonNode getRequest () {
return this.request;
};
- // Make this working in a KorapResult class
- // that is independent from search and collection
- public KorapResult setTotalTexts (long i) {
- this.totalTexts = i;
- return this;
+
+ /**
+ * Set serialized query as a {@link JsonNode}.
+ *
+ * @param request {@link JsonNode} representation of the query object.
+ * @return {@link KorapResult} object for chaining.
+ */
+ public KorapResult setRequest (JsonNode request) {
+ this.request = request;
+ return this;
};
- public KorapResult incrTotalTexts (int i) {
- this.totalTexts += i;
- return this;
- };
-
- public long getTotalTexts() {
- return this.totalTexts;
- };
-
-
- public KorapResult setTotalResults (long i) {
- this.totalResults = i;
- return this;
- };
-
- public KorapResult incrTotalResults (int i) {
- this.totalResults += i;
- return this;
- };
-
- public long getTotalResults() {
- return this.totalResults;
- };
@JsonIgnore
public void setItemsPerResource (short value) {
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index bc5d205..08c2da1 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -15,29 +15,33 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
-// Todo: Use configuration file
-
/*
- Todo: Let this class extend KorapResult!
- KorapResult = new KorapSearch(String json).run(KorapIndex ki);
-*/
+ * Todo: Use configuration file
+ * Todo: Let this class extend KorapResult!
+ * KorapResult = new KorapSearch(String json).run(KorapIndex ki);
+ * Todo: Set timeout default value per config file
+ */
/**
- * @author Nils Diewald
+ * KorapSearch is the central class for parameterized searches
+ * in the index, including the query, the collection,
+ * and result parameters.
*
- * KorapSearch implements an object for all search relevant parameters.
+ * @author diewald
+ *
*/
public class KorapSearch extends Notifications {
- private int startIndex = 0,
- limit = 0;
- private short count = 25,
- countMax = 50;
+ private int
+ startIndex = 0,
+ limit = 0;
+ private short
+ count = 25,
+ countMax = 50;
private boolean cutOff = false;
private short itemsPerResource = 0;
private SpanQuery query;
private KorapCollection collection;
private KorapIndex index;
- // private String error, warning;
// Timeout search after milliseconds
private long timeout = (long) 120_000;
@@ -52,135 +56,143 @@
private long timeoutStart = Long.MIN_VALUE;
{
- context = new SearchContext();
+ context = new SearchContext();
- // Lift legacy fields per default
- fields = new HashSet<String>(16);
- for (String field : new String[]{
- "ID",
- "UID",
- "textSigle",
- "corpusID",
- "author",
- "title",
- "subTitle",
- "textClass",
- "pubPlace",
- "pubDate",
- "foundries",
- "layerInfo",
- "tokenization"}) {
- fields.add(field);
- };
+ // Lift legacy fields per default
+ fields = new HashSet<String>(16);
+ for (String field : new String[]{
+ "ID",
+ "UID",
+ "textSigle",
+ "corpusID",
+ "author",
+ "title",
+ "subTitle",
+ "textClass",
+ "pubPlace",
+ "pubDate",
+ "foundries",
+ "layerInfo",
+ "tokenization"}) {
+ fields.add(field);
+ };
};
public KorapSearch (String jsonString) {
- ObjectMapper mapper = new ObjectMapper();
+ ObjectMapper mapper = new ObjectMapper();
- try {
- // Todo - use correct method!
- this.request = mapper.readTree(jsonString);
+ try {
+ // Todo - use correct method!
+ this.request = mapper.readTree(jsonString);
+
+ // "query" value
+ if (this.request.has("query")) {
+ try {
+ KorapQuery kq = new KorapQuery("tokens");
+ SpanQueryWrapper qw = kq.fromJson(this.request.get("query"));
+
+ if (qw.isEmpty()) {
+
+ // Unable to process result
+ this.addError(780, "This query matches everywhere");
+ }
+ else {
+ this.query = qw.toQuery();
+ if (qw.isOptional())
+ this.addWarning(781, "Optionality of query is ignored");
+ if (qw.isNegative())
+ this.addWarning(782, "Exclusivity of query is ignored");
+
+ };
+ // Copy notifications from query
+ this.copyNotificationsFrom(kq);
+ kq.clearNotifications();
+ }
+ catch (QueryException q) {
+ this.addError(q.getErrorCode(), q.getMessage());
+ };
+ }
+ else {
+ this.addError(700, "No query given");
+ };
+
+ // <legacycode>
+ if (this.request.has("warning") &&
+ this.request.get("warning").asText().length() > 0) {
+ this.addWarning(
+ 799,
+ this.request.get("warning").asText()
+ );
+ };
+ // </legacycode>
+
+ // <legacycode>
+ if (this.request.has("warnings")) {
+ JsonNode warnings = this.request.get("warnings");
+ for (JsonNode node : warnings)
+ if (node.asText().length() > 0)
+ this.addWarning(799, node.asText());
+ };
+ // </legacycode>
+
+ // Copy notifications from request
+ this.copyNotificationsFrom(this.request);
- // "query" value
- if (this.request.has("query")) {
- try {
- KorapQuery kq = new KorapQuery("tokens");
- SpanQueryWrapper qw = kq.fromJson(this.request.get("query"));
+ // virtual collections
+ if (this.request.has("collection") ||
+ // <legacycode>
+ this.request.has("collections")
+ // </legacycode>
+ ) {
+ this.setCollection(new KorapCollection(jsonString));
+ };
- if (qw.isEmpty()) {
+ // No errors - go on with parsing
+ if (!this.hasErrors()) {
+ if (this.request.has("meta")) {
+ JsonNode meta = this.request.get("meta");
- // Unable to process result
- this.addError(780, "This query matches everywhere");
- }
- else {
- this.query = qw.toQuery();
- if (qw.isOptional())
- this.addWarning(781, "Optionality of query is ignored");
- if (qw.isNegative())
- this.addWarning(782, "Exclusivity of query is ignored");
+ // Defined count
+ if (meta.has("count"))
+ this.setCount(meta.get("count").asInt());
- };
- // Copy notifications from query
- this.copyNotificationsFrom(kq);
- kq.clearNotifications();
- }
- catch (QueryException q) {
- this.addError(q.getErrorCode(), q.getMessage());
- };
- }
- else {
- this.addError(700, "No query given");
- };
+ // Defined startIndex
+ if (meta.has("startIndex"))
+ this.setStartIndex(meta.get("startIndex").asInt());
- // <legacycode>
- if (this.request.has("warning") &&
- this.request.get("warning").asText().length() > 0)
- this.addWarning(
- 799,
- this.request.get("warning").asText()
- );
- // </legacycode>
- // <legacycode>
- if (this.request.has("warnings")) {
- JsonNode warnings = this.request.get("warnings");
- for (JsonNode node : warnings)
- if (node.asText().length() > 0)
- this.addWarning(799, node.asText());
- };
- // </legacycode>
+ // Defined startPage
+ if (meta.has("startPage"))
+ this.setStartPage(meta.get("startPage").asInt());
- // Copy notifications from request
- this.copyNotificationsFrom(this.request);
-
- // virtual collections
- if (this.request.has("collection") ||
- // Legacy collections
- this.request.has("collections"))
- this.setCollection(new KorapCollection(jsonString));
+ // Defined cutOff
+ if (meta.has("cutOff"))
+ this.setCutOff(meta.get("cutOff").asBoolean());
- if (!this.hasErrors()) {
- if (this.request.has("meta")) {
- JsonNode meta = this.request.get("meta");
+ // Defined contexts
+ if (meta.has("context"))
+ this.context.fromJson(meta.get("context"));
- // Defined count
- if (meta.has("count"))
- this.setCount(meta.get("count").asInt());
+ // Defined timeout
+ if (meta.has("timeout"))
+ this.setTimeOut(meta.get("timeout").asLong());
- // Defined startIndex
- if (meta.has("startIndex"))
- this.setStartIndex(meta.get("startIndex").asInt());
+ // Defined resource count
+ if (meta.has("itemsPerResource"))
+ this.setItemsPerResource(
+ meta.get("itemsPerResource").asInt()
+ );
- // Defined startPage
- if (meta.has("startPage"))
- this.setStartPage(meta.get("startPage").asInt());
+ // Only lift a limited amount of fields from the metadata
+ if (meta.has("fields")) {
+
+ // Remove legacy default fields
+ this.fields.clear();
- // Defined cutOff
- if (meta.has("cutOff"))
- this.setCutOff(meta.get("cutOff").asBoolean());
-
- // Defined contexts
- if (meta.has("context"))
- this.context.fromJson(meta.get("context"));
-
- // Defined resource count
- if (meta.has("timeout"))
- this.setTimeOut(meta.get("timeout").asLong());
-
- // Defined resource count
- if (meta.has("itemsPerResource"))
- this.setItemsPerResource(meta.get("itemsPerResource").asInt());
-
- // Only lift a limited amount of fields from the metadata
- if (meta.has("fields")) {
-
- // Remove legacy default fields
- this.fields.clear();
-
- // Add fields
- if (meta.get("fields").isArray()) {
- for (JsonNode field : (JsonNode) meta.get("fields")) {
- this.addField(field.asText());
- };
+ // Add fields
+ if (meta.get("fields").isArray()) {
+ for (JsonNode field : (JsonNode) meta.get("fields")) {
+ this.addField(field.asText());
+ };
}
else
this.addField(meta.get("fields").asText());
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 8c86a24..4b7b9f8 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -129,6 +129,7 @@
this.setPrimaryData((String) node.get("text"));
String fieldName = (String) node.get("name");
+
MultiTermTokenStream mtts = this.newMultiTermTokenStream();
// Iterate over all tokens in stream
@@ -138,8 +139,9 @@
MultiTermToken mtt = new MultiTermToken(token.remove(0));
// Add rest of the list
- for (String term : token)
+ for (String term : token) {
mtt.add(term);
+ };
// Add MultiTermToken to stream
mtts.addMultiTermToken(mtt);
@@ -164,7 +166,7 @@
/**
* Deserialize token stream data (LEGACY).
*/
- public void setFields (ArrayList<Map<String,Object>> fields) {
+ public void setFields (ArrayList<Map<String, Object>> fields) {
Map<String,Object> primary = fields.remove(0);
this.setPrimaryData((String) primary.get("primaryData"));
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
index 114adac..982002f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
@@ -5,55 +5,33 @@
public class MatchCollector extends KorapResponse {
public int totalResultDocs = 0;
- private int totalResults;
- private long totalTexts;
+ /*
+ private int totalResults;
+ private long totalTexts;
+ */
public void add (int uniqueDocID, int matchcount) {
- this.totalResultDocs++;
- this.incrTotalResults(matchcount);
+ this.totalResultDocs++;
+ this.incrTotalResults(matchcount);
};
public MatchCollector setTotalResultDocs (int i) {
- this.totalResultDocs = i;
- return this;
+ this.totalResultDocs = i;
+ return this;
};
public MatchCollector incrTotalResultDocs (int i) {
- this.totalResultDocs += i;
- return this;
+ this.totalResultDocs += i;
+ return this;
};
public int getTotalResultDocs () {
- return totalResultDocs;
+ return totalResultDocs;
};
- // Make this working in a KorapResult class
- // that is independent from search and collection
- public MatchCollector setTotalTexts (long i) {
- this.totalTexts = i;
- return this;
- };
+ public void commit () {};
- public long getTotalTexts() {
- return this.totalTexts;
- };
-
- public MatchCollector setTotalResults (int i) {
- this.totalResults = i;
- return this;
- };
-
- public MatchCollector incrTotalResults (int i) {
- this.totalResults += i;
- return this;
- };
-
- public int getTotalResults() {
- return this.totalResults;
- };
-
- public void commit() {};
- public void close() {};
+ public void close () {};
/*
* The following methods are shared and should be used from KorapResult
diff --git a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
index 7ec1e10..806d54c 100644
--- a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
+++ b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
@@ -31,6 +31,9 @@
ObjectMapper mapper = new ObjectMapper();
private String version, name, node, listener;
+ private long
+ totalTexts = -2, // Not set
+ totalResults = -2; // Not set
private String benchmark;
private boolean timeExceeded = false;
@@ -223,6 +226,92 @@
/**
+ * Get the total number of results.
+ *
+ * @return The total number of results.
+ */
+ public long getTotalResults () {
+ if (this.totalResults == -2)
+ return (long) 0;
+ return this.totalResults;
+ };
+
+
+ /**
+ * Set the total number of results.
+ *
+ * @param results The total number of results.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse setTotalResults (long results) {
+ this.totalResults = results;
+ return this;
+ };
+
+
+ /**
+ * Increment the total number of results by a certain value.
+ *
+ * @param incr The number of results the total number should
+ * be incremented by.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse incrTotalResults (int incr) {
+ if (this.totalResults < 0)
+ this.totalResults = incr;
+ else
+ this.totalResults += incr;
+ return this;
+ };
+
+
+ /**
+ * Get the total number of texts in which the
+ * results occur.
+ *
+ * @return The total number of texts in which the
+ * results occur.
+ */
+ public long getTotalTexts () {
+ if (this.totalTexts == -2)
+ return (long) 0;
+ return this.totalTexts;
+ };
+
+
+ /**
+ * Set the total number of texts the total number of
+ * results occur in.
+ *
+ * @param texts The total number of texts the total
+ * number of results occur in.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse setTotalTexts (long texts) {
+ this.totalTexts = texts;
+ return this;
+ };
+
+
+ /**
+ * Increment the total number of texts the total number
+ * of results occur in by a certain value.
+ *
+ * @param i The value by which the total number of
+ * texts in which the results occur should be
+ * incremented.
+ * @return {@link KorapResponse} object for chaining.
+ */
+ public KorapResponse incrTotalTexts (int i) {
+ if (this.totalTexts < 0)
+ this.totalTexts = i;
+ else
+ this.totalTexts += i;
+ return this;
+ };
+
+
+ /**
* Serialize response as a {@link JsonNode}.
*
* @return {@link JsonNode} representation of the response
@@ -260,6 +349,14 @@
if (this.getBenchmark() != null)
json.put("benchmark", this.getBenchmark());
+ // totalTexts is set
+ if (this.totalTexts != -2)
+ json.put("totalTexts", this.totalTexts);
+
+ // totalResults is set
+ if (this.totalResults != -2)
+ json.put("totalResults", this.totalResults);
+
return (JsonNode) json;
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index 68bae83..c3d7edd 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -74,7 +74,7 @@
* Set warnings by means of a {@link JsonNode}.
*
* @param msgs JSON array of warnings.
- * @return Notifications object for chaining.
+ * @return {@link Notifications} object for chaining.
*/
public Notifications setWarnings (JsonNode msgs) {
for (JsonNode msg : msgs)
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 98b2947..71a8d03 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -21,7 +21,7 @@
# Collections
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
# Results:
#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index d7f30a4..87dc893 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -32,929 +32,1133 @@
public class TestKorapSearch {
@Test
public void searchCount () {
- KorapSearch ks = new KorapSearch(
- new KorapQuery("field1").seg("a").with("b")
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("field1").seg("a").with("b")
);
- // Count:
- ks.setCount(30);
- assertEquals(ks.getCount(), 30);
- ks.setCount(20);
- assertEquals(ks.getCount(), 20);
- ks.setCount(-50);
- assertEquals(ks.getCount(), 20);
- ks.setCount(500);
- assertEquals(ks.getCount(), ks.getCountMax());
+ // Count:
+ ks.setCount(30);
+ assertEquals(ks.getCount(), 30);
+ ks.setCount(20);
+ assertEquals(ks.getCount(), 20);
+ ks.setCount(-50);
+ assertEquals(ks.getCount(), 20);
+ ks.setCount(500);
+ assertEquals(ks.getCount(), ks.getCountMax());
};
@Test
public void searchStartIndex () {
- KorapSearch ks = new KorapSearch(
- new KorapQuery("field1").seg("a").with("b")
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("field1").seg("a").with("b")
);
- // startIndex
- ks.setStartIndex(5);
- assertEquals(ks.getStartIndex(), 5);
- ks.setStartIndex(1);
- assertEquals(ks.getStartIndex(), 1);
- ks.setStartIndex(0);
- assertEquals(ks.getStartIndex(), 0);
- ks.setStartIndex(70);
- assertEquals(ks.getStartIndex(), 70);
- ks.setStartIndex(-5);
- assertEquals(ks.getStartIndex(), 0);
+ // startIndex
+ ks.setStartIndex(5);
+ assertEquals(ks.getStartIndex(), 5);
+ ks.setStartIndex(1);
+ assertEquals(ks.getStartIndex(), 1);
+ ks.setStartIndex(0);
+ assertEquals(ks.getStartIndex(), 0);
+ ks.setStartIndex(70);
+ assertEquals(ks.getStartIndex(), 70);
+ ks.setStartIndex(-5);
+ assertEquals(ks.getStartIndex(), 0);
};
@Test
public void searchQuery () {
- KorapSearch ks = new KorapSearch(
- new KorapQuery("field1").seg("a").with("b")
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("field1").seg("a").with("b")
);
- // query
- assertEquals(ks.getQuery().toString(), "spanSegment(field1:a, field1:b)");
+ // query
+ assertEquals(ks.getQuery().toString(), "spanSegment(field1:a, field1:b)");
};
+
@Test
public void searchIndex () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- KorapSearch ks = new KorapSearch(
- new KorapQuery("tokens").seg("s:Buchstaben")
- );
- ks.getCollection().filter(
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("tokens").seg("s:Buchstaben")
+ );
+ ks.getCollection().filter(
new KorapFilter().and("textClass", "reisen")
);
- ks.setCount(3);
- ks.setStartIndex(5);
- ks.context.left.setLength(1);
- ks.context.right.setLength(1);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 6);
- assertEquals(kr.getMatch(0).getSnippetBrackets(), "... dem [Buchstaben] A ...");
+ ks.setCount(3);
+ ks.setStartIndex(5);
+ ks.context.left.setLength(1);
+ ks.context.right.setLength(1);
+ KorapResult kr = ks.run(ki);
+ assertEquals(kr.getTotalResults(), 6);
+ assertEquals(
+ kr.getMatch(0).getSnippetBrackets(),
+ "... dem [Buchstaben] A ..."
+ );
};
+
@Test
public void searchJSON () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/metaquery3.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/metaquery3.jsonld").getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
-
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 66);
- assertEquals(5, kr.getItemsPerPage());
- assertEquals(5, kr.getStartIndex());
- assertEquals("... a: A ist [der klangreichste] der V ...", kr.getMatch(0).getSnippetBrackets());
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(kr.getTotalResults(), 66);
+ assertEquals(5, kr.getItemsPerPage());
+ assertEquals(5, kr.getStartIndex());
+ assertEquals(
+ "... a: A ist [der klangreichste] der V ...",
+ kr.getMatch(0).getSnippetBrackets()
+ );
};
@Test
public void searchJSON2 () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439",
- "00012-fakemeta",
- "00030-fakemeta",
- /*
- "02035-substring",
- "05663-unbalanced",
- "07452-deep"
- */
- }) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439",
+ "00012-fakemeta",
+ "00030-fakemeta",
+ /*
+ "02035-substring",
+ "05663-unbalanced",
+ "07452-deep"
+ */
+ }) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/metaquery4.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/metaquery4.jsonld").getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 1);
+ assertEquals(kr.getTotalResults(), 1);
- ks = new KorapSearch(json);
- // Ignore the collection part of the query!
- ks.setCollection(new KorapCollection());
- kr = ks.run(ki);
+ ks = new KorapSearch(json);
+ // Ignore the collection part of the query!
+ ks.setCollection(new KorapCollection());
+ kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 5);
+ assertEquals(kr.getTotalResults(), 5);
- json = getString(getClass().getResource("/queries/metaquery5.jsonld").getFile());
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 1);
+ json = getString(
+ getClass().getResource("/queries/metaquery5.jsonld").getFile()
+ );
- json = getString(getClass().getResource("/queries/metaquery6.jsonld").getFile());
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 1);
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(kr.getTotalResults(), 1);
+
+ json = getString(
+ getClass().getResource("/queries/metaquery6.jsonld").getFile()
+ );
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(kr.getTotalResults(), 1);
};
@Test
public void searchJSONFailure () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"
- }) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"
+ }) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
-
- KorapResult kr = new KorapSearch("{ query").run(ki);
-
- assertEquals(kr.getTotalResults(), 0);
- assertEquals(kr.getError(0).getMessage(), "Unable to parse JSON");
+ };
+ ki.commit();
+ KorapResult kr = new KorapSearch("{ query").run(ki);
+ assertEquals(kr.getTotalResults(), 0);
+ assertEquals(kr.getError(0).getMessage(), "Unable to parse JSON");
};
-
@Test
public void searchJSONindexboundary () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-fail1.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/bsp-fail1.jsonld").getFile()
+ );
- KorapResult kr = new KorapSearch(json).run(ki);
- assertEquals(0, kr.getStartIndex());
- assertEquals(kr.getTotalResults(), 0);
- assertEquals(25, kr.getItemsPerPage());
+ KorapResult kr = new KorapSearch(json).run(ki);
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(kr.getTotalResults(), 0);
+ assertEquals(25, kr.getItemsPerPage());
};
+
@Test
public void searchJSONindexboundary2 () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-fail2.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/bsp-fail2.jsonld").getFile()
+ );
- KorapResult kr = new KorapSearch(json).run(ki);
- assertEquals(50, kr.getItemsPerPage());
- assertEquals(49950, kr.getStartIndex());
- assertEquals(kr.getTotalResults(), 0);
+        KorapResult kr = new KorapSearch(json).run(ki);
+	assertEquals(50, kr.getItemsPerPage());
+	assertEquals(49950, kr.getStartIndex());
+	assertEquals(0, kr.getTotalResults());
};
@Test
public void searchJSONcontext () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-context.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/bsp-context.jsonld").getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 10);
- assertEquals("A bzw. a ist der erste Buchstabe des lateinischen [Alphabets] und ein Vokal. Der Buchstabe A hat in deutschen Texten eine durchschnittliche Häufigkeit ...", kr.getMatch(0).getSnippetBrackets());
+        KorapSearch ks = new KorapSearch(json);
+	KorapResult kr = ks.run(ki);
+	assertEquals(10, kr.getTotalResults());
+ assertEquals("A bzw. a ist der erste Buchstabe des" +
+ " lateinischen [Alphabets] und ein Vokal." +
+ " Der Buchstabe A hat in deutschen Texten" +
+ " eine durchschnittliche Häufigkeit ...",
+ kr.getMatch(0).getSnippetBrackets());
- ks.setCount(5);
- ks.setStartPage(2);
- kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 10);
- assertEquals(5, kr.getStartIndex());
- assertEquals(5, kr.getItemsPerPage());
+        ks.setCount(5);
+	ks.setStartPage(2);
+	kr = ks.run(ki);
+	assertEquals(10, kr.getTotalResults());
+	assertEquals(5, kr.getStartIndex());
+	assertEquals(5, kr.getItemsPerPage());
+ json = getString(
+ getClass().getResource("/queries/bsp-context-2.jsonld").getFile()
+ );
- json = getString(getClass().getResource("/queries/bsp-context-2.jsonld").getFile());
-
- kr = new KorapSearch(json).run(ki);
- assertEquals(kr.getTotalResults(), -1);
- assertEquals("... lls seit den Griechen beibehalten worden. 3. Bedeutungen in der Biologie steht A für das Nukleosid Adenosin steht A die Base Adenin steht A für die Aminosäure Alanin in der Informatik steht a für den dezimalen [Wert] 97 sowohl im ASCII- als auch im Unicode-Zeichensatz steht A für den dezimalen Wert 65 sowohl im ASCII- als auch im Unicode-Zeichensatz als Kfz-Kennzeichen steht A in Deutschland für Augsburg. in Österreich auf ...", kr.getMatch(0).getSnippetBrackets());
+        kr = new KorapSearch(json).run(ki);
+	assertEquals(-1, kr.getTotalResults());
+ assertEquals("... lls seit den Griechen beibehalten worden." +
+ " 3. Bedeutungen in der Biologie steht A für"+
+ " das Nukleosid Adenosin steht A die Base"+
+ " Adenin steht A für die Aminosäure Alanin"+
+ " in der Informatik steht a für den dezimalen"+
+ " [Wert] 97 sowohl im ASCII- als auch im"+
+ " Unicode-Zeichensatz steht A für den dezimalen"+
+ " Wert 65 sowohl im ASCII- als auch im"+
+ " Unicode-Zeichensatz als Kfz-Kennzeichen"+
+ " steht A in Deutschland für Augsburg."+
+ " in Österreich auf ...",
+ kr.getMatch(0).getSnippetBrackets());
};
@Test
public void searchJSONstartPage () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-paging.jsonld").getFile());
+ String json = getString(
+ getClass().getResource("/queries/bsp-paging.jsonld").getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 10);
- assertEquals(5, kr.getStartIndex());
- assertEquals(5, kr.getItemsPerPage());
+        KorapSearch ks = new KorapSearch(json);
+	KorapResult kr = ks.run(ki);
+	assertEquals(10, kr.getTotalResults());
+	assertEquals(5, kr.getStartIndex());
+	assertEquals(5, kr.getItemsPerPage());
- json = getString(getClass().getResource("/queries/bsp-cutoff.jsonld").getFile());
- ks = ks = new KorapSearch(json);
+        json = getString(
+	    getClass().getResource("/queries/bsp-cutoff.jsonld").getFile()
+        );
+	ks = new KorapSearch(json);
+	kr = ks.run(ki);
+	assertEquals(-1, kr.getTotalResults());
+	assertEquals(2, kr.getStartIndex());
+	assertEquals(2, kr.getItemsPerPage());
- kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), -1);
- assertEquals(2, kr.getStartIndex());
- assertEquals(2, kr.getItemsPerPage());
-
-
- json = getString(getClass().getResource("/queries/metaquery9.jsonld").getFile());
- KorapCollection kc = new KorapCollection(json);
- kc.setIndex(ki);
- assertEquals(7, kc.numberOf("documents"));
+ json = getString(
+ getClass().getResource("/queries/metaquery9.jsonld").getFile()
+ );
+ KorapCollection kc = new KorapCollection(json);
+ kc.setIndex(ki);
+ assertEquals(7, kc.numberOf("documents"));
};
+
@Test
public void searchJSONitemsPerResource () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
+ String json = getString(
+ getClass().
+ getResource("/queries/bsp-itemsPerResource.jsonld").
+ getFile()
+ );
- String json = getString(getClass().getResource("/queries/bsp-itemsPerResource.jsonld").getFile());
+        KorapSearch ks = new KorapSearch(json);
+	KorapResult kr = ks.run(ki);
+	assertEquals(10, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(20, kr.getItemsPerPage());
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 10);
- assertEquals(0, kr.getStartIndex());
- assertEquals(20, kr.getItemsPerPage());
+ assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
+ assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
+ assertEquals("WPD_AAA.00001", kr.getMatch(6).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(7).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(8).getDocID());
+ assertEquals("WPD_AAA.00004", kr.getMatch(9).getDocID());
- assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
- assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
- assertEquals("WPD_AAA.00001", kr.getMatch(6).getDocID());
- assertEquals("WPD_AAA.00002", kr.getMatch(7).getDocID());
- assertEquals("WPD_AAA.00002", kr.getMatch(8).getDocID());
- assertEquals("WPD_AAA.00004", kr.getMatch(9).getDocID());
+ ks = new KorapSearch(json);
+ ks.setItemsPerResource(1);
- ks = new KorapSearch(json);
- ks.setItemsPerResource(1);
+ kr = ks.run(ki);
- kr = ks.run(ki);
+ assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(1).getDocID());
+ assertEquals("WPD_AAA.00004", kr.getMatch(2).getDocID());
+
+	assertEquals(3, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(20, kr.getItemsPerPage());
+
+ ks = new KorapSearch(json);
+ ks.setItemsPerResource(2);
- assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
- assertEquals("WPD_AAA.00002", kr.getMatch(1).getDocID());
- assertEquals("WPD_AAA.00004", kr.getMatch(2).getDocID());
+ kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 3);
- assertEquals(0, kr.getStartIndex());
- assertEquals(20, kr.getItemsPerPage());
+ assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
+ assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(2).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(3).getDocID());
+ assertEquals("WPD_AAA.00004", kr.getMatch(4).getDocID());
+
+	assertEquals(5, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(20, kr.getItemsPerPage());
+ ks = new KorapSearch(json);
+ ks.setItemsPerResource(1);
+ ks.setStartIndex(1);
+ ks.setCount(1);
- ks = new KorapSearch(json);
- ks.setItemsPerResource(2);
-
- kr = ks.run(ki);
-
- assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
- assertEquals("WPD_AAA.00001", kr.getMatch(1).getDocID());
- assertEquals("WPD_AAA.00002", kr.getMatch(2).getDocID());
- assertEquals("WPD_AAA.00002", kr.getMatch(3).getDocID());
- assertEquals("WPD_AAA.00004", kr.getMatch(4).getDocID());
-
- assertEquals(kr.getTotalResults(), 5);
- assertEquals(0, kr.getStartIndex());
- assertEquals(20, kr.getItemsPerPage());
-
-
- ks = new KorapSearch(json);
- ks.setItemsPerResource(1);
- ks.setStartIndex(1);
- ks.setCount(1);
-
- kr = ks.run(ki);
+ kr = ks.run(ki);
- assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
- assertEquals(kr.getTotalResults(), 3);
- assertEquals(1, kr.getStartIndex());
- assertEquals(1, kr.getItemsPerPage());
+        assertEquals(3, kr.getTotalResults());
+	assertEquals(1, kr.getStartIndex());
+	assertEquals(1, kr.getItemsPerPage());
- assertEquals((short) 1, kr.getItemsPerResource());
+ assertEquals((short) 1, kr.getItemsPerResource());
};
+
@Test
public void searchJSONitemsPerResourceServer () throws IOException {
-
- /*
- * This test is a server-only implementation of
- * TestResource#testCollection
- */
-
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- int uid = 1;
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- uid++,
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
- true
+ /*
+ * This test is a server-only implementation of
+ * TestResource#testCollection
+ */
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ int uid = 1;
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ uid++,
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-uid-example.jsonld").getFile());
+ String json = getString(
+ getClass().
+ getResource("/queries/bsp-uid-example.jsonld").
+ getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
- ks.setItemsPerResource(1);
- KorapCollection kc = new KorapCollection();
- kc.filterUIDs(new String[]{"1", "4"});
- kc.setIndex(ki);
- ks.setCollection(kc);
+ KorapSearch ks = new KorapSearch(json);
+ ks.setItemsPerResource(1);
+ KorapCollection kc = new KorapCollection();
+ kc.filterUIDs(new String[]{"1", "4"});
+ kc.setIndex(ki);
+ ks.setCollection(kc);
- KorapResult kr = ks.run(ki);
+ KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 2);
- assertEquals(0, kr.getStartIndex());
- assertEquals(25, kr.getItemsPerPage());
+        assertEquals(2, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(25, kr.getItemsPerPage());
};
+
@Test
public void searchJSONnewJSON () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- FieldDocument fd = ki.addDocFile(
- 1,getClass().getResource("/goe/AGA-03828.json.gz").getFile(), true
- );
- ki.commit();
-
- assertEquals(fd.getUID(), 1);
- assertEquals(fd.getTextSigle(), "GOE_AGA.03828");
- assertEquals(fd.getDocSigle(), "GOE_AGA");
- assertEquals(fd.getCorpusSigle(), "GOE");
- assertEquals(fd.getTitle() , "Autobiographische Einzelheiten");
- assertNull(fd.getSubTitle());
- assertEquals(fd.getTextType(), "Autobiographie");
- assertNull(fd.getTextTypeArt());
- assertNull(fd.getTextTypeRef());
- assertNull(fd.getTextColumn());
- assertNull(fd.getTextDomain());
- assertEquals(fd.getPages(), "529-547");
- assertEquals(fd.getLicense(), "QAO-NC");
- assertEquals(fd.getCreationDate().toString(), "18200000");
- assertEquals(fd.getPubDate().toString(), "19820000");
- assertEquals(fd.getAuthor(), "Goethe, Johann Wolfgang von");
- assertNull(fd.getTextClass());
- assertEquals(fd.getLanguage(), "de");
- assertEquals(fd.getPubPlace(), "München");
- assertEquals(fd.getReference(), "Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547");
- assertEquals(fd.getPublisher(), "Verlag C. H. Beck");
- assertNull(fd.getEditor());
- assertNull(fd.getFileEditionStatement());
- assertNull(fd.getBiblEditionStatement());
- assertNull(fd.getKeywords());
-
- assertEquals(fd.getTokenSource(), "opennlp#tokens");
- assertEquals(fd.getFoundries(), "base base/paragraphs base/sentences corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences");
- assertEquals(fd.getLayerInfos(), "base/s=spans corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans");
-
-
- assertEquals(fd.getCorpusTitle(), "Goethes Werke");
- assertNull(fd.getCorpusSubTitle());
- assertEquals(fd.getCorpusAuthor(), "Goethe, Johann Wolfgang von");
- assertEquals(fd.getCorpusEditor(), "Trunz, Erich");
-
- assertEquals(fd.getDocTitle(), "Goethe: Autobiographische Schriften II, (1817-1825, 1832)");
- assertNull(fd.getDocSubTitle());
- assertNull(fd.getDocEditor());
- assertNull(fd.getDocAuthor());
-
- KorapSearch ks = new KorapSearch(
- new KorapQuery("tokens").seg("mate/m:case:nom").with("mate/m:number:pl")
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ FieldDocument fd = ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/goe/AGA-03828.json.gz").
+ getFile(),
+ true
);
- KorapResult kr = ks.run(ki);
+ ki.commit();
- assertEquals(kr.getTotalResults(), 148);
- assertEquals(0, kr.getStartIndex());
- assertEquals(25, kr.getItemsPerPage());
+        assertEquals(1, fd.getUID());
+	assertEquals("GOE_AGA.03828", fd.getTextSigle());
+	assertEquals("GOE_AGA", fd.getDocSigle());
+	assertEquals("GOE", fd.getCorpusSigle());
+	assertEquals("Autobiographische Einzelheiten", fd.getTitle());
+	assertNull(fd.getSubTitle());
+	assertEquals("Autobiographie", fd.getTextType());
+	assertNull(fd.getTextTypeArt());
+	assertNull(fd.getTextTypeRef());
+	assertNull(fd.getTextColumn());
+	assertNull(fd.getTextDomain());
+	assertEquals("529-547", fd.getPages());
+	assertEquals("QAO-NC", fd.getLicense());
+	assertEquals("18200000", fd.getCreationDate().toString());
+	assertEquals("19820000", fd.getPubDate().toString());
+	assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
+	assertNull(fd.getTextClass());
+	assertEquals("de", fd.getLanguage());
+	assertEquals("München", fd.getPubPlace());
+	assertEquals("Goethe, Johann Wolfgang von:"+
+		     " Autobiographische Einzelheiten,"+
+		     " (Geschrieben bis 1832), In: Goethe,"+
+		     " Johann Wolfgang von: Goethes Werke,"+
+		     " Bd. 10, Autobiographische Schriften"+
+		     " II, Hrsg.: Trunz, Erich. München: "+
+		     "Verlag C. H. Beck, 1982, S. 529-547",
+		     fd.getReference());
+	assertEquals("Verlag C. H. Beck", fd.getPublisher());
+	assertNull(fd.getEditor());
+	assertNull(fd.getFileEditionStatement());
+	assertNull(fd.getBiblEditionStatement());
+	assertNull(fd.getKeywords());
+
+	assertEquals("opennlp#tokens", fd.getTokenSource());
+	assertEquals("base base/paragraphs base/sentences corenlp "+
+		     "corenlp/constituency corenlp/morpho "+
+		     "corenlp/namedentities corenlp/sentences "+
+		     "glemm glemm/morpho mate mate/morpho"+
+		     " opennlp opennlp/morpho opennlp/sentences"+
+		     " treetagger treetagger/morpho "+
+		     "treetagger/sentences",
+		     fd.getFoundries());
+	assertEquals("base/s=spans corenlp/c=spans corenlp/ne=tokens"+
+		     " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"+
+		     " mate/l=tokens mate/m=tokens mate/p=tokens"+
+		     " opennlp/p=tokens opennlp/s=spans tt/l=tokens"+
+		     " tt/p=tokens tt/s=spans",
+		     fd.getLayerInfos());
+
+	assertEquals("Goethes Werke", fd.getCorpusTitle());
+	assertNull(fd.getCorpusSubTitle());
+	assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
+	assertEquals("Trunz, Erich", fd.getCorpusEditor());
+	assertEquals(
+            "Goethe: Autobiographische Schriften II, (1817-1825, 1832)",
+            fd.getDocTitle());
+	assertNull(fd.getDocSubTitle());
+	assertNull(fd.getDocEditor());
+	assertNull(fd.getDocAuthor());
+
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("tokens").
+ seg("mate/m:case:nom").
+ with("mate/m:number:pl")
+ );
+ KorapResult kr = ks.run(ki);
+
+        assertEquals(148, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(25, kr.getItemsPerPage());
};
+
@Test
public void searchJSONnewJSON2 () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- FieldDocument fd = ki.addDocFile(
- 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
- ki.commit();
-
- assertEquals(fd.getUID(), 1);
- assertEquals(fd.getTextSigle(), "BZK_D59.00089");
- assertEquals(fd.getDocSigle(), "BZK_D59");
- assertEquals(fd.getCorpusSigle(), "BZK");
- assertEquals(fd.getTitle() , "Saragat-Partei zerfällt");
- assertEquals(fd.getPubDate().toString(), "19590219");
-
- assertNull(fd.getSubTitle());
- assertNull(fd.getAuthor());
- assertNull(fd.getEditor());
- assertEquals(fd.getPubPlace(), "Berlin");
- assertNull(fd.getPublisher());
- assertEquals(fd.getTextType(), "Zeitung: Tageszeitung");
- assertNull(fd.getTextTypeArt());
- assertEquals(fd.getTextTypeRef(), "Tageszeitung");
- assertEquals(fd.getTextDomain(), "Politik");
- assertEquals(fd.getCreationDate().toString(), "19590219");
- assertEquals(fd.getLicense(), "ACA-NC-LC");
- assertEquals(fd.getTextColumn(), "POLITIK");
- assertNull(fd.getPages());
- assertEquals(fd.getTextClass(), "politik ausland");
- assertNull(fd.getFileEditionStatement());
- assertNull(fd.getBiblEditionStatement());
-
- assertEquals(fd.getLanguage(), "de");
- assertEquals(fd.getReference(), "Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14, Berliner Ausgabe, S. 7. - Sachgebiet: Politik, Originalressort: POLITIK; Saragat-Partei zerfällt");
- assertNull(fd.getPublisher());
- assertNull(fd.getKeywords());
-
- assertEquals(fd.getTokenSource(), "opennlp#tokens");
-
- assertEquals(fd.getFoundries(), "base base/paragraphs base/sentences corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences");
-
- assertEquals(fd.getLayerInfos(), "base/s=spans corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans");
-
- assertEquals(fd.getCorpusTitle(), "Bonner Zeitungskorpus");
- assertNull(fd.getCorpusSubTitle());
- assertNull(fd.getCorpusAuthor());
- assertNull(fd.getCorpusEditor());
-
- assertEquals(fd.getDocTitle(), "Neues Deutschland");
- assertEquals(fd.getDocSubTitle(), "Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands");
- assertNull(fd.getDocEditor());
- assertNull(fd.getDocAuthor());
-
- KorapSearch ks = new KorapSearch(
- new KorapQuery("tokens").seg("mate/m:case:nom").with("mate/m:number:sg")
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ FieldDocument fd = ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
);
- KorapResult kr = ks.run(ki);
+ ki.commit();
- assertEquals(kr.getTotalResults(), 6);
- assertEquals(0, kr.getStartIndex());
- assertEquals(25, kr.getItemsPerPage());
+        assertEquals(1, fd.getUID());
+	assertEquals("BZK_D59.00089", fd.getTextSigle());
+	assertEquals("BZK_D59", fd.getDocSigle());
+	assertEquals("BZK", fd.getCorpusSigle());
+	assertEquals("Saragat-Partei zerfällt", fd.getTitle());
+	assertEquals("19590219", fd.getPubDate().toString());
+
+	assertNull(fd.getSubTitle());
+	assertNull(fd.getAuthor());
+	assertNull(fd.getEditor());
+	assertEquals("Berlin", fd.getPubPlace());
+	assertNull(fd.getPublisher());
+	assertEquals("Zeitung: Tageszeitung", fd.getTextType());
+	assertNull(fd.getTextTypeArt());
+	assertEquals("Tageszeitung", fd.getTextTypeRef());
+	assertEquals("Politik", fd.getTextDomain());
+	assertEquals("19590219", fd.getCreationDate().toString());
+	assertEquals("ACA-NC-LC", fd.getLicense());
+	assertEquals("POLITIK", fd.getTextColumn());
+	assertNull(fd.getPages());
+	assertEquals("politik ausland", fd.getTextClass());
+	assertNull(fd.getFileEditionStatement());
+	assertNull(fd.getBiblEditionStatement());
+
+	assertEquals("de", fd.getLanguage());
+	assertEquals(
+	    "Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14,"+
+	    " Berliner Ausgabe, S. 7. - Sachgebiet: Politik, "+
+	    "Originalressort: POLITIK; Saragat-Partei zerfällt",
+	    fd.getReference());
+	assertNull(fd.getPublisher());
+	assertNull(fd.getKeywords());
+
+	assertEquals("opennlp#tokens", fd.getTokenSource());
+
+	assertEquals(
+	    "base base/paragraphs base/sentences corenlp "+
+	    "corenlp/constituency corenlp/morpho corenlp/namedentities"+
+	    " corenlp/sentences glemm glemm/morpho mate mate/morpho"+
+	    " opennlp opennlp/morpho opennlp/sentences treetagger"+
+	    " treetagger/morpho treetagger/sentences",
+	    fd.getFoundries());
+
+	assertEquals(
+	    "base/s=spans corenlp/c=spans corenlp/ne=tokens"+
+	    " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"+
+	    " mate/l=tokens mate/m=tokens mate/p=tokens"+
+	    " opennlp/p=tokens opennlp/s=spans tt/l=tokens"+
+	    " tt/p=tokens tt/s=spans",
+	    fd.getLayerInfos());
+
+	assertEquals("Bonner Zeitungskorpus", fd.getCorpusTitle());
+	assertNull(fd.getCorpusSubTitle());
+	assertNull(fd.getCorpusAuthor());
+	assertNull(fd.getCorpusEditor());
+
+	assertEquals("Neues Deutschland", fd.getDocTitle());
+	assertEquals(
+	    "Organ des Zentralkomitees der Sozialistischen "+
+	    "Einheitspartei Deutschlands",
+	    fd.getDocSubTitle());
+	assertNull(fd.getDocEditor());
+	assertNull(fd.getDocAuthor());
+
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("tokens").
+ seg("mate/m:case:nom").
+ with("mate/m:number:sg")
+ );
+ KorapResult kr = ks.run(ki);
+
+        assertEquals(6, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
+	assertEquals(25, kr.getItemsPerPage());
};
+
@Test
public void searchJSONcosmasBoundaryBug () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- FieldDocument fd = ki.addDocFile(
- 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
- ki.commit();
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+        ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
+ );
+ ki.commit();
- String json = getString(
- getClass().getResource("/queries/bugs/cosmas_boundary.jsonld").getFile()
+ String json = getString(
+ getClass().
+ getResource("/queries/bugs/cosmas_boundary.jsonld").
+ getFile()
);
- KorapQuery kq = new KorapQuery("tokens");
+ KorapQuery kq = new KorapQuery("tokens");
+ KorapSearch ks = new KorapSearch(
+ kq.shrink(
+ 1,
+ kq.contains(kq.tag("base/s:s"), kq._(1, kq.seg("s:Leben")))
+ )
+ );
- KorapSearch ks = new KorapSearch(
- kq.shrink(1,kq.contains(kq.tag("base/s:s"), kq._(1, kq.seg("s:Leben"))))
- );
-
- KorapResult kr = ks.run(ki);
- assertEquals(
+ KorapResult kr = ks.run(ki);
+ assertEquals(
kr.getQuery(),
- "shrink(1: spanContain(<tokens:base/s:s />, {1: tokens:s:Leben}))"
+ "shrink(1: spanContain(<tokens:base/s:s />, {1: tokens:s:Leben}))"
);
- assertEquals(
+ assertEquals(
kr.getMatch(0).getSnippetBrackets(),
"... Initiative\" eine neue politische Gruppierung ins " +
- "[{1:Leben}] gerufen hatten. Pressemeldungen zufolge haben sich ..."
+ "[{1:Leben}] gerufen hatten. Pressemeldungen zufolge haben sich ..."
);
- // Try with high class - don't highlight
- ks = new KorapSearch(
- kq.shrink(129, kq.contains(kq.tag("base/s:s"), kq._(129, kq.seg("s:Leben"))))
- );
+ // Try with high class - don't highlight
+ ks = new KorapSearch(
+ kq.shrink(
+ 129,
+ kq.contains(kq.tag("base/s:s"), kq._(129, kq.seg("s:Leben")))
+ )
+ );
- kr = ks.run(ki);
- assertEquals(
+ kr = ks.run(ki);
+ assertEquals(
kr.getQuery(),
- "shrink(129: spanContain(<tokens:base/s:s />, {129: tokens:s:Leben}))"
+ "shrink(129: spanContain(<tokens:base/s:s />, {129: tokens:s:Leben}))"
);
- assertEquals(
+ assertEquals(
kr.getMatch(0).getSnippetBrackets(),
"... Initiative\" eine neue politische Gruppierung ins " +
- "[Leben] gerufen hatten. Pressemeldungen zufolge haben sich ..."
+ "[Leben] gerufen hatten. Pressemeldungen zufolge haben sich ..."
);
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(
kr.getQuery(),
- "shrink(129: spanElementDistance({129: tokens:s:Namen}, " +
- "{129: tokens:s:Leben}, [(base/s:s[0:1], notOrdered, notExcluded)]))"
+ "shrink(129: spanElementDistance({129: tokens:s:Namen}, " +
+ "{129: tokens:s:Leben}, [(base/s:s[0:1], notOrdered, notExcluded)]))"
);
- assertEquals(
- kr.getMatch(0).getSnippetBrackets(),
- "... ihren Austritt erklärt und unter dem [Namen \"Einheitsbewegung " +
- "der sozialistischen Initiative\" eine neue politische Gruppierung " +
- "ins Leben] gerufen hatten. Pressemeldungen zufolge haben sich ..."
+ assertEquals(
+ kr.getMatch(0).getSnippetBrackets(),
+ "... ihren Austritt erklärt und unter dem [Namen \"Einheitsbewegung " +
+ "der sozialistischen Initiative\" eine neue politische Gruppierung " +
+ "ins Leben] gerufen hatten. Pressemeldungen zufolge haben sich ..."
);
-
- assertEquals(kr.getTotalResults(), 1);
- assertEquals(0, kr.getStartIndex());
+        assertEquals(1, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
};
@Test
public void searchJSONmultipleClassesBug () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- ki.addDocFile(
- 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
- ki.addDocFile(
- 2,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
+ );
+ ki.addDocFile(
+ 2,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
+ );
- ki.commit();
+ ki.commit();
- String json = getString(
- getClass().getResource("/queries/bugs/multiple_classes.jsonld").getFile()
+ String json = getString(
+ getClass().
+ getResource("/queries/bugs/multiple_classes.jsonld").
+ getFile()
);
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(
kr.getQuery(),
- "{4: spanNext({1: spanNext({2: tokens:s:ins}, {3: tokens:s:Leben})}, tokens:s:gerufen)}"
+ "{4: spanNext({1: spanNext({2: tokens:s:ins}, "+
+ "{3: tokens:s:Leben})}, tokens:s:gerufen)}"
);
- assertEquals(
- kr.getMatch(0).getSnippetBrackets(),
- "... sozialistischen Initiative\" eine neue politische Gruppierung " +
- "[{4:{1:{2:ins} {3:Leben}} gerufen}] hatten. " +
- "Pressemeldungen zufolge haben sich in ..."
+ assertEquals(
+ kr.getMatch(0).getSnippetBrackets(),
+ "... sozialistischen Initiative\" eine neue politische"+
+ " Gruppierung [{4:{1:{2:ins} {3:Leben}} gerufen}] hatten. " +
+ "Pressemeldungen zufolge haben sich in ..."
);
-
- assertEquals(kr.getTotalResults(), 2);
- assertEquals(0, kr.getStartIndex());
+        assertEquals(2, kr.getTotalResults());
+	assertEquals(0, kr.getStartIndex());
};
@Test
public void searchJSONmultipleClassesBugTokenList () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- ki.addDocFile(
- 1,getClass().getResource("/goe/AGA-03828.json.gz").getFile(), true
- );
- ki.addDocFile(
- 2,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/goe/AGA-03828.json.gz").
+ getFile(),
+ true
+ );
+ ki.addDocFile(
+ 2,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
+ );
- ki.commit();
+ ki.commit();
- String json = getString(
- getClass().getResource("/queries/bugs/multiple_classes.jsonld").getFile()
+ String json = getString(
+ getClass().
+ getResource("/queries/bugs/multiple_classes.jsonld").
+ getFile()
);
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
- ObjectMapper mapper = new ObjectMapper();
- JsonNode res = mapper.readTree(kr.toTokenListJsonString());
+ ObjectMapper mapper = new ObjectMapper();
+ JsonNode res = mapper.readTree(kr.toTokenListJsonString());
- assertEquals(1, res.at("/totalResults").asInt());
- assertEquals("{4: spanNext({1: spanNext({2: tokens:s:ins}, " +
- "{3: tokens:s:Leben})}, tokens:s:gerufen)}", res.at("/query").asText());
- assertEquals(0, res.at("/startIndex").asInt());
- assertEquals(25, res.at("/itemsPerPage").asInt());
+ assertEquals(1, res.at("/totalResults").asInt());
+ assertEquals(
+ "{4: spanNext({1: spanNext({2: tokens:s:ins}, " +
+ "{3: tokens:s:Leben})}, tokens:s:gerufen)}",
+ res.at("/query").asText());
+ assertEquals(0, res.at("/startIndex").asInt());
+ assertEquals(25, res.at("/itemsPerPage").asInt());
- assertEquals("BZK_D59.00089", res.at("/matches/0/textSigle").asText());
- assertEquals(328, res.at("/matches/0/tokens/0/0").asInt());
- assertEquals(331, res.at("/matches/0/tokens/0/1").asInt());
- assertEquals(332, res.at("/matches/0/tokens/1/0").asInt());
- assertEquals(337, res.at("/matches/0/tokens/1/1").asInt());
- assertEquals(338, res.at("/matches/0/tokens/2/0").asInt());
- assertEquals(345, res.at("/matches/0/tokens/2/1").asInt());
+ assertEquals("BZK_D59.00089", res.at("/matches/0/textSigle").asText());
+ assertEquals(328, res.at("/matches/0/tokens/0/0").asInt());
+ assertEquals(331, res.at("/matches/0/tokens/0/1").asInt());
+ assertEquals(332, res.at("/matches/0/tokens/1/0").asInt());
+ assertEquals(337, res.at("/matches/0/tokens/1/1").asInt());
+ assertEquals(338, res.at("/matches/0/tokens/2/0").asInt());
+ assertEquals(345, res.at("/matches/0/tokens/2/1").asInt());
};
+
@Test
public void searchJSONmultitermRewriteBug () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- ki.addDocFile(
- 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
- ki.commit();
+ // Construct an index; it must not contain any documents yet
+ KorapIndex ki = new KorapIndex();
- // [tt/p="A.*"]{0,3}[tt/p="N.*"]
- String json = getString(
- getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile()
+ assertEquals(0, ki.numberOf("documents"));
+
+ // Index one test file and keep the returned document for inspection
+ FieldDocument fd = ki.addDocFile(
+ 1,
+ getClass().
+ getResource("/bzk/D59-00089.json.gz").
+ getFile(),
+ true
+ );
+ ki.commit();
+
+ assertEquals(1, ki.numberOf("documents"));
+ assertEquals("BZK", fd.getCorpusSigle());
+
+ // [tt/p="A.*"]{0,3}[tt/p="N.*"]
+ String json = getString(
+ getClass().
+ getResource("/queries/bugs/multiterm_rewrite.jsonld").
+ getFile()
);
- KorapSearch ks = new KorapSearch(json);
- KorapCollection kc = ks.getCollection();
+ KorapSearch ks = new KorapSearch(json);
+ KorapCollection kc = ks.getCollection();
- // No index was set
- assertEquals(-1, kc.numberOf("documents"));
- kc.setIndex(ki);
+ // Without an index the document count is reported as -1
+ assertEquals(-1, kc.numberOf("documents"));
+ kc.setIndex(ki);
- // Index was set but vc restricted to WPD
- assertEquals(0, kc.numberOf("documents"));
+ // The index is set, but the collection is still restricted to WPD
+ assertEquals(0, kc.numberOf("documents"));
- kc.extend(
- new KorapFilter().or("corpusSigle", "BZK")
+ kc.extend( // extend the collection by documents whose corpusSigle is BZK
+ new KorapFilter().or("corpusSigle", "BZK")
);
- /*
- System.err.println(ks.getCollection().toString());
- */
- assertEquals("Known issue: ", 1, kc.numberOf("documents"));
+ ks.setCollection(kc); // run the search against the extended collection
+ assertEquals(1, kc.numberOf("documents"));
- KorapResult kr = ks.run(ki);
-
- assertEquals(
+ KorapResult kr = ks.run(ki);
+
+ assertEquals(
kr.getQuery(),
- "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), " +
- "spanNext(spanRepetition(SpanMultiTermQueryWrapper(tokens:/tt/p:A.*/){1,3}), " +
- "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])"
+ "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), " +
+ "spanNext(spanRepetition(SpanMultiTermQueryWrapper"+
+ "(tokens:/tt/p:A.*/){1,3}), " +
+ "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])"
);
- assertEquals(kr.getTotalResults(), 58);
- assertEquals(0, kr.getStartIndex());
+ assertEquals(58, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
- assertEquals(
- kr.getMatch(0).getSnippetBrackets(),
- "[Saragat-Partei] zerfällt Rom (ADN) die von dem"
+ assertEquals(
+ "[Saragat-Partei] zerfällt Rom (ADN) die von dem",
+ kr.getMatch(0).getSnippetBrackets()
);
- assertEquals(
- kr.getMatch(1).getSnippetBrackets(),
- "[Saragat-Partei] zerfällt Rom (ADN) die von dem"
+ assertEquals(
+ "[Saragat-Partei] zerfällt Rom (ADN) die von dem",
+ kr.getMatch(1).getSnippetBrackets()
);
- assertEquals(
- kr.getMatch(2).getSnippetBrackets(),
- "Saragat-Partei zerfällt [Rom] (ADN) die von dem Rechtssozialisten Saragat"
+ assertEquals(
+ "Saragat-Partei zerfällt [Rom] (ADN) "+
+ "die von dem Rechtssozialisten Saragat",
+ kr.getMatch(2).getSnippetBrackets()
);
- assertEquals(
- kr.getMatch(3).getSnippetBrackets(),
- "Saragat-Partei zerfällt Rom ([ADN]) die von dem Rechtssozialisten Saragat geführte"
+ assertEquals(
+ "Saragat-Partei zerfällt Rom ([ADN]) "+
+ "die von dem Rechtssozialisten Saragat geführte",
+ kr.getMatch(3).getSnippetBrackets()
);
- assertEquals(
- kr.getMatch(23).getSnippetBrackets(),
- "dem Namen \"Einheitsbewegung der sozialistischen Initiative\" [eine neue politische Gruppierung] ins Leben gerufen hatten. Pressemeldungen zufolge"
+ assertEquals(
+ "dem Namen \"Einheitsbewegung der sozialistischen "+
+ "Initiative\" [eine neue politische Gruppierung] "+
+ "ins Leben gerufen hatten. Pressemeldungen zufolge",
+ kr.getMatch(23).getSnippetBrackets()
);
};
-
@Test
public void searchJSONCollection () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct the index shared by all queries of this test
+ KorapIndex ki = new KorapIndex();
+ // Index the seven wiki test files listed below
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().
+ getResource("/wiki/" + i + ".json.gz").
+ getFile(),
+ true
);
- };
- ki.commit();
-
- String json = getString(
- getClass().getResource("/queries/metaquery8-nocollection.jsonld").getFile()
+ };
+ ki.commit();
+ String json = getString(
+ getClass().
+ getResource("/queries/metaquery8-nocollection.jsonld").
+ getFile()
);
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 276);
- assertEquals(0, kr.getStartIndex());
- assertEquals(10, kr.getItemsPerPage());
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(276, kr.getTotalResults()); // presumably the unrestricted baseline (per resource name)
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
- json = getString(getClass().getResource("/queries/metaquery8.jsonld").getFile());
+ json = getString(
+ getClass().
+ getResource("/queries/metaquery8.jsonld").
+ getFile()
+ );
- ks = new KorapSearch(json);
- kr = ks.run(ki);
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 147);
- assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
- assertEquals(0, kr.getStartIndex());
- assertEquals(10, kr.getItemsPerPage());
+ assertEquals(147, kr.getTotalResults());
+ assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
- json = getString(getClass().getResource("/queries/metaquery8-filtered.jsonld").getFile());
+ json = getString(
+ getClass().
+ getResource("/queries/metaquery8-filtered.jsonld").
+ getFile()
+ );
- ks = new KorapSearch(json);
- kr = ks.run(ki);
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 28);
- assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
- assertEquals(0, kr.getStartIndex());
- assertEquals(10, kr.getItemsPerPage());
+ assertEquals(28, kr.getTotalResults());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
- json = getString(getClass().getResource("/queries/metaquery8-filtered-further.jsonld").getFile());
+ json = getString(
+ getClass().
+ getResource("/queries/metaquery8-filtered-further.jsonld").
+ getFile()
+ );
- ks = new KorapSearch(json);
- kr = ks.run(ki);
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
- assertEquals(kr.getTotalResults(), 0);
- assertEquals(0, kr.getStartIndex());
- assertEquals(10, kr.getItemsPerPage());
-
- json = getString(getClass().getResource("/queries/metaquery8-filtered-nested.jsonld").getFile());
+ assertEquals(0, kr.getTotalResults()); // this query is expected to match nothing
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ json = getString(
+ getClass().
+ getResource("/queries/metaquery8-filtered-nested.jsonld").
+ getFile()
+ );
- ks = new KorapSearch(json);
- kr = ks.run(ki);
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
- assertEquals("filter with QueryWrapperFilter(+(ID:WPD_AAA.00003 (+tokens:s:die +tokens:s:Schriftzeichen)))", ks.getCollection().getFilter(1).toString());
+ assertEquals("filter with QueryWrapperFilter("+
+ "+(ID:WPD_AAA.00003 (+tokens:s:die"+
+ " +tokens:s:Schriftzeichen)))",
+ ks.getCollection().getFilter(1).toString());
- assertEquals(kr.getTotalResults(), 119);
- assertEquals(0, kr.getStartIndex());
- assertEquals(10, kr.getItemsPerPage());
+ assertEquals(119, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
};
@Test
public void searchJSONSentenceContext () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct the index shared by both queries of this test
+ KorapIndex ki = new KorapIndex();
+ // Index the seven wiki test files listed below
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().
+ getResource("/wiki/" + i + ".json.gz").
+ getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-context-2.jsonld").getFile());
+ String json = getString(
+ getClass().
+ getResource("/queries/bsp-context-2.jsonld").
+ getFile()
+ );
- KorapSearch ks = new KorapSearch(json);
- ks.setCutOff(false);
- SearchContext sc = ks.getContext();
- sc.left.setLength((short) 10);
- sc.right.setLength((short) 10);
+ KorapSearch ks = new KorapSearch(json);
+ ks.setCutOff(false);
+ SearchContext sc = ks.getContext();
+ sc.left.setLength((short) 10);
+ sc.right.setLength((short) 10);
+
+ KorapResult kr = ks.run(ki);
+ assertEquals(
+ "... dezimalen [Wert] 65 sowohl ...",
+ kr.getMatch(1).getSnippetBrackets()
+ );
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ assertFalse(kr.getContext().toJsonNode().toString().equals("\"s\"")); // first query: context must not serialize as "s"
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getMatch(1).getSnippetBrackets(), "... dezimalen [Wert] 65 sowohl ...");
- assertEquals(kr.getTotalResults(), 3);
- assertEquals(0, kr.getStartIndex());
- assertEquals(25, kr.getItemsPerPage());
- assertFalse(kr.getContext().toJsonNode().toString().equals("\"s\""));
+ json = getString(
+ getClass().
+ getResource("/queries/bsp-context-sentence.jsonld").
+ getFile()
+ );
- json = getString(getClass().getResource("/queries/bsp-context-sentence.jsonld").getFile());
+ kr = new KorapSearch(json).run(ki);
+ assertEquals(
+ "steht a für den dezimalen [Wert] 97 sowohl im ASCII-"+
+ " als auch im Unicode-Zeichensatz",
+ kr.getMatch(0).getSnippetBrackets()
+ );
+ assertEquals(
+ "steht A für den dezimalen [Wert] 65 sowohl im ASCII-"+
+ " als auch im Unicode-Zeichensatz",
+ kr.getMatch(1).getSnippetBrackets()
+ );
+ assertEquals(
+ "In einem Zahlensystem mit einer Basis größer "+
+ "als 10 steht A oder a häufig für den dezimalen"+
+ " [Wert] 10, siehe auch Hexadezimalsystem.",
+ kr.getMatch(2).getSnippetBrackets()
+ );
- kr = new KorapSearch(json).run(ki);
- assertEquals(kr.getMatch(0).getSnippetBrackets(),
- "steht a für den dezimalen [Wert] 97 sowohl im ASCII- als auch im Unicode-Zeichensatz");
- assertEquals(kr.getMatch(1).getSnippetBrackets(),
- "steht A für den dezimalen [Wert] 65 sowohl im ASCII- als auch im Unicode-Zeichensatz");
- assertEquals(kr.getMatch(2).getSnippetBrackets(),
- "In einem Zahlensystem mit einer Basis größer als 10 steht A oder a häufig für den dezimalen [Wert] 10, siehe auch Hexadezimalsystem.");
-
- assertEquals(kr.getContext().toJsonNode().toString(), "\"s\"");
+ assertEquals("\"s\"", kr.getContext().toJsonNode().toString()); // second query: context serializes as "s"
};
@Test
public void searchJSONbug () throws IOException {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct the index the query is run against
+ KorapIndex ki = new KorapIndex();
+ // Index the seven wiki test files listed below
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().
+ getResource("/wiki/" + i + ".json.gz").
+ getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- String json = getString(getClass().getResource("/queries/bsp-bug.jsonld").getFile());
+ String json = getString(
+ getClass().
+ getResource("/queries/bsp-bug.jsonld").
+ getFile()
+ );
- KorapResult kr = new KorapSearch(json).run(ki);
+ KorapResult kr = new KorapSearch(json).run(ki); // the result is expected to carry an error
- assertEquals(kr.getError(0).getMessage(),
- "Number of operands is not acceptable");
+ assertEquals(
+ "Number of operands is not acceptable",
+ kr.getError(0).getMessage()
+ );
};
+
/**
* This is a breaking test for #179
*/
@@ -964,74 +1168,88 @@
KorapIndex ki = new KorapIndex();
// Indexing test files
ki.addDocFile(
- getClass().getResource("/wiki/00002.json.gz").getFile(), true
- );
+ getClass().
+ getResource("/wiki/00002.json.gz").
+ getFile(),
+ true
+ );
ki.commit();
// Expansion bug
// der alte Digraph Aa durch Å
String json = getString(
- getClass().getResource("/queries/bugs/expansion_bug_2.jsonld").getFile()
- );
+ getClass().
+ getResource("/queries/bugs/expansion_bug_2.jsonld").
+ getFile()
+ );
KorapResult kr = new KorapSearch(json).run(ki);
assertEquals("... Buchstabe des Alphabetes. In Dänemark ist " +
- "[der alte Digraph Aa durch Å] ersetzt worden, " +
- "in Eigennamen und Ortsnamen ...",
- kr.getMatch(0).getSnippetBrackets());
+ "[der alte Digraph Aa durch Å] ersetzt worden, " +
+ "in Eigennamen und Ortsnamen ...",
+ kr.getMatch(0).getSnippetBrackets());
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 1);
-
+
// der alte Digraph Aa durch []
// Works with one document
json = getString(
- getClass().getResource("/queries/bugs/expansion_bug.jsonld").getFile()
- );
+ getClass().
+ getResource("/queries/bugs/expansion_bug.jsonld").
+ getFile()
+ );
kr = new KorapSearch(json).run(ki);
assertEquals("... Buchstabe des Alphabetes. In Dänemark ist " +
- "[der alte Digraph Aa durch Å] ersetzt worden, " +
- "in Eigennamen und Ortsnamen ...",
- kr.getMatch(0).getSnippetBrackets());
+ "[der alte Digraph Aa durch Å] ersetzt worden, " +
+ "in Eigennamen und Ortsnamen ...",
+ kr.getMatch(0).getSnippetBrackets());
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 1);
// Now try with one file ahead
ki = new KorapIndex();
for (String i : new String[] {"00001",
- "00002"}) {
+ "00002"}) {
ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
- );
+ getClass().
+ getResource("/wiki/" + i + ".json.gz").
+ getFile(),
+ true
+ );
};
ki.commit();
// Expansion bug
// der alte Digraph Aa durch Å
json = getString(
- getClass().getResource("/queries/bugs/expansion_bug_2.jsonld").getFile()
- );
+ getClass().
+ getResource("/queries/bugs/expansion_bug_2.jsonld").
+ getFile()
+ );
kr = new KorapSearch(json).run(ki);
assertEquals("... Buchstabe des Alphabetes. In Dänemark ist " +
- "[der alte Digraph Aa durch Å] ersetzt worden, " +
- "in Eigennamen und Ortsnamen ...",
- kr.getMatch(0).getSnippetBrackets());
+ "[der alte Digraph Aa durch Å] ersetzt worden, " +
+ "in Eigennamen und Ortsnamen ...",
+ kr.getMatch(0).getSnippetBrackets());
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 1);
// der alte Digraph Aa durch []
json = getString(
- getClass().getResource("/queries/bugs/expansion_bug.jsonld").getFile()
- );
+ getClass().
+ getResource("/queries/bugs/expansion_bug.jsonld").
+ getFile()
+ );
kr = new KorapSearch(json).run(ki);
assertEquals("... Buchstabe des Alphabetes. In Dänemark ist " +
- "[der alte Digraph Aa durch Å] ersetzt worden, " +
- "in Eigennamen und Ortsnamen ...",
- kr.getMatch(0).getSnippetBrackets());
+ "[der alte Digraph Aa durch Å] ersetzt worden, " +
+ "in Eigennamen und Ortsnamen ...",
+ kr.getMatch(0).getSnippetBrackets());
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 1);
};
@@ -1039,43 +1257,45 @@
/*
This test will crash soon - it's just here for nostalgic reasons!
- */
+ */
@Test
public void getFoundryDistribution () throws Exception {
-
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001",
- "00002",
- "00003",
- "00004",
- "00005",
- "00006",
- "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct the index whose foundry distribution is inspected
+ KorapIndex ki = new KorapIndex();
+ // Index the seven wiki test files listed below
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ getClass().
+ getResource("/wiki/" + i + ".json.gz").
+ getFile(),
+ true
);
- };
- ki.commit();
+ };
+ ki.commit();
- KorapCollection kc = new KorapCollection(ki);
+ KorapCollection kc = new KorapCollection(ki); // collection over the index, no filters added here
- assertEquals(7, kc.numberOf("documents"));
+ assertEquals(7, kc.numberOf("documents")); // one document per indexed file
HashMap map = kc.getTermRelation("foundries");
- assertEquals((long) 7, map.get("-docs"));
- assertEquals((long) 7, map.get("treetagger"));
- assertEquals((long) 6, map.get("opennlp/morpho"));
- assertEquals((long) 6, map.get("#__opennlp/morpho:###:treetagger"));
- assertEquals((long) 7, map.get("#__opennlp:###:treetagger"));
+ assertEquals((long) 7, map.get("-docs")); // presumably the total document count; verify against getTermRelation
+ assertEquals((long) 7, map.get("treetagger"));
+ assertEquals((long) 6, map.get("opennlp/morpho"));
+ assertEquals((long) 6, map.get("#__opennlp/morpho:###:treetagger")); // "#__a:###:b" keys presumably count documents having both terms; TODO confirm
+ assertEquals((long) 7, map.get("#__opennlp:###:treetagger"));
};
+
@Test
public void getTextClassDistribution () throws Exception {
-
- KorapIndex ki = new KorapIndex();
- ki.addDoc(
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1085,9 +1305,10 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music entertainment\"" +
-"}");
+"}"
+ );
- ki.addDoc(
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1097,9 +1318,10 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music singing\"" +
-"}");
+"}"
+ );
- ki.addDoc(
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1109,35 +1331,35 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music entertainment jumping\"" +
-"}");
- ki.commit();
+"}"
+ );
+ ki.commit();
-
- KorapCollection kc = new KorapCollection(ki);
- assertEquals(3, kc.numberOf("documents"));
+ KorapCollection kc = new KorapCollection(ki);
+ assertEquals(3, kc.numberOf("documents"));
HashMap map = kc.getTermRelation("textClass");
- assertEquals((long) 1, map.get("singing"));
- assertEquals((long) 1, map.get("jumping"));
- assertEquals((long) 3, map.get("music"));
- assertEquals((long) 2, map.get("entertainment"));
- assertEquals((long) 3, map.get("-docs"));
- assertEquals((long) 2, map.get("#__entertainment:###:music"));
- assertEquals((long) 1, map.get("#__entertainment:###:jumping"));
- assertEquals((long) 0, map.get("#__entertainment:###:singing"));
- assertEquals((long) 0, map.get("#__jumping:###:singing"));
- assertEquals((long) 1, map.get("#__jumping:###:music"));
- assertEquals((long) 1, map.get("#__music:###:singing"));
- assertEquals(11, map.size());
-
- // System.err.println(kc.getTermRelationJSON("textClass"));
+ assertEquals((long) 1, map.get("singing"));
+ assertEquals((long) 1, map.get("jumping"));
+ assertEquals((long) 3, map.get("music"));
+ assertEquals((long) 2, map.get("entertainment"));
+ assertEquals((long) 3, map.get("-docs"));
+ assertEquals((long) 2, map.get("#__entertainment:###:music"));
+ assertEquals((long) 1, map.get("#__entertainment:###:jumping"));
+ assertEquals((long) 0, map.get("#__entertainment:###:singing"));
+ assertEquals((long) 0, map.get("#__jumping:###:singing"));
+ assertEquals((long) 1, map.get("#__jumping:###:music"));
+ assertEquals((long) 1, map.get("#__music:###:singing"));
+ assertEquals(11, map.size());
+
+ // System.err.println(kc.getTermRelationJSON("textClass"));
};
+
@Test
public void getTextClassDistribution2 () throws Exception {
-
- KorapIndex ki = new KorapIndex();
- ki.addDoc(
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1147,10 +1369,10 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"\"" +
-"}");
-
- ki.commit();
- ki.addDoc(
+"}"
+ );
+ ki.commit();
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1160,10 +1382,11 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music entertainment\"" +
-"}");
+"}"
+ );
- ki.commit();
- ki.addDoc(
+ ki.commit();
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1173,9 +1396,10 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music singing\"" +
-"}");
+"}"
+ );
- ki.addDoc(
+ ki.addDoc(
"{" +
" \"fields\" : [" +
" { \"primaryData\" : \"abc\" },{" +
@@ -1185,25 +1409,25 @@
" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
" [ \"s:c\", \"i:c\", \"_2#2-3\" ]]}]," +
" \"textClass\" : \"music entertainment jumping\"" +
-"}");
- ki.commit();
+"}"
+ );
+ ki.commit();
-
- KorapCollection kc = new KorapCollection(ki);
- assertEquals(4, kc.numberOf("documents"));
+ KorapCollection kc = new KorapCollection(ki);
+ assertEquals(4, kc.numberOf("documents"));
HashMap map = kc.getTermRelation("textClass");
- assertEquals((long) 1, map.get("singing"));
- assertEquals((long) 1, map.get("jumping"));
- assertEquals((long) 3, map.get("music"));
- assertEquals((long) 2, map.get("entertainment"));
- assertEquals((long) 4, map.get("-docs"));
- assertEquals((long) 2, map.get("#__entertainment:###:music"));
- assertEquals((long) 1, map.get("#__entertainment:###:jumping"));
- assertEquals((long) 0, map.get("#__entertainment:###:singing"));
- assertEquals((long) 0, map.get("#__jumping:###:singing"));
- assertEquals((long) 1, map.get("#__jumping:###:music"));
- assertEquals((long) 1, map.get("#__music:###:singing"));
- assertEquals(11, map.size());
+ assertEquals((long) 1, map.get("singing"));
+ assertEquals((long) 1, map.get("jumping"));
+ assertEquals((long) 3, map.get("music"));
+ assertEquals((long) 2, map.get("entertainment"));
+ assertEquals((long) 4, map.get("-docs"));
+ assertEquals((long) 2, map.get("#__entertainment:###:music"));
+ assertEquals((long) 1, map.get("#__entertainment:###:jumping"));
+ assertEquals((long) 0, map.get("#__entertainment:###:singing"));
+ assertEquals((long) 0, map.get("#__jumping:###:singing"));
+ assertEquals((long) 1, map.get("#__jumping:###:music"));
+ assertEquals((long) 1, map.get("#__music:###:singing"));
+ assertEquals(11, map.size());
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/server/TestResource.java b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
index c6014f7..de1a572 100644
--- a/src/test/java/de/ids_mannheim/korap/server/TestResource.java
+++ b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
@@ -108,11 +108,8 @@
queryParam("uid", "4").
request("application/json").
post(Entity.json(json), KorapResponse.class);
- /*
- assertEquals(2, kresp.getTotalResults());
- */
- fail("totalResults should be implemented in KorapResponse" +
- " or KorapResult should be used here");
+
+ assertEquals(2, kresp.getTotalResults());
assertFalse(kresp.hasErrors());
assertFalse(kresp.hasWarnings());
assertFalse(kresp.hasMessages());