Collection bug fix, new server endpoint established
diff --git a/CHANGES b/CHANGES
index d9bd290..a515a65 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,8 @@
+0.43 2014-09-23
+ - [cleanup] Made KorapResult and MatchCollector rely on KorapResponse as their base class (diewald)
+ - [bugfix] Small but ancient collection bug fixed (diewald)
+ - [feature] Collect first matches of documents based on UIDs (diewald)
+
0.42 2014-09-19
- [cleanup] Extracted private classes from KorapMatch (diewald)
- [bugfix] Fix query rewrite in contains-queries (diewald)
diff --git a/pom.xml b/pom.xml
index faefc7b..9bee877 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.42</version>
+ <version>0.43</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index bebd668..49b0aed 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -83,20 +83,7 @@
this.filter = new ArrayList<FilterOperation>(5);
};
- // Create a collection based on UIDs
- public KorapCollection (String ... uids) {
- this.filter = new ArrayList<FilterOperation>(5);
- BooleanFilter filter = new BooleanFilter();
- if (DEBUG)
- log.debug("UID based collection: {},{}", uids[0], uids[1]);
- filter.or("UID", uids);
- if (DEBUG)
- log.debug("UID based filter: {}", filter.toString());
- this.filter(filter);
- };
-
-
- public void fromJSON(JsonNode json) throws QueryException {
+ public void fromJSON (JsonNode json) throws QueryException {
String type = json.get("@type").asText();
if (type.equals("korap:meta-filter")) {
@@ -124,10 +111,12 @@
public KorapCollection filter (BooleanFilter filter) {
if (DEBUG)
log.trace("Added filter: {}", filter.toString());
+
if (filter == null) {
log.warn("No filter is given");
return this;
};
+
Filter f = (Filter) new QueryWrapperFilter(filter.toQuery());
if (f == null) {
log.warn("Filter can't be wrapped");
@@ -143,6 +132,16 @@
return this;
};
+ /**
+  * Add a filter to this collection that selects documents by UID.
+  *
+  * A fresh BooleanFilter is created, the given values are passed to
+  * its or() method for the "UID" field, and the result is handed to
+  * filter(BooleanFilter).
+  *
+  * @param uids the UID values to filter on.
+  * @return the value returned by filter(BooleanFilter).
+  */
+ public KorapCollection filterUIDs (String ... uids) {
+ BooleanFilter filter = new BooleanFilter();
+ filter.or("UID", uids);
+ if (DEBUG)
+ log.debug("UID based filter: {}", filter.toString());
+ return this.filter(filter);
+ };
+
+
public KorapCollection filter (KorapFilter filter) {
return this.filter(filter.toBooleanFilter());
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapFilter.java b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
index f9052d9..17a5dd4 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
@@ -88,14 +88,10 @@
filter = new BooleanFilter();
};
-
public KorapFilter (JsonNode json) throws QueryException {
filter = this.fromJSON(json, "tokens");
};
-
- /*
- */
/*
String type = json.get("@type").asText();
String field = _getField(json);
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index f99da41..d95919f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1079,9 +1079,10 @@
this.termContexts = new HashMap<Term, TermContext>();
+ // Get the spanquery from the KorapSearch object
SpanQuery query = ks.getQuery();
- // Get the field of textual data and annotations
+ // Get the field of textual data and annotations ("tokens")
String field = query.getField();
// Todo: Make kr subclassing ks - so ks has a method for a new KorapResult!
@@ -1092,36 +1093,37 @@
ks.getContext()
);
+ // Set version info to result
if (this.getVersion() != null)
kr.setVersion(this.getVersion());
+ // The following fields should be lifted for matches
HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
fieldsToLoadLocal.add(field);
- int i = 0;
- long t1 = 0,
- t2 = 0;
- int startIndex = kr.getStartIndex();
- int count = kr.getItemsPerPage();
- int hits = kr.itemsPerPage() + startIndex;
- int limit = ks.getLimit();
- boolean cutoff = ks.doCutOff();
- short itemsPerResource = ks.getItemsPerResource();
+ // Some initializations ...
+ int i = 0,
+ startIndex = kr.getStartIndex(),
+ count = kr.getItemsPerPage(),
+ hits = kr.itemsPerPage() + startIndex,
+ limit = ks.getLimit(),
+ itemsPerResourceCounter = 0;
+ boolean cutoff = ks.doCutOff();
+ short itemsPerResource = ks.getItemsPerResource();
// Check if there is work to do at all
if (limit > 0) {
if (hits > limit)
hits = limit;
- // Nah - nothing to do! \o/
+ // Nah - nothing to do! Let's go shopping!
if (limit < startIndex)
return kr;
};
+ // Collect matches from atomic readers
ArrayList<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(kr.itemsPerPage());
- int itemsPerResourceCounter = 0;
-
try {
// Rewrite query (for regex and wildcard queries)
@@ -1131,6 +1133,9 @@
query = (SpanQuery) rewrittenQuery;
};
+ // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
+ long t1 = System.nanoTime();
+
for (AtomicReaderContext atomic : this.reader().leaves()) {
int oldLocalDocID = -1;
@@ -1150,16 +1155,13 @@
// TODO: Get document information from Cache!
- // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
- t1 = System.nanoTime();
-
- for (; i < hits; i++) {
+ for (; i < hits;i++) {
if (DEBUG)
log.trace("Match Nr {}/{}", i, count);
// There are no more spans to find
- if (spans.next() != true)
+ if (!spans.next())
break;
int localDocID = spans.doc();
@@ -1218,12 +1220,6 @@
atomicMatches.add(match);
};
- // Benchmark till now
- if (kr.getBenchmarkSearchResults() == null) {
- t2 = System.nanoTime();
- kr.setBenchmarkSearchResults(t1, t2);
- };
-
// Can be disabled TEMPORARILY
while (!cutoff && spans.next()) {
if (limit > 0 && i >= limit)
@@ -1232,17 +1228,23 @@
// Count hits per resource
if (itemsPerResource > 0) {
int localDocID = spans.doc();
+
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ break;
// IDS are identical
if (localDocID == oldLocalDocID || oldLocalDocID == -1) {
- if (itemsPerResourceCounter++ >= itemsPerResource)
+ if (localDocID == -1)
+ break;
+
+ if (itemsPerResourceCounter++ >= itemsPerResource) {
if (spans.skipTo(localDocID + 1) != true) {
break;
- }
- else {
- itemsPerResourceCounter = 1;
- localDocID = spans.doc();
};
+ itemsPerResourceCounter = 1;
+ localDocID = spans.doc();
+ // continue;
+ };
}
// Reset counter
@@ -1251,17 +1253,12 @@
oldLocalDocID = localDocID;
};
-
i++;
};
atomicMatches.clear();
};
- t1 = System.nanoTime();
- kr.setBenchmarkHitCounter(t2, t1);
- if (kr.getBenchmarkSearchResults() == null) {
- kr.setBenchmarkSearchResults(t2, t1);
- };
+ kr.setBenchmark(t1, System.nanoTime());
if (itemsPerResource > 0)
kr.setItemsPerResource(itemsPerResource);
@@ -1269,7 +1266,7 @@
kr.setTotalResults(cutoff ? -1 : i);
}
catch (IOException e) {
- kr.setError("There was an IO error");
+ kr.setError(600, e.getLocalizedMessage());
log.warn( e.getLocalizedMessage() );
};
@@ -1379,11 +1376,11 @@
};
};
- mc.setBenchmarkHitCounter(System.nanoTime(), t1);
+ mc.setBenchmark(t1, System.nanoTime());
}
catch (IOException e) {
- mc.setError("There was an IO error");
- log.warn( e.getLocalizedMessage() );
+ mc.setError(600, e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
};
mc.commit();
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index fd70547..852710c 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -1,12 +1,16 @@
package de.ids_mannheim.korap;
-import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
+
import de.ids_mannheim.korap.index.PositionsToOffset;
import de.ids_mannheim.korap.index.SearchContext;
+import de.ids_mannheim.korap.server.KorapResponse;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -16,16 +20,18 @@
/*
TODO: Reuse the KorapSearch code for data serialization!
*/
-
-public class KorapResult {
+@JsonInclude(Include.NON_NULL)
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class KorapResult extends KorapResponse {
ObjectMapper mapper = new ObjectMapper();
+ @JsonIgnore
public static final short ITEMS_PER_PAGE = 25;
+
private String query;
private List<KorapMatch> matches;
- private int totalResults = 0;
private int startIndex = 0;
private SearchContext context;
@@ -37,7 +43,6 @@
benchmarkHitCounter;
private String error = null;
private String warning = null;
- private String version;
private JsonNode request;
@@ -46,7 +51,9 @@
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
// Empty result
- public KorapResult() {}
+ public KorapResult() {
+ mapper.enable(SerializationFeature.INDENT_OUTPUT);
+ };
public KorapResult(String query,
int startIndex,
@@ -55,7 +62,7 @@
mapper.enable(SerializationFeature.INDENT_OUTPUT);
// mapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
- mapper.disable(SerializationFeature.WRITE_NULL_MAP_VALUES);
+ // mapper.disable(SerializationFeature.WRITE_NULL_MAP_VALUES);
this.matches = new ArrayList<>(itemsPerPage);
this.query = query;
@@ -97,37 +104,11 @@
return km;
}
-
- public void setTotalResults(int i) {
- this.totalResults = i;
- }
-
-
- public int getTotalResults() {
- return this.totalResults;
- }
-
-
@Deprecated
public int totalResults() {
- return this.totalResults;
+ return this.getTotalResults();
}
-
- @JsonIgnore
- public void setVersion(String version) {
- this.version = version;
- }
-
-
- @JsonIgnore
- public String getVersion() {
- if (this.version == null)
- return null;
- return "lucene-backend-" + this.version;
- }
-
-
public short getItemsPerPage() {
return this.itemsPerPage;
}
@@ -158,7 +139,11 @@
this.warning += "; " + warning;
};
+ /**
+  * Replace the stored warning text, discarding any previous value.
+  *
+  * @param warning the new warning text.
+  */
+ public void setWarning (String warning) {
+ this.warning = warning;
+ };
+ @JsonIgnore
public void setRequest(JsonNode request) {
this.request = request;
}
@@ -168,25 +153,33 @@
return this.request;
}
-
+ /*
+ @JsonIgnore
public void setBenchmarkSearchResults(long t1, long t2) {
this.benchmarkSearchResults =
(t2 - t1) < 100_000_000 ? (((double) (t2 - t1) * 1e-6) + " ms") :
(((double) (t2 - t1) / 1000000000.0) + " s");
- }
+ };
+ public void setBenchmarkSearchResults(String bm) {
+ this.benchmarkSearchResults = bm;
+ };
public String getBenchmarkSearchResults() {
return this.benchmarkSearchResults;
}
+ */
-
+ @JsonIgnore
public void setBenchmarkHitCounter(long t1, long t2) {
this.benchmarkHitCounter =
(t2 - t1) < 100_000_000 ? (((double) (t2 - t1) * 1e-6) + " ms") :
(((double) (t2 - t1) / 1000000000.0) + " s");
- }
+ };
+ /**
+  * Set the hit counter benchmark from an already formatted string.
+  *
+  * @param bm the benchmark text, stored as given.
+  */
+ public void setBenchmarkHitCounter(String bm) {
+ this.benchmarkHitCounter = bm;
+ };
public String getBenchmarkHitCounter() {
return this.benchmarkHitCounter;
@@ -196,22 +189,25 @@
this.itemsPerResource = value;
};
- @JsonIgnore
+ /**
+  * Set the items-per-resource value from an int.
+  *
+  * The value is narrowed with a (short) cast, so an argument outside
+  * the range of short is not rejected but silently wraps around.
+  *
+  * @param value the number of items per resource.
+  */
+ public void setItemsPerResource (int value) {
+ this.itemsPerResource = (short) value;
+ };
+
+ // @JsonIgnore
public short getItemsPerResource () {
return this.itemsPerResource;
};
-
public String getQuery() {
return this.query;
}
-
+ @JsonIgnore
public KorapMatch getMatch(int index) {
return this.matches.get(index);
}
-
+ @JsonIgnore
public List<KorapMatch> getMatches() {
return this.matches;
}
@@ -228,6 +224,7 @@
}
+ @JsonIgnore
public KorapResult setContext(SearchContext context) {
this.context = context;
return this;
@@ -250,8 +247,8 @@
if (this.itemsPerResource > 0)
json.put("itemsPerResource", this.itemsPerResource);
- if (this.version != null)
- json.put("version", this.version);
+ if (this.getVersion() != null)
+ json.put("version", this.getVersion());
try {
return mapper.writeValueAsString(json);
@@ -261,7 +258,5 @@
return "{}";
- }
-
-
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
index 8c2faaf..e1e93e3 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
@@ -1,9 +1,32 @@
package de.ids_mannheim.korap.index;
import de.ids_mannheim.korap.KorapMatch;
+import de.ids_mannheim.korap.server.KorapResponse;
import java.util.*;
-public interface MatchCollector {
- public void add (int uniqueDocID, int matchcount);
+public class MatchCollector extends KorapResponse {
+ public int totalResultDocs = 0;
+
+ /**
+  * Register one matching document.
+  *
+  * Increments the document counter by one and adds matchcount to the
+  * total result count. The document ID itself is not stored here.
+  *
+  * @param uniqueDocID ID of the document (unused in this class).
+  * @param matchcount  number of matches found in that document.
+  */
+ public void add (int uniqueDocID, int matchcount) {
+ this.totalResultDocs++;
+ this.incrTotalResults(matchcount);
+ };
+
+ /**
+  * Overwrite the number of documents that contained matches.
+  *
+  * @param i the new document count.
+  * @return this collector, for chaining.
+  */
+ public MatchCollector setTotalResultDocs (int i) {
+ this.totalResultDocs = i;
+ return this;
+ };
+
+ /**
+  * Add i to the number of documents that contained matches.
+  *
+  * @param i the amount to add.
+  * @return this collector, for chaining.
+  */
+ public MatchCollector incrTotalResultDocs (int i) {
+ this.totalResultDocs += i;
+ return this;
+ };
+
+ /**
+  * @return the number of documents counted so far.
+  */
+ public int getTotalResultDocs () {
+ return totalResultDocs;
+ };
+
+ // Does nothing in this class; the body is intentionally empty.
+ public void commit() {};
+
/*
* The following methods are shared and should be used from KorapResult
@@ -11,11 +34,4 @@
* getQueryHash
* getNode
*/
-
- public void setError(String s);
- public void setBenchmarkHitCounter(long t1, long t2);
- public int getMatchCount ();
- public int getDocumentCount ();
- public String toJSON();
- public void commit();
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java
index 9c35b6f..b7d212e 100644
--- a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java
+++ b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java
@@ -3,20 +3,15 @@
import de.ids_mannheim.korap.index.MatchCollector;
import java.util.*;
-public class MatchCollectorDB implements MatchCollector {
+public class MatchCollectorDB extends MatchCollector {
/*
Todo: In case there are multiple threads searching,
the list should be synchrinized Collections.synchronizedList()
*/
-
- private String error;
- private int doccount = 0;
- private int matchcount = 0;
- private int doccollect = 0;
-
private List matchCollector;
private int bufferSize;
+ private int doccollect;
private String tableName;
@@ -33,33 +28,13 @@
* Add matches till the bufferSize exceeds - then commit to the database.
*/
public void add (int uniqueDocID, int matchcount) {
- this.doccount++;
- this.matchcount += matchcount;
+ this.incrTotalResultDocs(1);
+ this.incrTotalResults(matchcount);
this.matchCollector.add(new int[]{uniqueDocID, matchcount});
if (this.doccollect++ > bufferSize)
this.commit();
};
- public void setError(String msg) {
- this.error = msg;
- };
-
- public void setBenchmarkHitCounter(long t1, long t2) {
- };
-
- public int getMatchCount () {
- return matchcount;
- };
-
- public int getDocumentCount () {
- return doccount;
- };
-
- public String toJSON () {
- // This may also be a commit!
- return "{ \"documents\" : " + doccount + ", \"matches\" : " + matchcount + " }";
- };
-
public void commit () {
this.matchCollector.clear();
diff --git a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java
deleted file mode 100644
index 9935919..0000000
--- a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package de.ids_mannheim.korap.index.collector;
-import de.ids_mannheim.korap.index.MatchCollector;
-import de.ids_mannheim.korap.KorapMatch;
-import java.util.*;
-
-public class MatchCollectorTest implements MatchCollector {
-
- private String error;
- private int doccount = 0;
- private int matchcount = 0;
-
- public void add (int uniqueDocID, int matchcount) {
- this.doccount++;
- this.matchcount += matchcount;
- };
-
- public void setError(String msg) {
- this.error = msg;
- };
-
- public void setBenchmarkHitCounter(long t1, long t2) {
- };
-
- public int getMatchCount () {
- return matchcount;
- };
-
- public int getDocumentCount () {
- return doccount;
- };
-
- public String toJSON () {
- // This is also a commit!
- return "{ \"documents\" : " + doccount + ", \"matches\" : " + matchcount + " }";
- };
-
- public void commit() {
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/server/KorapResponse.java b/src/main/java/de/ids_mannheim/korap/server/KorapResponse.java
index 63e55bd..ccca3bd 100644
--- a/src/main/java/de/ids_mannheim/korap/server/KorapResponse.java
+++ b/src/main/java/de/ids_mannheim/korap/server/KorapResponse.java
@@ -1,4 +1,5 @@
package de.ids_mannheim.korap.server;
+
import java.util.*;
import java.io.*;
@@ -15,11 +16,14 @@
*/
@JsonInclude(Include.NON_NULL)
+@JsonIgnoreProperties(ignoreUnknown = true)
public class KorapResponse {
ObjectMapper mapper = new ObjectMapper();
private String errstr, msg, version, node;
- private int err, unstaged = 0;
+ private int err, unstaged;
+ private int totalResults = 0;
+ private String benchmark;
public KorapResponse (String node, String version) {
this.setNode(node);
@@ -28,6 +32,11 @@
public KorapResponse () {};
+ /**
+  * Set the error message and the numeric error code in one call.
+  *
+  * @param code the error code, forwarded to setErr.
+  * @param msg  the error message, forwarded to setErrstr.
+  * @return the value returned by setErr at the end of the setter chain.
+  */
+ @JsonIgnore
+ public KorapResponse setError (int code, String msg) {
+ return this.setErrstr(msg).setErr(code);
+ };
+
public KorapResponse setErrstr (String msg) {
this.errstr = msg;
return this;
@@ -82,6 +91,39 @@
return this;
};
+ /**
+  * Overwrite the total number of results.
+  *
+  * @param i the new total.
+  * @return this response, for chaining.
+  */
+ public KorapResponse setTotalResults (int i) {
+ this.totalResults = i;
+ return this;
+ };
+
+ /**
+  * Add i to the total number of results.
+  *
+  * @param i the amount to add.
+  * @return this response, for chaining.
+  */
+ public KorapResponse incrTotalResults (int i) {
+ this.totalResults += i;
+ return this;
+ };
+
+
+ /**
+  * @return the total number of results recorded in this response.
+  */
+ public int getTotalResults() {
+ return this.totalResults;
+ };
+
+ /**
+  * Store the elapsed time between two timestamps as readable text.
+  *
+  * The difference t2 - t1 is treated as nanoseconds (the callers in
+  * this patch pass System.nanoTime() values). Differences below
+  * 100_000_000 (100 ms) are written in milliseconds with the suffix
+  * " ms"; anything longer is written in seconds with the suffix " s".
+  *
+  * @param t1 start timestamp.
+  * @param t2 end timestamp.
+  * @return this response, for chaining.
+  */
+ @JsonIgnore
+ public KorapResponse setBenchmark (long t1, long t2) {
+ this.benchmark =
+ (t2 - t1) < 100_000_000 ? (((double) (t2 - t1) * 1e-6) + " ms") :
+ (((double) (t2 - t1) / 1000000000.0) + " s");
+ return this;
+ };
+
+ /**
+  * Set the benchmark from an already formatted string.
+  *
+  * @param bm the benchmark text, stored as given.
+  * @return this response, for chaining.
+  */
+ public KorapResponse setBenchmark (String bm) {
+ this.benchmark = bm;
+ return this;
+ };
+
+ /**
+  * @return the stored benchmark text, or null if none was set.
+  */
+ public String getBenchmark () {
+ return this.benchmark;
+ };
+
+
// Serialize
public String toJSON () {
ObjectNode json = (ObjectNode) mapper.valueToTree(this);
diff --git a/src/main/java/de/ids_mannheim/korap/server/Resource.java b/src/main/java/de/ids_mannheim/korap/server/Resource.java
index 0577d93..94e72e5 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Resource.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Resource.java
@@ -169,20 +169,26 @@
// Get query parameters
MultivaluedMap<String,String> qp = uri.getQueryParameters();
- // Build Collection based on a list of uids
- KorapCollection kc = new KorapCollection(
- qp.get("uid").toArray(new String[0])
- );
+ if (qp.get("uid") != null) {
- // TODO: RESTRICT COLLECTION TO ONLY RESPECT SELF DOCS (REPLICATION)
+ // Build Collection based on a list of uids
+ List<String> uids = qp.get("uid");
+ KorapCollection kc = new KorapCollection();
+ kc.filterUIDs(uids.toArray(new String[uids.size()]));
- // Override old collection
- ks.setCollection(kc);
+ // TODO: RESTRICT COLLECTION TO ONLY RESPECT SELF DOCS (REPLICATION)
- // Only return the first match per text
- ks.setItemsPerResource(1);
+ // Override old collection
+ ks.setCollection(kc);
- return ks.run(index).toJSON();
+ // Only return the first match per text
+ ks.setItemsPerResource(1);
+
+ return ks.run(index).toJSON();
+ };
+ KorapResult kr = new KorapResult();
+ kr.setError("No UUIDs given");
+ return kr.toJSON();
};
// Response with error message
KorapResult kr = new KorapResult();
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
index ba62e86..3fa1ea9 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
@@ -17,7 +17,6 @@
import static org.junit.Assert.*;
import org.junit.Test;
-import org.junit.Ignore;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -42,10 +41,11 @@
// Create Virtual collections:
KorapCollection kc = new KorapCollection(ki);
- // The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
-
assertEquals("Documents", 7, kc.numberOf("documents"));
+ // The virtual collection consists of all documents that have
+ // the textClass "reisen" and "freizeit-unterhaltung"
+
kc.filter( kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung") );
assertEquals("Documents", 5, kc.numberOf("documents"));
@@ -73,7 +73,6 @@
SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
KorapResult kr = kc.search(query);
-
assertEquals(70, kr.totalResults());
kc.extend( kf.and("textClass", "uninteresting") );
@@ -96,7 +95,13 @@
// Construct index
KorapIndex ki = new KorapIndex();
// Indexing test files
- for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
ki.addDocFile(
getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
);
@@ -110,17 +115,16 @@
assertEquals("Documents", 7, kc.numberOf("documents"));
- /*
- If this is set - everything is fine automatically ...
+ // If this is set - everything is fine automatically ...
kc.filter(kf.and("corpusID", "WPD"));
assertEquals("Documents", 7, kc.numberOf("documents"));
- */
+
// The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
+
kc.filter( kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung") );
assertEquals("Documents", 5, kc.numberOf("documents"));
-
assertEquals("Tokens", 1678, kc.numberOf("tokens"));
assertEquals("Sentences", 194, kc.numberOf("sentences"));
assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
@@ -133,6 +137,7 @@
assertEquals("Sentences", 75, kc.numberOf("sentences"));
assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+ // This is already filtered though ...
kc.filter(kf.and("corpusID", "WPD"));
assertEquals("Documents", 1, kc.numberOf("documents"));
@@ -153,12 +158,9 @@
kc.extend( kf.and("textClass", "wissenschaft") );
assertEquals("Documents", 3, kc.numberOf("documents"));
- /*
assertEquals("Tokens", 1669, kc.numberOf("tokens"));
assertEquals("Sentences", 188, kc.numberOf("sentences"));
assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
- // System.err.println(kr.toJSON());
- */
};
@@ -169,7 +171,13 @@
// Construct index
KorapIndex ki = new KorapIndex();
// Indexing test files
- for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
ki.addDocFile(
getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
);
@@ -197,8 +205,6 @@
assertEquals("Sentences", 40, kc.numberOf("sentences"));
assertEquals("Paragraphs", 2, kc.numberOf("paragraphs"));
- // assertEquals("Documents", 1, kc.numberOf("documents"));
-
// Create a query
KorapQuery kq = new KorapQuery("tokens");
SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
@@ -232,23 +238,26 @@
};
ki.commit();
- assertEquals("Documents", 7, ki.numberOf("documents"));
- assertEquals("Sentences", 281, ki.numberOf("sentences"));
+ assertEquals("Documents", 7, ki.numberOf("documents"));
+ assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
+ assertEquals("Sentences", 281, ki.numberOf("sentences"));
+ assertEquals("Tokens", 2661, ki.numberOf("tokens"));
SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
KorapResult kr = ki.search(sq, (short) 10);
assertEquals(86,kr.getTotalResults());
// Create Virtual collections:
- KorapCollection kc = new KorapCollection(new String[]{"2", "3", "4"});
+ KorapCollection kc = new KorapCollection();
+ kc.filterUIDs(new String[]{"2", "3", "4"});
kc.setIndex(ki);
assertEquals("Documents", 3, kc.numberOf("documents"));
+ assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
+ assertEquals("Sentences", 103, kc.numberOf("sentences"));
+ assertEquals("Tokens", 1229, kc.numberOf("tokens"));
+
kr = kc.search(sq);
assertEquals(39,kr.getTotalResults());
};
};
-
-
-
-// kc.filter( kf.and("textClass", "kultur").or("textClass", "wissenschaft") );
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index c06592d..e4dccc4 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -363,6 +363,49 @@
assertEquals((short) 1, kr.getItemsPerResource());
};
+ @Test
+ public void searchJSONitemsPerResourceServer () throws IOException {
+
+ /*
+ * Mirrors TestResource#testCollection, but runs the same query and
+ * UID filter ("1", "4") directly against a local KorapIndex instead
+ * of posting it to the HTTP endpoint.
+ */
+
+
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ // Each document gets a consecutive UID, starting at 1
+ int uid = 1;
+ for (String i : new String[] {"00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ ki.addDocFile(
+ uid++,
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ true
+ );
+ };
+ ki.commit();
+
+ String json = getString(getClass().getResource("/queries/bsp-uid-example.jsonld").getFile());
+
+ // Limit the search to one item per resource and to the documents
+ // with UID 1 and 4
+ KorapSearch ks = new KorapSearch(json);
+ ks.setItemsPerResource(1);
+ KorapCollection kc = new KorapCollection();
+ kc.filterUIDs(new String[]{"1", "4"});
+ kc.setIndex(ki);
+ ks.setCollection(kc);
+
+ KorapResult kr = ks.run(ki);
+
+ // Two documents selected, one item per resource: two results expected
+ assertEquals(2, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ };
@Test
diff --git a/src/test/java/de/ids_mannheim/korap/server/DBTest.java b/src/test/java/de/ids_mannheim/korap/server/TestDatabase.java
similarity index 93%
rename from src/test/java/de/ids_mannheim/korap/server/DBTest.java
rename to src/test/java/de/ids_mannheim/korap/server/TestDatabase.java
index 11f3a23..04e7603 100644
--- a/src/test/java/de/ids_mannheim/korap/server/DBTest.java
+++ b/src/test/java/de/ids_mannheim/korap/server/TestDatabase.java
@@ -19,7 +19,7 @@
import org.junit.Test;
import static org.junit.Assert.assertEquals;
-public class DBTest {
+public class TestDatabase {
private Connection conn;
private Statement stat;
@@ -73,8 +73,8 @@
mc.add(9,10);
mc.add(16,90);
mc.commit();
- assertEquals(mc.getMatchCount(), 109);
- assertEquals(mc.getDocumentCount(), 4);
+ assertEquals(mc.getTotalResults(), 109);
+ assertEquals(mc.getTotalResultDocs(), 4);
};
@After
diff --git a/src/test/java/de/ids_mannheim/korap/server/ResourceTest.java b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
similarity index 70%
rename from src/test/java/de/ids_mannheim/korap/server/ResourceTest.java
rename to src/test/java/de/ids_mannheim/korap/server/TestResource.java
index ee84419..08ae610 100644
--- a/src/test/java/de/ids_mannheim/korap/server/ResourceTest.java
+++ b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
@@ -4,6 +4,9 @@
http://harryjoy.com/2012/09/08/simple-rest-client-in-java/
*/
import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
@@ -12,18 +15,19 @@
import org.glassfish.grizzly.http.server.HttpServer;
+import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import static org.junit.Assert.assertEquals;
import java.io.FileInputStream;
import de.ids_mannheim.korap.KorapNode;
+import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.server.KorapResponse;
import static de.ids_mannheim.korap.util.KorapString.*;
-public class ResourceTest {
+public class TestResource {
private HttpServer server;
private WebTarget target;
@@ -76,12 +80,8 @@
put(Entity.json(json), KorapResponse.class);
assertEquals(kresp.getNode(), "milena");
- /*
- assertNull(kresp.getErr());
- assertNull(kresp.getErrstr());
- */
+ assertEquals(kresp.getErr(), 0);
assertEquals(kresp.getUnstaged(), unstaged++);
- assertEquals(kresp.getVersion(), "0.42");
};
KorapResponse kresp = target.path("/index").
@@ -90,4 +90,38 @@
assertEquals(kresp.getNode(), "milena");
assertEquals(kresp.getMsg(), "Unstaged data was committed");
};
+
+ /**
+  * Posts the bsp-uid-example query to "/" with the query parameters
+  * uid=1 and uid=4, and expects two total results and error code 0.
+  */
+ @Test
+ public void testCollection() throws IOException {
+
+ String json = getString(
+ getClass().getResource("/queries/bsp-uid-example.jsonld").getFile()
+ );
+
+ // The "uid" parameter is given twice, once per requested document
+ KorapResponse kresp
+ = target.path("/").
+ queryParam("uid", "1").
+ queryParam("uid", "4").
+ request("application/json").
+ post(Entity.json(json), KorapResponse.class);
+
+ assertEquals(2, kresp.getTotalResults());
+ assertEquals(0, kresp.getErr());
+ };
+
+ /**
+  * Read a text file into a single string, for use as a test fixture.
+  *
+  * The file is read line by line and the lines are concatenated
+  * without their line terminators. An I/O error fails the calling
+  * test with the exception message.
+  *
+  * @param path file system path of the file to read.
+  * @return the concatenated content of all lines.
+  */
+ public static String getString (String path) {
+     StringBuilder contentBuilder = new StringBuilder();
+     // try-with-resources closes the reader even when readLine() throws;
+     // the explicit close() used before was skipped on that path.
+     try (BufferedReader in = new BufferedReader(new FileReader(path))) {
+         String str;
+         while ((str = in.readLine()) != null) {
+             contentBuilder.append(str);
+         };
+     } catch (IOException e) {
+         fail(e.getMessage());
+     }
+     return contentBuilder.toString();
+ };
+
};
diff --git a/src/test/resources/queries/bsp-uid-example.jsonld b/src/test/resources/queries/bsp-uid-example.jsonld
new file mode 100644
index 0000000..72f1c53
--- /dev/null
+++ b/src/test/resources/queries/bsp-uid-example.jsonld
@@ -0,0 +1,13 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"sein",
+ "match": "match:eq"
+ }
+ }
+}