Return JSON request in response
diff --git a/CHANGES b/CHANGES
index e9fbe04..2060909 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+0.30.3 2014-02-20
+ - Return json request in response if given (diewald)
+
0.30.2 2014-02-14
- [bugfix] wrapper uses correct distance constraint
diff --git a/pom.xml b/pom.xml
index b0c9fc9..caeff94 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,7 +11,7 @@
-->
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.30.2</version>
+ <version>0.30.3</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
@@ -86,6 +86,14 @@
<version>1.3</version>
</dependency>
+
+ <!-- getopt -->
+ <dependency>
+ <groupId>gnu.getopt</groupId>
+ <artifactId>java-getopt</artifactId>
+ <version>1.0.13</version>
+ </dependency>
+
<!-- JCache -->
<dependency>
<groupId>net.sf.jsr107cache</groupId>
@@ -136,37 +144,45 @@
</plugin>
<!--
- install the indexer
+ install the indexer and the performancetests
$ mvn clean compile assembly:single
Then run e.g.
- $ java -jar target/KorAP-lucene-index-X.XX-jar-with-dependencies.jar src/test/resources/wiki/
- or
- $ java -jar target/KorAP-lucene-index-X.XX-jar-with-dependencies.jar /home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/toindex/A00
- $ java -jar target/KorAP-lucene-index-0.21-jar-with-dependencies.jar /home/ndiewald/Repositories/KorAP/KorAP-modules/KorAP-lucene-index/sandbox/toindex/A00
-
+ $ java -jar target/KorapIndexer.jar src/main/resources/korap.conf /data/hdd/lucene-new/WPD/
-->
+
<plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <configuration>
- <archive>
- <manifest>
- <mainClass>de.ids_mannheim.korap.KorapIndexer</mainClass>
- </manifest>
- </archive>
- <descriptorRefs>
- <descriptorRef>jar-with-dependencies</descriptorRef>
- </descriptorRefs>
- </configuration>
- <executions>
- <execution>
- <id>make-assembly</id>
- <phase>package</phase>
- <goals>
- <goal>single</goal>
- </goals>
- </execution>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <configuration>
+ <!--
+ <finalName>KorapTools</finalName>
+ -->
+ <appendAssemblyId>false</appendAssemblyId>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+
+ <execution>
+ <id>KorapIndexer</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <finalName>KorapIndexer</finalName>
+ <archive>
+ <manifest>
+ <mainClass>de.ids_mannheim.korap.KorapIndexer</mainClass>
+ </manifest>
+ </archive>
+ </configuration>
+ </execution>
</executions>
</plugin>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
@@ -174,6 +190,7 @@
<configuration>
<excludes>
<exclude>**/TestRealIndex.java</exclude>
+ <exclude>**/benchmark/*</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndexer.java b/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
index 1bdfb8d..b935db8 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
@@ -12,25 +12,22 @@
int count;
int commitCount;
+ // Init logger
private final static Logger log = LoggerFactory.getLogger(KorapIndexer.class);
- public KorapIndexer () throws IOException {
+ public KorapIndexer(Properties prop) throws IOException {
+ this.indexDir = prop.getProperty("lucene.indexDir");
- Properties prop = new Properties();
-
- ClassLoader classLoader = getClass().getClassLoader();
- InputStream fr = classLoader.getResourceAsStream("korap.conf");
-
- prop.load(fr);
-
- this.indexDir = prop.getProperty("lucene.index");
- String commitCount = prop.getProperty("lucene.index.commit.count", "1000");
+ System.out.println("Index to " + this.indexDir);
+
+ String commitCount = prop.getProperty("lucene.index.commit.count", "1000");
this.index = new KorapIndex(new MMapDirectory(new File(indexDir)));
- this.count = 0;
- this.commitCount = Integer.parseInt(commitCount);
+ this.count = 0;
+ this.commitCount = Integer.parseInt(commitCount);
};
+
public void parse (File dir) {
for (String file : dir.list()) {
if (file.matches("^[^\\.].+?\\.json\\.gz$")) {
@@ -49,6 +46,7 @@
};
};
+
public void commit () {
System.out.println("-----");
System.out.print(" Commit ... ");
@@ -61,25 +59,28 @@
System.out.println("done.");
};
- public static void main(String[] args) throws IOException {
- KorapIndexer ki = new KorapIndexer();
+ public static void main (String[] argv) throws IOException {
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(argv[0]);
+ prop.load(fr);
+ KorapIndexer ki = new KorapIndexer(prop);
System.out.println();
- for (String arg : args) {
- File f = new File( arg );
- if (f.isDirectory()) {
- ki.parse(f);
- };
- };
+ for (String arg : Arrays.copyOfRange(argv, 1, argv.length)) {
+ File f = new File(arg);
+ if (f.isDirectory())
+ ki.parse(f);
+ };
- // Final commit
- ki.commit();
- // Finish indexing
- System.out.println("-----");
- System.out.println(" Indexed " + ki.count + " files.");
- System.out.println();
+ // Final commit
+ ki.commit();
+
+ // Finish indexing
+ System.out.println("-----");
+ System.out.println(" Indexed " + ki.count + " files.");
+ System.out.println();
};
-};
\ No newline at end of file
+};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 6b05e36..4c22cd7 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -38,6 +38,8 @@
benchmarkHitCounter = "0";
private String error = null;
+ private JsonNode request;
+
// Logger
// This is KorapMatch instead of KorapResult!
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
@@ -129,6 +131,14 @@
this.error = msg;
};
+ public void setRequest (JsonNode request) {
+ this.request = request;
+ };
+
+ public JsonNode getRequest () {
+ return this.request;
+ };
+
public void setBenchmarkSearchResults (long t1, long t2) {
this.benchmarkSearchResults = (t2 - t1) * 1e-6 + " ms";
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 30895fe..8843e31 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -34,6 +34,8 @@
private KorapIndex index;
private String error;
+ private JsonNode request;
+
public KorapSearchContext leftContext, rightContext;
{
@@ -99,12 +101,12 @@
public KorapSearch (String jsonString) {
ObjectMapper mapper = new ObjectMapper();
try {
- JsonNode json = mapper.readValue(jsonString, JsonNode.class);
-
+ this.request = mapper.readValue(jsonString, JsonNode.class);
+
// "query" value
- if (json.has("query")) {
+ if (this.request.has("query")) {
try {
- this.query = new KorapQuery("tokens").fromJSON(json.get("query")).toQuery();
+ this.query = new KorapQuery("tokens").fromJSON(this.request.get("query")).toQuery();
}
catch (QueryException q) {
this.error = q.getMessage();
@@ -115,12 +117,12 @@
};
// "meta" virtual collections
- if (json.has("collections"))
+ if (this.request.has("collections"))
this.setCollection(new KorapCollection(jsonString));
if (this.error == null) {
- if (json.has("meta")) {
- JsonNode meta = json.get("meta");
+ if (this.request.has("meta")) {
+ JsonNode meta = this.request.get("meta");
// Defined count
if (meta.has("count"))
@@ -178,6 +180,10 @@
return this.query;
};
+ public JsonNode getRequest () {
+ return this.request;
+ };
+
public KorapSearch setQuery (SpanQueryWrapperInterface sqwi) {
this.query = sqwi.toQuery();
return this;
@@ -274,6 +280,7 @@
public KorapResult run (KorapIndex ki) {
if (this.query == null) {
KorapResult kr = new KorapResult();
+ kr.setRequest(this.request);
if (this.error != null)
kr.setError(this.error);
else
@@ -283,11 +290,14 @@
if (this.error != null) {
KorapResult kr = new KorapResult();
+ kr.setRequest(this.request);
kr.setError(this.error);
return kr;
};
this.getCollection().setIndex(ki);
- return ki.search(this.getCollection(), this);
+ KorapResult kr = ki.search(this.getCollection(), this);
+ kr.setRequest(this.request);
+ return kr;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkElementSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkElementSpans.java
new file mode 100644
index 0000000..e7d11a3
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkElementSpans.java
@@ -0,0 +1,131 @@
+package de.ids_mannheim.korap.benchmark;
+
+import java.util.*;
+import java.io.*;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.KorapCollection;
+import de.ids_mannheim.korap.KorapFilter;
+import de.ids_mannheim.korap.KorapSearch;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.KorapQuery;
+import org.apache.lucene.store.MMapDirectory;
+import de.ids_mannheim.korap.filter.BooleanFilter;
+import org.apache.lucene.search.spans.SpanQuery;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapperInterface;
+import de.ids_mannheim.korap.util.QueryException;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestBenchmarkElementSpans {
+
+ @Test
+ public void checkspans1 () throws IOException {
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
+
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
+
+ // Construct filter generator
+ // KorapFilter kf = new KorapFilter();
+
+ // The virtual collection consists of all documents that have
+ // the textClasses "reisen" and "freizeit"
+ // kc.filter( kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung") );
+
+ // This is real slow atm - sorry
+ // kc.filter(kf.and("textClass", "kultur"));
+
+
+ // Create a query
+ // KorapQuery kq = new KorapQuery("tokens");
+
+ long t1 = 0, t2 = 0;
+ t1 = System.nanoTime();
+
+ String json = getString(getClass().getResource("/queries/bsp19.jsonld").getFile());
+
+ int rounds = 1;
+
+ for (int i = 1; i <= rounds; i++) {
+ /*
+ SpanQuery query =
+ kq.within(
+ kq.tag("xip/const:NPA"),
+ kq._(1,
+ kq.seq(
+ kq._(2, kq.seg("cnx/p:A").with("mate/m:number:sg"))
+ ).append(
+ kq.seg("opennlp/p:NN").with("tt/p:NN")
+ )
+ )
+ ).toQuery();
+ */
+ // SpanQuery query = kq.tag("s").toQuery();
+
+ KorapResult kr = new KorapSearch(json).run(ki);
+ System.err.println(kr.toJSON());
+ };
+
+ t2 = System.nanoTime();
+
+ System.out.println("It took " + ((t2 - t1) / rounds) * 1e-6 + " ms per query");
+
+
+
+ // kc = new KorapCollection("{\"meta\":[{\"@type\":\"korap:meta-filter\",\"@value\":{\"@type\":\"korap:term\",\"@field\":\"korap:field#corpusID\",\"@value\":\"A00\"}},{\"@type\":\"korap:meta-extend\",\"@value\":{\"@type\":\"korap:term\",\"@field\":\"korap:field#corpusID\",\"@value\":\"A01\"}}]}");
+
+ // kc = new KorapCollection("{\"meta\":[{\"@type\":\"korap:meta-filter\",\"@value\":{\"@type\":\"korap:term\",\"@field\":\"korap:field#corpusID\",\"@value\":\"A01\"}}]}");
+ /*
+ kc = new KorapCollection("{\"meta\":[{\"@type\":\"korap:meta-filter\",\"@value\":{\"@type\":\"korap:term\",\"@field\":\"korap:field#textClass\",\"@value\":\"reisen\"}}]}");
+ kc.setIndex(ki);
+
+ System.err.println(kc.getFilter(0).toString());
+ System.err.println(kc.numberOf("documents"));
+
+ */
+
+ // assertEquals(14, kc.numberOf("documents"));
+ };
+
+ public static String getString (String path) {
+ StringBuilder contentBuilder = new StringBuilder();
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(path));
+ String str;
+ while ((str = in.readLine()) != null) {
+ contentBuilder.append(str);
+ };
+ in.close();
+ } catch (IOException e) {
+ fail(e.getMessage());
+ }
+ return contentBuilder.toString();
+ };
+
+ public static SpanQueryWrapperInterface jsonQuery (String jsonFile) {
+ SpanQueryWrapperInterface sqwi;
+
+ try {
+ String json = getString(jsonFile);
+ sqwi = new KorapQuery("tokens").fromJSON(json);
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ sqwi = new KorapQuery("tokens").seg("???");
+ };
+ return sqwi;
+ };
+
+};
diff --git a/src/test/resources/queries/bsp19.jsonld b/src/test/resources/queries/bsp19.jsonld
new file mode 100644
index 0000000..5710602
--- /dev/null
+++ b/src/test/resources/queries/bsp19.jsonld
@@ -0,0 +1,46 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:sequence",
+ "inOrder" : true,
+ "distances" : [ {
+ "@type" : "korap:distance",
+ "key" : "w",
+ "min" : 1,
+ "max" : 3
+ }, {
+ "@type" : "korap:distance",
+ "key" : "s",
+ "min" : 0,
+ "max" : 1
+ } ],
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "das",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Buch",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ]
+ },
+ "meta" : {
+ "count":25,
+ "cutOff":true,
+ "context":{
+ "left":["char",110],
+ "right":["char",110]
+ },
+ "startPage":1
+ }
+}