Fixed pubDate bug
diff --git a/CHANGES b/CHANGES
index fe98544..4930f2f 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+0.25.1 2013-12-28
+ - Support for pubDate collections.
+
0.25 2013-12-20
- Support for Wildcard Queries.
- Improved support for regular expressions.
diff --git a/pom.xml b/pom.xml
index dd12c85..a166b2f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,7 +9,7 @@
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.25</version>
+ <version>0.25.1</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index 6c33c7c..1f65da2 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -11,6 +11,7 @@
import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.util.KorapDate;
+import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.filter.BooleanFilter;
import de.ids_mannheim.korap.filter.FilterOperation;
import org.apache.lucene.search.spans.SpanQuery;
@@ -58,6 +59,7 @@
try {
JsonNode json = mapper.readValue(jsonString, JsonNode.class);
if (json.has("meta")) {
+ log.trace("Add meta collection");
for (JsonNode meta : json.get("meta")) {
this.fromJSON(meta);
};
@@ -72,13 +74,15 @@
this.filter = new ArrayList<FilterOperation>(5);
};
- public void fromJSON(JsonNode json) {
+ public void fromJSON(JsonNode json) throws QueryException { // dispatches on "@type"; QueryException propagates from the BooleanFilter(JsonNode) constructor
String type = json.get("@type").asText();
if (type.equals("korap:meta-filter")) {
+ log.trace("Add Filter"); // "@value" is added through filter()
this.filter(new BooleanFilter(json.get("@value")));
}
else if (type.equals("korap:meta-extend")) {
+ log.trace("Add Extend"); // "@value" is added through extend(); any other "@type" falls through untouched
this.extend(new BooleanFilter(json.get("@value")));
};
};
@@ -134,6 +138,14 @@
};
+ public String toString () { // concatenates every filter operation's toString(), each followed by "; "
+ StringBuilder sb = new StringBuilder(); // buffer is local to this call, so the unsynchronized builder is sufficient
+ for (FilterOperation fo : this.filter) {
+ sb.append(fo.toString()).append("; ");
+ };
+ return sb.toString();
+ };
+
// DEPRECATED BUT USED IN TEST CASES
public KorapResult search (SpanQuery query) {
return this.index.search(this, query, 0, (short) 20, true, (short) 5, true, (short) 5);
@@ -231,5 +243,9 @@
return this.index.numberOf(this, "tokens", type);
};
+ public String getError () { // plain accessor for the error field; KorapSearch.setCollection in this patch treats null as "no error"
+ return this.error;
+ };
+
// implement "till" with rangefilter
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 8733a38..4f55dd0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -252,6 +252,8 @@
public KorapSearch setCollection (KorapCollection kc) {
this.collection = kc;
+ if (kc != null && kc.getError() != null) // a null collection was storable before this patch; do not dereference it
+ this.error = kc.getError(); // copy the collection's error onto this search
return this;
};
diff --git a/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java b/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
index 5a22198..b9bf69d 100644
--- a/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
@@ -17,6 +17,8 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
+import de.ids_mannheim.korap.util.QueryException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,18 +40,19 @@
private String type;
// Logger
- private final static Logger jlog = LoggerFactory.getLogger(KorapFilter.class);
-
+ private final static Logger log = LoggerFactory.getLogger(KorapFilter.class);
private BooleanQuery bool;
+ private String error;
public BooleanFilter () {
bool = new BooleanQuery();
};
- public BooleanFilter (JsonNode json) {
+ public BooleanFilter (JsonNode json) throws QueryException {
bool = new BooleanQuery();
-
+ this.fromJSON(json, "tokens");
+ /*
String type = json.get("@type").asText();
String field = _getField(json);
@@ -62,12 +65,15 @@
this.fromJSON(operand, field);
};
};
+ */
};
- private void fromJSON (JsonNode json, String field) {
+ private void fromJSON (JsonNode json, String field) throws QueryException {
String type = json.get("@type").asText();
+ log.trace("@type: " + type);
+
if (json.has("@field"))
field = _getField(json);
@@ -82,7 +88,10 @@
String date, till;
+ log.trace("relation: " + json.get("relation").asText());
+
switch (json.get("relation").asText()) {
+
case "between":
date = _getDate(json, 0);
till = _getDate(json, 1);
@@ -107,8 +116,23 @@
if (date != null)
this.date(date);
break;
+
+ case "and":
+ if (!json.has("operands"))
+ return;
+
+ for (JsonNode operand : json.get("operands")) {
+ this.fromJSON(operand, field);
+ };
+ break;
+
+ default:
+ throw new QueryException(json.get("relation").asText() + " is not a supported relation");
};
}
+ else {
+ throw new QueryException(type + " is not a supported group");
+ };
};
private static String _getField (JsonNode json) {
@@ -232,7 +256,7 @@
);
}
catch (NumberFormatException e) {
- jlog.warn("Parameter of till(date) is invalid");
+ log.warn("Parameter of till(date) is invalid");
};
return this;
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index e050a57..addc5d8 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -11,7 +11,8 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.analysis.MultiTermTokenStream = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
index 3dca921..87e20ff 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
@@ -16,10 +16,11 @@
public void metaQuery1 () {
String metaQuery = getString(getClass().getResource("/queries/metaquery.json").getFile());
KorapCollection kc = new KorapCollection(metaQuery);
- assertEquals(3,kc.getCount());
- assertEquals("filter with QueryWrapperFilter(+textClass:wissenschaft)",kc.getFilter(0).toString());
- assertEquals("filter with QueryWrapperFilter(+pubPlace:Erfurt +author:Hesse)",kc.getFilter(1).toString());
- assertEquals("extend with QueryWrapperFilter(+pubDate:[20110429 TO 20131231] +textClass:freizeit)",kc.getFilter(2).toString());
+
+ assertEquals("filter with QueryWrapperFilter(+textClass:wissenschaft)", kc.getFilter(0).toString());
+ assertEquals("filter with QueryWrapperFilter(+pubPlace:Erfurt +author:Hesse)", kc.getFilter(1).toString());
+ assertEquals("extend with QueryWrapperFilter(+pubDate:[20110429 TO 20131231] +textClass:freizeit)", kc.getFilter(2).toString());
+ assertEquals(3, kc.getCount());
};
@@ -31,6 +32,14 @@
assertEquals("filter with QueryWrapperFilter(+author:Hesse +pubDate:[0 TO 20131205])",kc.getFilter(0).toString());
};
+ @Test
+ public void metaQuery3 () { // metaquery4.json holds one meta-filter: a "between" group on pubDate
+ String metaQuery = getString(getClass().getResource("/queries/metaquery4.json").getFile());
+ KorapCollection kc = new KorapCollection(metaQuery);
+ assertEquals(1,kc.getCount()); // a single filter operation is expected
+ assertEquals("filter with QueryWrapperFilter(+pubDate:[20000101 TO 20131231])",kc.getFilter(0).toString()); // 2000-01-01 .. 2013-12-31 rendered without dashes
+ };
+
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
@@ -47,4 +56,4 @@
return contentBuilder.toString();
};
-};
\ No newline at end of file
+};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 7e14b8f..cdb8329 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -111,13 +111,38 @@
assertEquals(5, kr.getItemsPerPage());
assertEquals(5, kr.getStartIndex());
assertEquals("... a: A ist [der klangreichste] der V ...", kr.getMatch(0).getSnippetBrackets());
+ };
- json = getString(getClass().getResource("/queries/metaquery4.json").getFile());
+ @Test
+ public void searchJSON2 () throws IOException {
- kr = new KorapSearch(json).run(ki);
- assertEquals(0, kr.getTotalResults());
- assertEquals(5, kr.getItemsPerPage());
- assertEquals(5, kr.getStartIndex());
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02035-substring", "02439", "05663-unbalanced", "07452-deep"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+
+ String json = getString(getClass().getResource("/queries/metaquery4.json").getFile());
+
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(2, kr.getTotalResults());
+
+ json = getString(getClass().getResource("/queries/metaquery5.json").getFile());
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(2, kr.getTotalResults());
+
+ System.err.println(kr.toJSON());
+
+ json = getString(getClass().getResource("/queries/metaquery6.json").getFile());
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(1, kr.getTotalResults());
};
diff --git a/src/test/resources/queries/metaquery2.json b/src/test/resources/queries/metaquery2.json
index c80b044..c1a6eda 100644
--- a/src/test/resources/queries/metaquery2.json
+++ b/src/test/resources/queries/metaquery2.json
@@ -1,89 +1,89 @@
{
- "@context": {
- "korap": "http://korap.ids-mannheim.de/ns/query",
- "@language": "de",
- "operands": {
- "@id": "korap:operands",
- "@container": "@list"
- },
- "relation": {
- "@id": "korap:relation",
- "@type": "korap:relation#types"
- },
- "class": {
- "@id": "korap:class",
- "@type": "xsd:integer"
- },
- "query": "korap:query",
- "filter": "korap:filter",
- "meta": "korap:meta"
+ "@context": {
+ "korap": "http://korap.ids-mannheim.de/ns/query",
+ "@language": "de",
+ "operands": {
+ "@id": "korap:operands",
+ "@container": "@list"
},
- "startPage" : 2,
- "count" : 5,
- "context" : {
- "left" : [ "token", 6 ],
- "right" : [ "token", 6 ]
+ "relation": {
+ "@id": "korap:relation",
+ "@type": "korap:relation#types"
},
- "query":{
- "@type":"korap:group",
- "relation":"or",
- "operands":[
- {
+ "class": {
+ "@id": "korap:class",
+ "@type": "xsd:integer"
+ },
+ "query": "korap:query",
+ "filter": "korap:filter",
+ "meta": "korap:meta"
+ },
+ "startPage" : 2,
+ "count" : 5,
+ "context" : {
+ "left" : [ "token", 6 ],
+ "right" : [ "token", 6 ]
+ },
+ "query":{
+ "@type":"korap:group",
+ "relation":"or",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:Vokal",
+ "relation":"="
+ }
+ },
+ {
+ "@type":"korap:sequence",
+ "operands":[
+ {
"@type":"korap:token",
"@value":{
- "@type":"korap:term",
- "@value":"base:Vokal",
- "relation":"="
+ "@type":"korap:term",
+ "@value":"base:der",
+ "relation":"="
}
- },
- {
- "@type":"korap:sequence",
- "operands":[
- {
- "@type":"korap:token",
- "@value":{
- "@type":"korap:term",
- "@value":"base:der",
- "relation":"="
- }
- },
- {
- "@type":"korap:token",
- "@value":{
- "@type":"korap:term",
- "@value":"mate/p:ADJD",
- "relation":"="
- }
- }
- ]
- }
- ]
- },
- "meta": [
- {
- "@type": "korap:meta-filter",
- "@value": {
- "@type": "korap:group",
- "relation": "and",
- "operands": [
- {
- "@type": "korap:term",
- "@field": "korap:field#author",
- "@value": "Hesse"
- },
- {
- "@type": "korap:group",
- "@field": "korap:field#pubDate",
- "relation": "until",
- "operands": [
- {
- "@type": "korap:date",
- "@value": "2013-12-05"
- }
- ]
- }
- ]
- }
+ },
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"mate/p:ADJD",
+ "relation":"="
}
+ }
]
-}
\ No newline at end of file
+ }
+ ]
+ },
+ "meta": [
+ {
+ "@type": "korap:meta-filter",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "and",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@field": "korap:field#author",
+ "@value": "Hesse"
+ },
+ {
+ "@type": "korap:group",
+ "@field": "korap:field#pubDate",
+ "relation": "until",
+ "operands": [
+ {
+ "@type": "korap:date",
+ "@value": "2013-12-05"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery4.json b/src/test/resources/queries/metaquery4.json
index 6b82550..33aea67 100644
--- a/src/test/resources/queries/metaquery4.json
+++ b/src/test/resources/queries/metaquery4.json
@@ -1,73 +1,56 @@
{
- "@context": {
- "korap": "http://korap.ids-mannheim.de/ns/query",
- "@language": "de",
- "operands": {
- "@id": "korap:operands",
- "@container": "@list"
- },
- "relation": {
- "@id": "korap:relation",
- "@type": "korap:relation#types"
- },
- "class": {
- "@id": "korap:class",
- "@type": "xsd:integer"
- },
- "query": "korap:query",
- "filter": "korap:filter",
- "meta": "korap:meta"
+ "@context": {
+ "korap": "http://korap.ids-mannheim.de/ns/query",
+ "@language": "de",
+ "operands": {
+ "@id": "korap:operands",
+ "@container": "@list"
},
- "startPage" : 2,
- "count" : 5,
- "context" : {
- "left" : [ "token", 3 ],
- "right" : [ "char", 6 ]
+ "relation": {
+ "@id": "korap:relation",
+ "@type": "korap:relation#types"
},
- "query":{
- "@type":"korap:group",
- "relation":"or",
- "operands":[
- {
- "@type":"korap:token",
- "@value":{
- "@type":"korap:term",
- "@value":"base:Vokal",
- "relation":"="
- }
- },
- {
- "@type":"korap:sequence",
- "operands":[
- {
- "@type":"korap:token",
- "@value":{
- "@type":"korap:term",
- "@value":"base:der",
- "relation":"="
- }
- },
- {
- "@type":"korap:token",
- "@value":{
- "@type":"korap:term",
- "@value":"mate/p:ADJA",
- "relation":"="
- }
- }
- ]
- }
- ]
- },
- "meta": [
- {
- "@type": "korap:meta-filter",
- "@id": "korap-filter#id-1223232",
- "@value": {
- "@type": "korap:term",
- "@field": "korap:field#textClass",
- "@value": "baum"
- }
- }
- ]
-}
\ No newline at end of file
+ "class": {
+ "@id": "korap:class",
+ "@type": "xsd:integer"
+ },
+ "query": "korap:query",
+ "filter": "korap:filter",
+ "meta": "korap:meta"
+ },
+ "startPage" : 1,
+ "count" : 5,
+ "context" : {
+ "left" : [ "token", 3 ],
+ "right" : [ "char", 6 ]
+ },
+ "query":{
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:lediglich",
+ "relation":"="
+ }
+ },
+ "meta": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "between",
+ "@field": "korap:field#pubDate",
+ "operands": [
+ {
+ "@type": "korap:date",
+ "@value": "2000-01-01"
+ },
+ {
+ "@type": "korap:date",
+ "@value": "2013-12-31"
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery5.json b/src/test/resources/queries/metaquery5.json
new file mode 100644
index 0000000..5a0cf20
--- /dev/null
+++ b/src/test/resources/queries/metaquery5.json
@@ -0,0 +1,62 @@
+{
+ "@context": {
+ "korap": "http://korap.ids-mannheim.de/ns/query",
+ "@language": "de",
+ "operands": {
+ "@id": "korap:operands",
+ "@container": "@list"
+ },
+ "relation": {
+ "@id": "korap:relation",
+ "@type": "korap:relation#types"
+ },
+ "class": {
+ "@id": "korap:class",
+ "@type": "xsd:integer"
+ },
+ "query": "korap:query",
+ "filter": "korap:filter",
+ "meta": "korap:meta"
+ },
+ "startPage" : 1,
+ "count" : 5,
+ "context" : {
+ "left" : [ "token", 3 ],
+ "right" : [ "char", 6 ]
+ },
+ "query":{
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:lediglich",
+ "relation":"="
+ }
+ },
+ "meta": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "and",
+ "operands": [
+ {
+ "@type": "korap:group",
+ "relation": "between",
+ "@field": "korap:field#pubDate",
+ "operands": [
+ {
+ "@type": "korap:date",
+ "@value": "2000-01-01"
+ },
+ {
+ "@type": "korap:date",
+ "@value": "2013-12-31"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery6.json b/src/test/resources/queries/metaquery6.json
new file mode 100644
index 0000000..5b80cf6
--- /dev/null
+++ b/src/test/resources/queries/metaquery6.json
@@ -0,0 +1,62 @@
+{
+ "@context": {
+ "korap": "http://korap.ids-mannheim.de/ns/query",
+ "@language": "de",
+ "operands": {
+ "@id": "korap:operands",
+ "@container": "@list"
+ },
+ "relation": {
+ "@id": "korap:relation",
+ "@type": "korap:relation#types"
+ },
+ "class": {
+ "@id": "korap:class",
+ "@type": "xsd:integer"
+ },
+ "query": "korap:query",
+ "filter": "korap:filter",
+ "meta": "korap:meta"
+ },
+ "startPage" : 1,
+ "count" : 5,
+ "context" : {
+ "left" : [ "token", 3 ],
+ "right" : [ "char", 6 ]
+ },
+ "query":{
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:lediglich",
+ "relation":"="
+ }
+ },
+ "meta": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "and",
+ "operands": [
+ {
+ "@type": "korap:group",
+ "relation": "between",
+ "@field": "korap:field#pubDate",
+ "operands": [
+ {
+ "@type": "korap:date",
+ "@value": "2005-01-01"
+ },
+ {
+ "@type": "korap:date",
+ "@value": "2013-12-31"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/wiki/readme.txt b/src/test/resources/wiki/readme.txt
index 6f60d05..b0a98ed 100644
--- a/src/test/resources/wiki/readme.txt
+++ b/src/test/resources/wiki/readme.txt
@@ -1,7 +1,10 @@
-00001: freizeit-unterhaltung,reisen,wissenschaft,populaerwissenschaft
-00002: freizeit-unterhaltung,reisen
-00003: kultur,musik
-00004: wissenschaft,populaerwissenschaft
-00005: freizeit-unterhaltung,reisen
-00006: freizeit-unterhaltung,reisen (WPD)
-02439: kultur,musik,freizeit-unterhaltung,reisen (WPD)
+00001: 20050328 - freizeit-unterhaltung,reisen,wissenschaft,populaerwissenschaft
+00002: 20050328 - freizeit-unterhaltung,reisen
+00003: 20050328 - kultur,musik
+00004: 20050328 - wissenschaft,populaerwissenschaft
+00005: 20050328 - freizeit-unterhaltung,reisen
+00006: 20050328 - freizeit-unterhaltung,reisen (WPD)
+02035: 20000111 - sport ballsport
+02439: 20050328 - kultur,musik,freizeit-unterhaltung,reisen (WPD)
+05663: 20000124 - gesundheit-ernaehrung gesundheit
+07452: 20000129 - politik kommunalpolitik