Validate query and collection input
diff --git a/CHANGES b/CHANGES
index afe7b3f..770142d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -12,6 +12,8 @@
- [sigh] Support more legacy APIs (diewald)
- [bugfix] Check for the existence of @type in JSON-LD groups
to avoid NullPointer (diewald)
+ - [cleanup] Validated query and collection input - limited
+ to a "don't break anything" policy (diewald)
0.47 2014-11-05
- [feature] Support new index format with more metadata (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index 904c6ce..dcb1461 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -123,6 +123,12 @@
* Legacy API for collection filters.
*/
public void fromJSONLegacy (JsonNode json) throws QueryException {
+ if (!json.has("@type"))
+ throw new QueryException(612, "JSON-LD group has no @type attribute");
+
+ if (!json.has("@value"))
+ throw new QueryException(612, "Legacy filter need @value fields");
+
String type = json.get("@type").asText();
KorapFilter kf = new KorapFilter();
diff --git a/src/main/java/de/ids_mannheim/korap/KorapFilter.java b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
index ac23ae9..9c271d9 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
@@ -26,56 +26,6 @@
* KorapFilter implements a simple API for creating meta queries
* constituing Virtual Collections.
*/
-
-/*
-Suche XYZ in allen Documenten in den Foundries "Treetagger" und "MATE", die entweder den Texttyp "sports" oder den Texttyp "news" haben, bis höchsten 2009 publiziert wurden und deren Autor auf den regulären Ausdruck "Peter .+?" matcht.
-
-textClass
-ID
-title
-subTitle
-author
-corpusID
-pubDate
-pubPlace
-
-Query: (corpusID=BRZ13 | corpusID=WPD) & textClass=wissenschaft
-
-{
- "@type": "korap:filter",
- "filter": {
- "@type": "korap:docGroup",
- "relation": "relation:and",
- "operands": [
- {
- "@type": "korap:docGroup",
- "relation": "relation:or",
- "operands": [
- {
- "@type": "korap:doc",
- "key": "corpusID",
- "value": "BRZ13",
- "match": "match:eq"
- },
- {
- "@type": "korap:doc",
- "key": "corpusID",
- "value": "WPD",
- "match": "match:eq"
- }
- ]
- },
- {
- "@type": "korap:doc",
- "key": "textClass",
- "value": "wissenschaft",
- "match": "match:eq"
- }
- ]
- }
-}
-*/
-
public class KorapFilter {
private BooleanFilter filter;
@@ -95,6 +45,13 @@
protected BooleanFilter fromJSON (JsonNode json, String field) throws QueryException {
BooleanFilter bfilter = new BooleanFilter();
+
+ /*
+ TODO: THIS UNFORTUNATELY BREAKS TESTS
+ if (!json.has("@type"))
+ throw new QueryException(612, "JSON-LD group has no @type attribute");
+ */
+
String type = json.get("@type").asText();
// Single filter
@@ -112,6 +69,10 @@
// Filter based on date
if (valtype.equals("type:date")) {
+
+ if (!json.has("value"))
+ throw new QueryException(612, "Dates require value fields");
+
String dateStr = json.get("value").asText();
if (json.has("match"))
match = json.get("match").asText();
@@ -146,6 +107,9 @@
// nested group
else if (type.equals("korap:docGroup")) {
+ if (!json.has("operands") || !json.get("operands").isArray())
+ throw new QueryException(612, "Groups need operands");
+
String operation = "operation:and";
if (json.has("operation"))
operation = json.get("operation").asText();
@@ -160,7 +124,7 @@
group.or(this.fromJSON(operand, field));
}
else {
- throw new QueryException(613, "Unknown docGroup operation");
+ throw new QueryException(613, "Unknown document group operation");
};
};
bfilter.and(group);
@@ -193,6 +157,9 @@
protected BooleanFilter fromJSONLegacy (JsonNode json, String field) throws QueryException {
BooleanFilter bfilter = new BooleanFilter();
+
+ if (!json.has("@type"))
+ throw new QueryException(612, "JSON-LD group has no @type attribute");
String type = json.get("@type").asText();
@@ -208,13 +175,22 @@
return bfilter;
}
else if (type.equals("korap:group")) {
- if (!json.has("relation") || !json.has("operands"))
- return bfilter;
+ if (!json.has("relation"))
+ throw new QueryException(612, "Group needs relation");
+
+ if (!json.has("operands"))
+ throw new QueryException(612, "Group needs operand list");
+
+ //return bfilter;
String dateStr, till;
+ JsonNode operands = json.get("operands");
+
+ if (!operands.isArray())
+ throw new QueryException(612, "Group needs operand list");
if (DEBUG)
- log.trace("relation: " + json.get("relation").asText());
+ log.trace("relation found {}", json.get("relation").asText());
BooleanFilter group = new BooleanFilter();
@@ -245,31 +221,34 @@
break;
case "and":
- for (JsonNode operand : json.get("operands")) {
+ if (operands.size() < 1)
+ throw new QueryException(612, "Operation needs at least two operands");
+
+ for (JsonNode operand : operands) {
group.and(this.fromJSONLegacy(operand, field));
};
bfilter.and(group);
break;
case "or":
- for (JsonNode operand : json.get("operands")) {
+ if (operands.size() < 1)
+ throw new QueryException(612, "Operation needs at least two operands");
+
+ for (JsonNode operand : operands) {
group.or(this.fromJSONLegacy(operand, field));
};
bfilter.and(group);
break;
default:
- throw new QueryException(
- json.get("relation").asText() + " is not a supported relation"
- );
+ throw new QueryException(613, "Relation is not supported");
};
}
else {
- throw new QueryException(type + " is not a supported group");
+ throw new QueryException(613, "Filter type is not a supported group");
};
return bfilter;
};
-
private static String _getFieldLegacy (JsonNode json) {
if (!json.has("@field"))
@@ -287,6 +266,10 @@
return (String) null;
JsonNode date = json.get("operands").get(index);
+
+ if (!date.has("@type"))
+ return (String) null;
+
if (!date.get("@type").asText().equals("korap:date"))
return (String) null;
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 28969a9..b7f4081 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -40,6 +40,8 @@
/*
+ TODO: Validate document import!
+
TODO: DON'T STORE THE TEXT IN THE TOKENS FIELD!
It has only to be lifted for match views!!!
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index 33aa4e7..b409599 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -173,7 +173,7 @@
throw new QueryException(612, "Position needs exactly two operands");
// TODO: Check for operands
-
+ // TODO: LEGACY and not future proof
String frame = json.has("frame") ?
json.get("frame").asText() :
"frame:contains";
@@ -281,33 +281,34 @@
throw new QueryException(612, "JSON-LD group has no @type attribute");
JsonNode distances;
- if (firstDistance.get("@type").asText().equals("korap:group"))
+ if (firstDistance.get("@type").asText().equals("korap:group")) {
+ if (!firstDistance.has("operands") ||
+ !firstDistance.get("operands").isArray())
+ throw new QueryException(612, "Groups need operands");
+
distances = firstDistance.get("operands");
+ }
// Support korap distances
- else if (firstDistance.get("@type").asText().equals("korap:distance"))
- distances = json.get("distances");
-
// Support cosmas distances
- else if (firstDistance.get("@type").asText().equals("cosmas:distance"))
+ else if (
+ firstDistance.get("@type").asText().equals("korap:distance")
+ ||
+ firstDistance.get("@type").asText().equals("cosmas:distance")) {
+
distances = json.get("distances");
+ }
else
throw new QueryException(612, "No valid distances defined");
+ // Add all distance constraints to query
for (JsonNode constraint : distances) {
String unit = "w";
if (constraint.has("key"))
unit = constraint.get("key").asText();
- /*
- if (unit.equals("t"))
- throw new QueryException(
- 613,
- "Text based distances are not supported yet"
- );
- */
-
+ // The maximum is fixed at 100
int min = 0, max = 100;
if (constraint.has("boundary")) {
Boundary b = new Boundary(constraint.get("boundary"), 0,100);
@@ -406,6 +407,13 @@
case "operation:repetition":
+ if (operands.size() != 1)
+ throw new QueryException(
+ 612,
+ "Class group expects exactly one operand in list"
+ );
+
+
int min = 0;
int max = 100;
@@ -463,18 +471,22 @@
json.get("operation").asText() +
" not supported yet");
+ if (!json.has("operands"))
+ throw new QueryException(
+ 613, "Peripheral references are not supported yet"
+ );
+
operands = json.get("operands");
- if (operands.size() == 0) {
- throw new QueryException(
- 613, "Focus with peripheral references is not supported yet"
- );
- };
+ if (!operands.isArray())
+ throw new QueryException(612, "Operation needs operand list");
+
+ if (operands.size() == 0)
+ throw new QueryException(612, "Operation needs operand list");
if (operands.size() != 1)
throw new QueryException(612, "Operation needs exactly two operands");
-
if (json.has("classRef")) {
if (json.has("classRefOp")) {
throw new QueryException(
@@ -486,8 +498,10 @@
number = json.get("classRef").get(0).asInt();
if (number > MAX_CLASS_NUM)
- throw new QueryException(613, "Class numbers limited to " + MAX_CLASS_NUM);
-
+ throw new QueryException(
+ 613,
+ "Class numbers limited to " + MAX_CLASS_NUM
+ );
}
else if (json.has("spanRef")) {
throw new QueryException(613, "Span references not supported yet");
@@ -553,17 +567,20 @@
case "korap:termGroup":
if (!json.has("operands"))
- throw new QueryException(612, "termGroup expects operands");
+ throw new QueryException(612, "Term group expects operands");
+
+ // Get operands
+ JsonNode operands = json.get("operands");
SpanSegmentQueryWrapper ssegqw = this.seg();
if (!json.has("relation"))
- throw new QueryException(612, "termGroup expects a relation");
+ throw new QueryException(612, "Term group expects a relation");
switch (json.get("relation").asText()) {
case "relation:and":
- for (JsonNode operand : json.get("operands")) {
+ for (JsonNode operand : operands) {
SpanQueryWrapper part = this._segFromJSON(operand);
if (part instanceof SpanAlterQueryWrapper) {
ssegqw.with((SpanAlterQueryWrapper) part);
@@ -583,8 +600,9 @@
return ssegqw;
case "relation:or":
+
SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
- for (JsonNode operand : json.get("operands")) {
+ for (JsonNode operand : operands) {
ssaq.or(this._segFromJSON(operand));
};
return ssaq;
@@ -635,8 +653,14 @@
break;
};
- if (isCaseInsensitive && isTerm && layer.equals("s"))
- layer = "i";
+ if (isCaseInsensitive && isTerm) {
+ if (layer.equals("s")) {
+ layer = "i";
+ }
+ else {
+ this.addWarning("Layer does not support case insensitivity");
+ };
+ };
// Ignore foundry for orth layer
if (layer.equals("s"))
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index adb574c..1798afd 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -114,7 +114,7 @@
// virtual collections
- if (this.request.has("collections") ||
+ if (this.request.has("collection") ||
// Legacy collections
this.request.has("collections"))
this.setCollection(new KorapCollection(jsonString));
diff --git a/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java b/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
index ec04226..f288abf 100644
--- a/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/filter/BooleanFilter.java
@@ -23,9 +23,7 @@
/*
Todo: !not
-
-THE JSON STUFF DEFINITIVELY BELONGS INTO KORAPFILTER
-
+ THIS IS LIMITED TO PUBDATE AT THE MOMENT AND COMPLETELY LEGACY!
*/
/**
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java
index 963ef47..32e22a3 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java
@@ -46,6 +46,15 @@
assertEquals(kc.toString(), "filter with QueryWrapperFilter(+(pubDate:[19900000 TO 99999999] title:Mannheim)); ");
};
+
+ @Ignore
+ public void noCollection () {
+ String metaQuery = _getJSONString("no_collection.jsonld");
+ // TODO!!!
+ // Use KorapSearch and test
+ };
+
+
private String _getJSONString (String file) {
return getString(getClass().getResource(path + file).getFile());
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index c2d73e2..5c91eb5 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -763,6 +763,7 @@
);
KorapSearch ks = new KorapSearch(json);
+
KorapResult kr = ks.run(ki);
assertEquals(
kr.getQuery(),
diff --git a/src/test/resources/queries/collections/no_collection.jsonld b/src/test/resources/queries/collections/no_collection.jsonld
new file mode 100644
index 0000000..6eec72e
--- /dev/null
+++ b/src/test/resources/queries/collections/no_collection.jsonld
@@ -0,0 +1,63 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "errors":[],
+ "warnings":[],
+ "announcements":[
+ "Deprecated 2014-07-24: 'min' and 'max' to be supported until 3 months from deprecation date."
+ ],
+ "meta":{
+ "startPage":1,
+ "context":"paragraph"
+ },
+ "query":{
+ "@type":"korap:group",
+ "operation":"operation:sequence",
+ "operands":[
+ {
+ "@type":"korap:group",
+ "operation":"operation:repetition",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry":"tt",
+ "layer":"p",
+ "type":"type:regex",
+ "key":"A.*",
+ "match":"match:eq"
+ }
+ }
+ ],
+ "boundary":{
+ "@type":"korap:boundary",
+ "min":0,
+ "max":3
+ },
+ "min":0,
+ "max":3
+ },
+ {
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry":"tt",
+ "layer":"p",
+ "type":"type:regex",
+ "key":"N.*",
+ "match":"match:eq"
+ }
+ }
+ ]
+ },
+ "collections":[
+ {
+ "@type":"korap:meta-filter",
+ "@value":{
+ "@type":"korap:term",
+ "@field":"korap:field#corpusID",
+ "@value":"WPD"
+ }
+ }
+ ]
+}