New collection system
diff --git a/src/main/java/de/ids_mannheim/korap/KorapCollection.java b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
index 49b0aed..4e6b57b 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapCollection.java
@@ -2,26 +2,19 @@
import java.util.*;
import java.io.IOException;
-import org.apache.lucene.search.QueryWrapperFilter;
-import org.apache.lucene.search.NumericRangeFilter;
-import org.apache.lucene.search.Filter;
-import de.ids_mannheim.korap.KorapIndex;
-import de.ids_mannheim.korap.KorapResult;
-import de.ids_mannheim.korap.KorapFilter;
-
+import de.ids_mannheim.korap.*;
import de.ids_mannheim.korap.util.KorapDate;
import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.filter.BooleanFilter;
import de.ids_mannheim.korap.filter.FilterOperation;
+
import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.FilteredQuery;
+import org.apache.lucene.search.*;
+
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.DocIdSet;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
@@ -60,17 +53,24 @@
};
public KorapCollection (String jsonString) {
- this.filter = new ArrayList<FilterOperation>(5);
ObjectMapper mapper = new ObjectMapper();
+ this.filter = new ArrayList<FilterOperation>(5);
try {
JsonNode json = mapper.readValue(jsonString, JsonNode.class);
- if (json.has("collections")) {
+
+ if (json.has("collection")) {
+ this.fromJSON(json.get("collection"));
+ }
+
+ // Legacy collection serialization
+ // This will be removed!
+ else if (json.has("collections")) {
if (DEBUG)
- log.trace("Add meta collection");
+ log.warn("Using DEPRECATED collection!");
for (JsonNode collection : json.get("collections")) {
- this.fromJSON(collection);
+ this.fromJSONLegacy(collection);
};
};
}
@@ -84,18 +84,26 @@
};
public void fromJSON (JsonNode json) throws QueryException {
+ this.filter(new KorapFilter(json));
+ };
+
+ public void fromJSONLegacy (JsonNode json) throws QueryException {
String type = json.get("@type").asText();
+ KorapFilter kf = new KorapFilter();
+ kf.setBooleanFilter(
+ kf.fromJSONLegacy(json.get("@value"), "tokens")
+ );
if (type.equals("korap:meta-filter")) {
if (DEBUG)
- log.trace("Add Filter");
- this.filter(new KorapFilter(json.get("@value")));
+ log.trace("Add Filter LEGACY");
+ this.filter(kf);
}
else if (type.equals("korap:meta-extend")) {
if (DEBUG)
- log.trace("Add Extend");
- this.extend(new KorapFilter(json.get("@value")));
+ log.trace("Add Extend LEGACY");
+ this.extend(kf);
};
};
@@ -143,7 +151,7 @@
public KorapCollection filter (KorapFilter filter) {
- return this.filter(filter.toBooleanFilter());
+ return this.filter(filter.getBooleanFilter());
};
@@ -161,7 +169,7 @@
};
public KorapCollection extend (KorapFilter filter) {
- return this.extend(filter.toBooleanFilter());
+ return this.extend(filter.getBooleanFilter());
};
@@ -187,7 +195,14 @@
* testing purposes and not recommended for serious usage.
*/
public KorapResult search (SpanQuery query) {
- return this.index.search(this, query, 0, (short) 20, true, (short) 5, true, (short) 5);
+ return this.index.search(
+ this,
+ query,
+ 0,
+ (short) 20,
+ true, (short) 5,
+ true, (short) 5
+ );
};
public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
diff --git a/src/main/java/de/ids_mannheim/korap/KorapFilter.java b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
index 17a5dd4..a391d83 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapFilter.java
@@ -92,6 +92,75 @@
filter = this.fromJSON(json, "tokens");
};
+    /**
+     * Deserialize a collection query node into a BooleanFilter.
+     *
+     * So far only korap:doc nodes of "type:date" are evaluated; any other
+     * korap:doc and every korap:docGroup yields an empty BooleanFilter.
+     *
+     * @param json the collection node, expected to carry an "@type"
+     * @param field default field name (not evaluated by this method yet)
+     * @return the filter built for the node
+     * @throws QueryException with code 613 if "@type" is neither
+     *         korap:doc nor korap:docGroup
+     */
+    protected BooleanFilter fromJSON (JsonNode json, String field) throws QueryException {
+        // Build into a fresh filter, as fromJSONLegacy() does, instead of
+        // relying on the instance field being initialised by the caller
+        BooleanFilter filter = new BooleanFilter();
+        String type = json.get("@type").asText();
+
+        // Single filter
+        if (type.equals("korap:doc")) {
+
+            String key = "tokens";
+            String valtype = "type:string";
+            String match = "match:eq";
+
+            if (json.has("key"))
+                key = json.get("key").asText();
+
+            if (json.has("type"))
+                valtype = json.get("type").asText();
+
+            // Filter based on date
+            if (valtype.equals("type:date")) {
+                String date = json.get("value").asText();
+                if (json.has("match"))
+                    match = json.get("match").asText();
+
+                // Every case needs its own break - without it "match:eq"
+                // would fall through and apply since() and till() as well
+                switch (match) {
+                case "match:eq":
+                    filter.date(date);
+                    break;
+                case "match:geq":
+                    filter.since(date);
+                    break;
+                case "match:leq":
+                    filter.till(date);
+                    break;
+                };
+                /*
+                  No good reason for gt or lt
+                */
+                return filter;
+            };
+        }
+
+        // nested group
+        else if (type.equals("korap:docGroup")) {
+        }
+
+        // Unknown type
+        else {
+            throw new QueryException(613, "Collection query type has to be doc or docGroup");
+        };
+
+        return filter;
+    };
+
/*
String type = json.get("@type").asText();
String field = _getField(json);
@@ -108,7 +157,7 @@
*/
// };
- private BooleanFilter fromJSON (JsonNode json, String field) throws QueryException {
+ protected BooleanFilter fromJSONLegacy (JsonNode json, String field) throws QueryException {
BooleanFilter filter = new BooleanFilter();
String type = json.get("@type").asText();
@@ -117,7 +166,7 @@
log.trace("@type: " + type);
if (json.has("@field"))
- field = _getField(json);
+ field = _getFieldLegacy(json);
if (type.equals("korap:term")) {
if (field != null && json.has("@value"))
@@ -137,40 +186,40 @@
switch (json.get("relation").asText()) {
case "between":
- date = _getDate(json, 0);
- till = _getDate(json, 1);
+ date = _getDateLegacy(json, 0);
+ till = _getDateLegacy(json, 1);
if (date != null && till != null)
filter.between(date, till);
break;
case "until":
- date = _getDate(json, 0);
+ date = _getDateLegacy(json, 0);
if (date != null)
filter.till(date);
break;
case "since":
- date = _getDate(json, 0);
+ date = _getDateLegacy(json, 0);
if (date != null)
filter.since(date);
break;
case "equals":
- date = _getDate(json, 0);
+ date = _getDateLegacy(json, 0);
if (date != null)
filter.date(date);
break;
case "and":
for (JsonNode operand : json.get("operands")) {
- group.and(this.fromJSON(operand, field));
+ group.and(this.fromJSONLegacy(operand, field));
};
filter.and(group);
break;
case "or":
for (JsonNode operand : json.get("operands")) {
- group.or(this.fromJSON(operand, field));
+ group.or(this.fromJSONLegacy(operand, field));
};
filter.and(group);
break;
@@ -188,7 +237,7 @@
};
- private static String _getField (JsonNode json) {
+ private static String _getFieldLegacy (JsonNode json) {
if (!json.has("@field"))
return (String) null;
@@ -196,7 +245,7 @@
return field.replaceFirst("korap:field#", "");
};
- private static String _getDate (JsonNode json, int index) {
+ private static String _getDateLegacy (JsonNode json, int index) {
if (!json.has("operands"))
return (String) null;
@@ -268,10 +317,14 @@
return new RegexFilter(regex);
};
- public BooleanFilter toBooleanFilter() {
+ public BooleanFilter getBooleanFilter() {
return this.filter;
};
+ public void setBooleanFilter (BooleanFilter bf) {
+ this.filter = bf;
+ };
+
public Query toQuery () {
return this.filter.toQuery();
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index be261b6..ab624fb 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1,84 +1,38 @@
package de.ids_mannheim.korap;
+// Java classes
import java.util.*;
-
-import java.io.*;
-
-import java.net.URL;
-
import java.util.zip.GZIPInputStream;
import java.util.regex.Pattern;
-import java.io.FileInputStream;
+import java.io.*;
+import java.net.URL;
import java.nio.ByteBuffer;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.QueryWrapperFilter;
-
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-
+// Lucene classes
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.*;
import org.apache.lucene.document.Document;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
-
+import org.apache.lucene.index.*;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
+import org.apache.lucene.util.*;
+import org.apache.lucene.util.automaton.*;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.DocIdSetIterator;
-
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.util.FixedBitSet;
-
-// Automata
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.RegExp;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-
+// JSON helper class
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
-import de.ids_mannheim.korap.KorapResult;
-import de.ids_mannheim.korap.KorapMatch;
-import de.ids_mannheim.korap.KorapCollection;
-import de.ids_mannheim.korap.KorapSearch;
-import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.index.PositionsToOffset;
-import de.ids_mannheim.korap.index.TermInfo;
-import de.ids_mannheim.korap.index.SpanInfo;
-import de.ids_mannheim.korap.index.SearchContext;
+// KorAP classes
+import de.ids_mannheim.korap.*;
+import de.ids_mannheim.korap.index.*;
import de.ids_mannheim.korap.query.SpanElementQuery;
-import de.ids_mannheim.korap.index.MatchCollector;
import de.ids_mannheim.korap.util.QueryException;
+// SLF4J Logger classes
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -126,7 +80,6 @@
private int maxTermRelations = 100;
private int autoCommit = 500;
-
private Directory directory;
// Temp:
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index e15eeef..075233c 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -82,7 +82,7 @@
if (!json.has("@type") ||
!json.get("@type").asText().equals("korap:boundary")) {
- throw new QueryException("Boundary definition is not valid");
+ throw new QueryException(612, "Boundary definition is not valid");
};
// Set min boundary
@@ -108,7 +108,8 @@
json = this.json.readValue(jsonString, JsonNode.class);
}
catch (IOException e) {
- throw new QueryException(e.getMessage());
+ String msg = e.getMessage();
+ throw new QueryException(611, msg.split("\n")[0]);
};
if (!json.has("@type") && json.has("query"))
@@ -127,7 +128,7 @@
int number = 0;
if (!json.has("@type")) {
- throw new QueryException("JSON-LD group has no @type attribute");
+ throw new QueryException(612, "JSON-LD group has no @type attribute");
};
String type = json.get("@type").asText();
@@ -138,7 +139,7 @@
SpanClassQueryWrapper classWrapper;
if (!json.has("operation"))
- throw new QueryException("Group expects operation");
+ throw new QueryException(612, "Group expects operation");
String operation = json.get("operation").asText();
@@ -146,13 +147,13 @@
log.trace("Found {} group", operation);
if (!json.has("operands"))
- throw new QueryException("Operation needs operand list");
+ throw new QueryException(612, "Operation needs operand list");
// Get all operands
JsonNode operands = json.get("operands");
if (!operands.isArray())
- throw new QueryException("Operation needs operand list");
+ throw new QueryException(612, "Operation needs operand list");
if (DEBUG)
log.trace("Operands are {}", operands);
@@ -170,7 +171,7 @@
case "operation:position":
if (operands.size() != 2)
- throw new QueryException("Operation needs exactly two operands");
+ throw new QueryException(612, "Position needs exactly two operands");
// TODO: Check for operands
@@ -206,13 +207,14 @@
flag = REAL_OVERLAP;
break;
default:
- throw new QueryException("Frame type unknown");
+ throw new QueryException(613, "Frame type unknown");
};
// Check for exclusion modificator
Boolean exclude;
if (json.has("exclude") && json.get("exclude").asBoolean())
throw new QueryException(
+ 613,
"Exclusion is currently not supported in position operations"
);
@@ -226,16 +228,16 @@
case "operation:submatch":
if (operands.size() != 1)
- throw new QueryException("Operation needs exactly two operands");
+ throw new QueryException(612, "Operation needs exactly one operand");
if (json.has("classRef")) {
if (json.has("classRefOp"))
- throw new QueryException("Class reference operators not supported yet");
+ throw new QueryException(613, "Class reference operators not supported yet");
number = json.get("classRef").get(0).asInt();
}
else if (json.has("spanRef")) {
- throw new QueryException("Span references not supported yet");
+ throw new QueryException(613, "Span references not supported yet");
};
return new SpanMatchModifyQueryWrapper(
@@ -246,31 +248,39 @@
if (operands.size() < 2)
throw new QueryException(
- "SpanSequenceQuery needs at least two operands"
+ 612,
+ "SpanSequenceQuery needs at least two operands"
);
SpanSequenceQueryWrapper sseqqw = this.seq();
- for (JsonNode operand : operands) {
- sseqqw.append(this.fromJSON(operand));
- };
// Say if the operand order is important
if (json.has("inOrder"))
sseqqw.setInOrder(json.get("inOrder").asBoolean());
// Introduce distance constraints
+ // ATTENTION: Distances have to be set before segments are added
if (json.has("distances")) {
// TODO
if (json.has("exclude") && json.get("exclude").asBoolean())
throw new QueryException(
- "Excluding distance constraints are not supported yet"
+ 613, "Excluding distance constraints are not supported yet"
);
+ if (!json.get("distances").isArray()) {
+ throw new QueryException(
+ 612,
+ "Distance Constraints have " +
+ "to be defined as arrays"
+ );
+ };
+
// TEMPORARY: Workaround for group distances
JsonNode firstDistance = json.get("distances").get(0);
+
if (!firstDistance.has("@type"))
- throw new QueryException("Distances need a defined @type");
+ throw new QueryException(612, "Distances need a defined @type");
JsonNode distances;
if (firstDistance.get("@type").asText().equals("korap:group"))
@@ -278,7 +288,7 @@
else if (firstDistance.get("@type").asText().equals("korap:distance"))
distances = json.get("distances");
else
- throw new QueryException("No valid distances defined");
+ throw new QueryException(612, "No valid distances defined");
for (JsonNode constraint : distances) {
String unit = "w";
@@ -287,6 +297,7 @@
if (unit.equals("t"))
throw new QueryException(
+ 613,
"Text based distances are not supported yet"
);
@@ -303,10 +314,19 @@
max = constraint.get("max").asInt(100);
};
+ // Sanitize boundary
+ if (max < min)
+ max = min;
+
sseqqw.withConstraint(min, max, unit);
};
};
+ // Add segments to sequence
+ for (JsonNode operand : operands) {
+ sseqqw.append(this.fromJSON(operand));
+ };
+
// inOrder was set without a distance constraint
if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
sseqqw.withConstraint(1,1,"w");
@@ -319,6 +339,7 @@
if (json.has("class")) {
if (operands.size() != 1)
throw new QueryException(
+ 612,
"Class group expects exactly one operand in list"
);
@@ -327,8 +348,11 @@
number = json.get("class").asInt(0);
- if (number > MAX_CLASS_NUM)
- throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
+ if (number > MAX_CLASS_NUM) {
+ throw new QueryException(
+ 612, "Class numbers limited to " + MAX_CLASS_NUM
+ );
+ };
SpanQueryWrapper sqw = this.fromJSON(operands.get(0));
@@ -339,7 +363,7 @@
return new SpanClassQueryWrapper(sqw, number);
};
- throw new QueryException("Class group expects class attribute");
+ throw new QueryException(612, "Class group expects class attribute");
case "operation:repetition":
@@ -388,37 +412,43 @@
return new SpanRepetitionQueryWrapper(sqw, min, max);
};
- throw new QueryException("Unknown group operation");
+ throw new QueryException(613, "Unknown group operation");
case "korap:reference":
if (json.has("operation") &&
!json.get("operation").asText().equals("operation:focus"))
- throw new QueryException("Reference operation " +
+ throw new QueryException(613, "Reference operation " +
json.get("operation").asText() +
" not supported yet");
operands = json.get("operands");
if (operands.size() == 0) {
- throw new QueryException("Focus with peripheral references is not supported yet");
+ throw new QueryException(
+ 613, "Focus with peripheral references is not supported yet"
+ );
};
if (operands.size() != 1)
- throw new QueryException("Operation needs exactly two operands");
+ throw new QueryException(612, "Operation needs exactly one operand");
if (json.has("classRef")) {
- if (json.has("classRefOp"))
- throw new QueryException("Class reference operators not supported yet");
+ if (json.has("classRefOp")) {
+ throw new QueryException(
+ 613,
+ "Class reference operators not supported yet"
+ );
+ };
number = json.get("classRef").get(0).asInt();
if (number > MAX_CLASS_NUM)
- throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
+ throw new QueryException(613, "Class numbers limited to " + MAX_CLASS_NUM);
}
else if (json.has("spanRef")) {
- throw new QueryException("Span references not supported yet");
+ throw new QueryException(613, "Span references not supported yet");
};
if (DEBUG)
@@ -438,11 +468,11 @@
case "korap:span":
if (!json.has("key"))
- throw new QueryException("A span needs at least a key definition");
+ throw new QueryException(612, "A span needs at least a key definition");
return this._termFromJSON(json);
};
- throw new QueryException("Unknown serialized query type: " + type);
+ throw new QueryException(613, "Unknown serialized query type");
};
@@ -472,17 +502,17 @@
return this._termFromJSON(json);
};
- throw new QueryException("Match relation unknown");
+ throw new QueryException(613, "Match relation unknown");
case "korap:termGroup":
if (!json.has("operands"))
- throw new QueryException("TermGroup expects operands");
+ throw new QueryException(612, "termGroup expects operands");
SpanSegmentQueryWrapper ssegqw = this.seg();
if (!json.has("relation"))
- throw new QueryException("termGroup expects a relation");
+ throw new QueryException(612, "termGroup expects a relation");
switch (json.get("relation").asText()) {
case "relation:and":
@@ -499,7 +529,9 @@
ssegqw.with((SpanSegmentQueryWrapper) part);
}
else {
- throw new QueryException("Object not supported in segment queries");
+ throw new QueryException(
+ 613, "Object not supported in segment queries"
+ );
};
};
return ssegqw;
@@ -512,14 +544,14 @@
return ssaq;
};
};
- throw new QueryException("Unknown token type");
+ throw new QueryException(613, "Unknown token type");
};
private SpanQueryWrapper _termFromJSON (JsonNode json) throws QueryException {
if (!json.has("key") || json.get("key").asText().length() < 1)
- throw new QueryException("Terms and spans have to provide key attributes");
+ throw new QueryException(612, "Terms and spans have to provide key attributes");
Boolean isTerm = json.get("@type").asText().equals("korap:term") ? true : false;
Boolean isCaseInsensitive = false;
@@ -588,7 +620,7 @@
return this.seg(value.toString());
if (json.has("attr"))
- throw new QueryException("Attributes not yet supported in spans");
+ throw new QueryException(613, "Attributes not yet supported in spans");
return this.tag(value.toString());
};
@@ -839,6 +871,4 @@
public SpanRepetitionQueryWrapper repeat (SpanQueryWrapper element, int min, int max) {
return new SpanRepetitionQueryWrapper(element, min, max);
};
-
- // split
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 223ea72..0f0eb0e 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -79,8 +79,10 @@
if (this.request.has("warning"))
this.addWarning(this.request.get("warning").asText());
- // "meta" virtual collections
- if (this.request.has("collections"))
+        // virtual collections ("collection" is the current key)
+        if (this.request.has("collection") ||
+            // Legacy collections
+            this.request.has("collections"))
this.setCollection(new KorapCollection(jsonString));
if (this.error == null) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
index dd84549..eba99d6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
@@ -21,7 +21,8 @@
protected boolean isNull = true,
isOptional = false,
isNegative = false,
- isEmpty = false,
+ isEmpty = false,
+ isExtended = false,
isExtendedToTheRight = false;
// Serialize query to Lucene SpanQuery
@@ -57,6 +58,10 @@
return this.isEmpty;
};
+ public boolean isExtended () {
+ return this.isExtended;
+ };
+
// The subquery may exceed the right text offset due to an empty extension
// [base=tree][]{3,4}
// This makes it necessary to check the last position of the span
@@ -65,7 +70,6 @@
return this.isExtendedToTheRight;
};
-
// Check, if the query may be an anchor
// in a SpanSequenceQueryWrapper
public boolean maybeAnchor () {
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 1180aeb..fe671ec 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -1,25 +1,13 @@
package de.ids_mannheim.korap.query.wrap;
import java.util.*;
-import de.ids_mannheim.korap.query.DistanceConstraint;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-import de.ids_mannheim.korap.query.SpanNextQuery;
-import de.ids_mannheim.korap.query.SpanDistanceQuery;
-import de.ids_mannheim.korap.query.SpanExpansionQuery;
-import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
-
-import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
-import de.ids_mannheim.korap.query.wrap.SpanSimpleQueryWrapper;
-import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
-import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
-import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
-import de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper;
+import de.ids_mannheim.korap.query.*;
+import de.ids_mannheim.korap.query.wrap.*;
import de.ids_mannheim.korap.util.QueryException;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -29,23 +17,23 @@
Make isNegative work!
Make isEmpty work!
Make isExtendedToTheRight work!
-
- Probably the problemsolving should be done on attribute check
- not on toQuery().
*/
-
/**
* Deserialize complexe sequence queries to Lucene SpanQueries.
*
* @author Nils Diewald
- * @version 0.02
+ * @version 0.03
*/
public class SpanSequenceQueryWrapper extends SpanQueryWrapper {
private String field;
private ArrayList<SpanQueryWrapper> segments;
private ArrayList<DistanceConstraint> constraints;
+ private final String limitationError =
+ "Distance constraints not supported with " +
+ "empty or negative operands";
+
// Logger
private final static Logger log = LoggerFactory.getLogger(SpanSequenceQueryWrapper.class);
@@ -54,6 +42,9 @@
private boolean isInOrder = true;
+ // _solveProblematicSequence() has already been run for the current segments
+ private boolean isSolved = false;
+
/**
* Empty constructor.
*/
@@ -108,7 +99,10 @@
log.trace("Unable to serialize query {}", qe.getMessage());
};
};
-
+ /*
+ System.err.println("Is negative: ");
+ System.err.println(sswq.isNegative());
+ */
this.segments.add(sswq);
this.isNull = false;
};
@@ -118,11 +112,7 @@
* Append a term to the sequence.
*/
public SpanSequenceQueryWrapper append (String term) {
- return this.append(
- new SpanSimpleQueryWrapper(
- new SpanTermQuery(new Term(field, term))
- )
- );
+ return this.append(new SpanTermQuery(new Term(field, term)));
};
@@ -141,21 +131,37 @@
if (ssq.isNull())
return this;
+ this.isSolved = false;
this.isNull = false;
- this.segments.add(ssq);
+
+        // Flatten an embedded sequence into this one, but only if neither
+        // side has distance constraints and both agree on the order.
+        // Every other operand (including sequences that can't be
+        // flattened) is kept as a single segment, so nothing is dropped.
+        SpanSequenceQueryWrapper ssqw = null;
+        if (ssq instanceof SpanSequenceQueryWrapper)
+            ssqw = (SpanSequenceQueryWrapper) ssq;
+        if (ssqw != null &&
+            !this.hasConstraints() &&
+            !ssqw.hasConstraints() &&
+            this.isInOrder() == ssqw.isInOrder()) {
+            for (int i = 0; i < ssqw.segments.size(); i++) {
+                this.append(ssqw.segments.get(i));
+            };
+        }
+        else {
+            this.segments.add(ssq);
+        };
return this;
};
+
/**
* Prepend a term to the sequence.
*/
public SpanSequenceQueryWrapper prepend (String term) {
- return this.prepend(
- new SpanSimpleQueryWrapper(
- new SpanTermQuery(new Term(field, term))
- )
- );
+ return this.prepend(new SpanTermQuery(new Term(field, term)));
};
@@ -163,11 +169,10 @@
* Prepend a SpanQuery to the sequence.
*/
public SpanSequenceQueryWrapper prepend (SpanQuery query) {
- return this.prepend(
- new SpanSimpleQueryWrapper(query)
- );
+ return this.prepend(new SpanSimpleQueryWrapper(query));
};
+
/**
* Prepend a SpanQueryWrapper to the sequence.
*/
@@ -175,8 +180,27 @@
if (ssq.isNull())
return this;
+ this.isSolved = false;
this.isNull = false;
- this.segments.add(0, ssq);
+
+        // Flatten an embedded sequence into this one, but only if neither
+        // side has distance constraints and both agree on the order.
+        // Every other operand (including sequences that can't be
+        // flattened) is kept as a single segment, so nothing is dropped.
+        SpanSequenceQueryWrapper ssqw = null;
+        if (ssq instanceof SpanSequenceQueryWrapper)
+            ssqw = (SpanSequenceQueryWrapper) ssq;
+        if (ssqw != null &&
+            !this.hasConstraints() &&
+            !ssqw.hasConstraints() &&
+            this.isInOrder() == ssqw.isInOrder()) {
+            for (int i = ssqw.segments.size() - 1; i >= 0; i--) {
+                this.prepend(ssqw.segments.get(i));
+            };
+        }
+        else {
+            this.segments.add(0, ssq);
+        };
return this;
};
@@ -222,8 +246,12 @@
boolean exclusion) {
if (this.constraints == null)
this.constraints = new ArrayList<DistanceConstraint>(1);
+
+ // Word unit
if (unit.equals("w"))
this.constraints.add(new DistanceConstraint(min, max, isInOrder, exclusion));
+
+ // Element unit (sentence or paragraph)
else
this.constraints.add(
new DistanceConstraint(
@@ -232,6 +260,7 @@
return this;
};
+
/**
* Respect the order of distances.
*/
@@ -239,6 +268,7 @@
this.isInOrder = isInOrder;
};
+
/**
* Check if the order is relevant.
*/
@@ -246,6 +276,7 @@
return this.isInOrder;
};
+
/**
* Check if there are constraints defined for the sequence.
*/
@@ -254,10 +285,20 @@
return false;
if (this.constraints.size() <= 0)
return false;
+
+ // The constraint is in fact a next query
+ if (this.constraints.size() == 1) {
+ DistanceConstraint dc = this.constraints.get(0);
+ if (dc.getUnit().equals("w") &&
+ dc.getMinDistance() == 1 &&
+ dc.getMaxDistance() == 1) {
+ return false;
+ };
+ };
+
return true;
};
-
/**
* Serialize Query to Lucene SpanQueries
*/
@@ -266,35 +307,53 @@
int size = this.segments.size();
// Nothing to do
- if (size == 0 || this.isNull)
+ if (size == 0 || this.isNull())
return (SpanQuery) null;
+ // No real sequence - only one element
if (size == 1) {
+
+ // But the element may be expanded
+ if (this.segments.get(0).isExtended() &&
+ (this.hasConstraints() || !this.isInOrder())) {
+ throw new QueryException(613, limitationError);
+ };
+
+ // Unproblematic single query
if (this.segments.get(0).maybeAnchor())
return (SpanQuery) this.segments.get(0).toQuery();
if (this.segments.get(0).isEmpty())
- throw new QueryException("Sequence is not allowed to be empty");
+ throw new QueryException(613, "Sequence is not allowed to be empty");
if (this.segments.get(0).isOptional())
- throw new QueryException("Sequence is not allowed to be optional");
+ throw new QueryException(613, "Sequence is not allowed to be optional");
if (this.segments.get(0).isNegative())
- throw new QueryException("Sequence is not allowed to be negative");
+ throw new QueryException(613, "Sequence is not allowed to be negative");
};
- if (!_solveProblematicSequence(size)) {
- if (this.segments.get(0).isNegative())
- throw new QueryException("Sequence contains unresolvable "+
- "empty, optional, or negative segments");
+ if (!this.isSolved) {
+ if (!_solveProblematicSequence()) {
+ if (this.segments.get(0).maybeExtension())
+ throw new QueryException(
+ 613,
+ "Sequence contains unresolvable "+
+ "empty, optional, or negative segments"
+ );
+ };
+ };
+
+ // The element may be expanded
+ if (this.segments.size() == 1 &&
+ this.segments.get(0).isExtended() &&
+ (this.hasConstraints() || !this.isInOrder())) {
+ throw new QueryException(613, limitationError);
};
// Create the initial query
SpanQuery query = this.segments.get(0).toQuery();
// NextQueries:
- if (this.constraints == null || this.constraints.size() == 0 ||
- (this.constraints.size() == 1 &&
- (this.constraints.get(0).getMinDistance() == 1 &&
- this.constraints.get(0).getMaxDistance() == 1))) {
+ if (!this.hasConstraints() && this.isInOrder()) {
for (int i = 1; i < this.segments.size(); i++) {
query = new SpanNextQuery(
query,
@@ -311,6 +370,11 @@
// Create spanElementDistance query
if (!constraint.getUnit().equals("w")) {
for (int i = 1; i < this.segments.size(); i++) {
+
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
+
SpanDistanceQuery sdquery = new SpanDistanceQuery(
query,
this.segments.get(i).toQuery(),
@@ -324,6 +388,11 @@
// Create spanDistance query
else {
for (int i = 1; i < this.segments.size(); i++) {
+
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
+
SpanDistanceQuery sdquery = new SpanDistanceQuery(
query,
this.segments.get(i).toQuery(),
@@ -339,6 +408,11 @@
// MultipleDistanceQueries
for (int i = 1; i < this.segments.size(); i++) {
+
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
+
query = new SpanMultipleDistanceQuery(
query,
this.segments.get(i).toQuery(),
@@ -356,7 +430,9 @@
- merge the problematic segment with the anchor
- go on
*/
- private boolean _solveProblematicSequence (int size) throws QueryException {
+ private boolean _solveProblematicSequence () {
+
+ int size = this.segments.size();
// Check if there is a problematic segment
SpanQueryWrapper underScrutiny;
@@ -379,10 +455,15 @@
if (i < (size-1) && this.segments.get(i+1).maybeAnchor()) {
// Insert the solution
- this.segments.set(
- i+1,
- _merge(this.segments.get(i+1), underScrutiny, false)
- );
+ try {
+ this.segments.set(
+ i+1,
+ _merge(this.segments.get(i+1), underScrutiny, false)
+ );
+ }
+ catch (QueryException e) {
+ return false;
+ };
// Remove the problem
this.segments.remove(i);
@@ -395,10 +476,15 @@
// [anchor][problem]
else if (i >= 1 && this.segments.get(i-1).maybeAnchor()) {
// Insert the solution
- this.segments.set(
- i-1,
- _merge(this.segments.get(i-1), underScrutiny, true)
- );
+ try {
+ this.segments.set(
+ i-1,
+ _merge(this.segments.get(i-1), underScrutiny, true)
+ );
+ }
+ catch (QueryException e) {
+ return false;
+ };
// Remove the problem
this.segments.remove(i);
@@ -422,18 +508,20 @@
// The size has changed - retry!
if (size != this.segments.size())
- return _solveProblematicSequence(this.segments.size());
- else
- return true;
+ return _solveProblematicSequence();
+
+ this.isSolved = true;
+ return true;
};
+ this.isSolved = true;
return false;
};
// Todo: Deal with negative and optional!
// [base=der][base!=Baum]?
- public SpanQueryWrapper _merge (
+ private SpanQueryWrapper _merge (
SpanQueryWrapper anchor,
SpanQueryWrapper problem,
boolean mergeLeft) throws QueryException {
@@ -480,7 +568,7 @@
true
);
};
- return new SpanSimpleQueryWrapper(query);
+ return new SpanSimpleQueryWrapper(query).isExtended(true);
}
// make negative extension to anchor
@@ -518,7 +606,7 @@
true
);
};
- return new SpanSimpleQueryWrapper(query);
+ return new SpanSimpleQueryWrapper(query).isExtended(true);
};
if (DEBUG)
@@ -543,4 +631,36 @@
return (SpanQueryWrapper) saqw;
};
+
+ // Emptiness of the sequence: a one-segment sequence asks that segment directly.
+ public boolean isEmpty () {
+     if (this.segments.size() == 1)
+         return this.segments.get(0).isEmpty();
+     if (!this.isSolved) _solveProblematicSequence(); // solve before using the inherited answer
+     return super.isEmpty();
+ };
+
+
+ // Optionality of the sequence: a one-segment sequence asks that segment directly.
+ public boolean isOptional () {
+     if (this.segments.size() == 1)
+         return this.segments.get(0).isOptional();
+     if (!this.isSolved) _solveProblematicSequence(); // solve before using the inherited answer
+     return super.isOptional();
+ };
+
+ // Negativity of the sequence: a one-segment sequence asks that segment directly.
+ public boolean isNegative () {
+     if (this.segments.size() == 1)
+         return this.segments.get(0).isNegative();
+     if (!this.isSolved) _solveProblematicSequence(); // solve before using the inherited answer
+     return super.isNegative();
+ };
+
+ // Returns the isExtendedToTheRight flag, running the solver first if not yet solved.
+ public boolean isExtendedToTheRight () {
+     if (!this.isSolved)
+         _solveProblematicSequence();
+     return this.isExtendedToTheRight;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java
index 84cc259..dcc38da 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java
@@ -20,4 +20,9 @@
public SpanQuery toQuery () {
return this.query;
};
+
+ public SpanSimpleQueryWrapper isExtended (boolean extended) { // fluent setter, returns this
+     this.isExtended = extended; // keep the caller's value, so false clears the flag
+     return this;
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/TestSimple.java b/src/test/java/de/ids_mannheim/korap/TestSimple.java
index 552d5a3..a86e537 100644
--- a/src/test/java/de/ids_mannheim/korap/TestSimple.java
+++ b/src/test/java/de/ids_mannheim/korap/TestSimple.java
@@ -1,28 +1,21 @@
package de.ids_mannheim.korap;
import java.util.*;
-import java.io.IOException;
+import java.io.*;
-import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
-import de.ids_mannheim.korap.analysis.MultiTermToken;
+import static org.junit.Assert.*;
+
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.analysis.*;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
import static de.ids_mannheim.korap.util.KorapByte.*;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.AtomicReaderContext;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-
+import org.apache.lucene.index.*;
+import org.apache.lucene.document.*;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
-
import org.apache.lucene.util.Bits;
/**
@@ -32,6 +25,7 @@
*/
public class TestSimple {
+ // Add document
public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
Document doc = new Document();
@@ -60,7 +54,7 @@
w.addDocument(doc);
};
-
+ // Get Term Vector
public static MultiTermTokenStream getTermVector (String stream) {
MultiTermTokenStream ts = new MultiTermTokenStream();
@@ -89,7 +83,42 @@
return ts;
};
- public static List<String> getSpanInfo (IndexReader reader, SpanQuery query) throws IOException {
+ // Read the JSON file at the given path and parse it into a query wrapper; a QueryException fails the test
+ public static SpanQueryWrapper getJSONQuery (String jsonFile) {
+ SpanQueryWrapper sqwi;
+
+ try {
+ String json = getString(jsonFile);
+ sqwi = new KorapQuery("tokens").fromJSON(json);
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ sqwi = new KorapQuery("tokens").seg("???"); // placeholder only: JUnit's fail() above always throws
+ };
+ return sqwi;
+ };
+
+
+ // Read the file at `path` and return its lines joined without line terminators.
+ // An IOException fails the calling test; the reader is closed on every path.
+ public static String getString (String path) {
+     StringBuilder contentBuilder = new StringBuilder();
+     try (BufferedReader in = new BufferedReader(new FileReader(path))) {
+         String str;
+         while ((str = in.readLine()) != null) {
+             contentBuilder.append(str);
+         }
+     }
+     catch (IOException e) {
+         fail(e.getMessage());
+     }
+     return contentBuilder.toString();
+ };
+
+
+ // getSpan Info
+ public static List<String> getSpanInfo (IndexReader reader, SpanQuery query)
+ throws IOException {
Map<Term, TermContext> termContexts = new HashMap<>();
List<String> spanArray = new ArrayList<>();
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java
new file mode 100644
index 0000000..0399e55
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSON.java
@@ -0,0 +1,30 @@
+package de.ids_mannheim.korap.collection;
+
+import java.util.*;
+import java.io.*;
+
+import de.ids_mannheim.korap.KorapCollection;
+
+import static de.ids_mannheim.korap.TestSimple.*;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestKorapCollectionJSON {
+ // Classpath directory of the collection fixtures; prepended to every fixture file name
+ final String path = "/queries/collections/";
+ // Builds a KorapCollection from collection_1.jsonld; nothing is asserted on the result yet
+ @Test
+ public void collection1 () {
+ String metaQuery = _getJSONString("collection_1.jsonld");
+ KorapCollection kc = new KorapCollection(metaQuery);
+ };
+ // Resolves the fixture below `path` and reads it with the statically imported getString()
+ private String _getJSONString (String file) {
+ return getString(getClass().getResource(path + file).getFile());
+ };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSONLegacy.java
similarity index 82%
rename from src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
rename to src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSONLegacy.java
index 62e07bf..ef9be16 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionJSONLegacy.java
@@ -1,10 +1,12 @@
-package de.ids_mannheim.korap.filter;
+package de.ids_mannheim.korap.collection;
import java.util.*;
import java.io.*;
import de.ids_mannheim.korap.KorapCollection;
+import static de.ids_mannheim.korap.TestSimple.*;
+
import static org.junit.Assert.*;
import org.junit.Test;
import org.junit.Ignore;
@@ -12,7 +14,7 @@
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
-public class TestKorapCollectionJSON {
+public class TestKorapCollectionJSONLegacy {
@Test
public void metaQuery1 () {
@@ -58,23 +60,4 @@
assertEquals(1,kc.getCount());
assertEquals("filter with QueryWrapperFilter(+corpusID:WPD)",kc.getFilter(0).toString());
};
-
-
-
- public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
- };
- in.close();
- }
- catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
- };
-
};
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
similarity index 98%
rename from src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
rename to src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
index 9caa0ab..2cf0e4d 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapCollectionLegacy.java
@@ -1,4 +1,4 @@
-package de.ids_mannheim.korap.filter;
+package de.ids_mannheim.korap.collection;
import java.io.*;
@@ -23,7 +23,7 @@
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
-public class TestKorapCollection {
+public class TestKorapCollectionLegacy {
@Test
public void filterExample () throws Exception {
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java b/src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java
similarity index 98%
rename from src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
rename to src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java
index c2d9b00..f89145f 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKorapFilter.java
@@ -1,4 +1,4 @@
-package de.ids_mannheim.korap.filter;
+package de.ids_mannheim.korap.collection;
import java.util.*;
import java.io.*;
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
index a05f837..241d122 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
@@ -281,204 +281,6 @@
assertEquals(sqwi.toQuery().toString(), "spanDistance(tokens:s:Tal, tokens:s:Wald, [(w[2:100], ordered, notExcluded)])");
};
-
- /*
- Check extensions
- */
-
- @Test
- public void queryJSONseqEmpty () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty.jsonld").getFile());
-
- // []
- assertTrue(sqwi.isEmpty());
- };
-
- @Test
- public void queryJSONseqEmptyEnd () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-last.jsonld").getFile());
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right)");
- };
-
- @Test
- public void queryJSONseqEmptyEndClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-last-class.jsonld").getFile());
- // der{3:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right, class:3)");
- };
-
- @Test
- public void queryJSONseqEmptyEndRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-last-repetition.jsonld").getFile());
- // der[]{3,5}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{3, 5}, right)");
- };
-
- @Test
- public void queryJSONseqEmptyStart () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-first.jsonld").getFile());
- // [][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left)");
- };
-
- @Test
- public void queryJSONseqEmptyStartClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-first-class.jsonld").getFile());
- // {2:[]}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:2)");
- };
-
- @Test
- public void queryJSONseqEmptyStartRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-first-repetition.jsonld").getFile());
- // []{2,7}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{2, 7}, left)");
- };
-
- @Test
- public void queryJSONseqEmptyStartRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-first-repetition-2.jsonld").getFile());
- // []{0,0}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
- };
-
- @Test
- public void queryJSONseqEmptyMiddle () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-middle.jsonld").getFile());
- // der[][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left))");
- };
-
- @Test
- public void queryJSONseqEmptyMiddleClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-middle-class.jsonld").getFile());
- // der{1:[]}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:1))");
- };
-
- @Test
- public void queryJSONseqEmptyMiddleRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-middle-repetition.jsonld").getFile());
- // der[]{4,8}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{4, 8}, left))");
- };
-
- @Test
- public void queryJSONseqEmptySurround () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround.jsonld").getFile());
- // [][tt/p=NN][]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right)");
- };
-
- @Test
- public void queryJSONseqEmptySurroundClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround-class.jsonld").getFile());
- // [][tt/p=NN]{2:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right, class:2)");
- };
-
- @Test
- public void queryJSONseqEmptySurroundClass2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround-class-2.jsonld").getFile());
- // {3:[]}[tt/p=NN]{2:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:3), []{1, 1}, right, class:2)");
- };
-
- @Test
- public void queryJSONseqEmptySurroundRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround-repetition.jsonld").getFile());
- // [][tt/p=NN][]{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{2, 7}, right)");
- };
-
- @Test
- public void queryJSONseqEmptySurroundRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround-repetition-2.jsonld").getFile());
- // []{3,5}[tt/p=NN][]{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 5}, left), []{2, 7}, right)");
- };
-
- @Test
- public void queryJSONseqEmptySurroundRepetitionClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-surround-repetition-class.jsonld").getFile());
- // {1:[]}{3,8}[tt/p=NN]{2:[]{2,7}}
- // Ist gleichbedeutend mit
- // {1:[]{3,8}}[tt/p=NN]{2:[]}{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 8}, left, class:1), []{2, 7}, right, class:2)");
- };
-
- @Test
- public void queryJSONseqNegative () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative.jsonld").getFile());
-
- // [tt/p!=NN]
- assertTrue(sqwi.isNegative());
- };
-
- @Test
- public void queryJSONseqNegativeStart () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first.jsonld").getFile());
-
- // [tt/p!=NN][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, left)");
- };
-
- @Test
- public void queryJSONseqNegativeEnd () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last.jsonld").getFile());
-
- // [tt/p=NN][tt/p!=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right)");
- };
-
- @Test
- public void queryJSONseqNegativeStartRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition.jsonld").getFile());
-
- // [tt/p!=NN]{4,5}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, left)");
- };
-
- @Test
- public void queryJSONseqNegativeStartRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition-2.jsonld").getFile());
-
- // [tt/p!=NN]{0,5}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{0, 5}, left)");
- };
-
- @Test
- public void queryJSONseqNegativeStartRepetition3 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition-3.jsonld").getFile());
-
- // [tt/p!=NN]{0,0}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
- };
-
- @Test
- public void queryJSONseqNegativeEndClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class.jsonld").getFile());
-
- // [tt/p=NN]{2:[tt/p!=NN]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right, class:2)");
- };
-
- @Test
- public void queryJSONseqNegativeEndRepetitionClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class-repetition.jsonld").getFile());
-
- // [tt/p=NN]{2:[tt/p!=NN]{4,5}}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
- };
-
- @Test
- public void queryJSONseqNegativeEndRepetitionClass2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class-repetition-2.jsonld").getFile());
-
- // [tt/p=NN]{2:[tt/p!=NN]}{4,5}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
- };
-
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
try {
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
new file mode 100644
index 0000000..d46c9e2
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
@@ -0,0 +1,242 @@
+package de.ids_mannheim.korap.query;
+
+import java.util.*;
+import java.io.*;
+
+import static de.ids_mannheim.korap.TestSimple.*;
+
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestSpanSequenceQueryJSON {
+
+ static String path = "/queries/sequence/";
+
+ // Test Extensions
+
+ @Test
+ public void queryJSONseqEmpty () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty.jsonld");
+
+ // []
+ assertTrue(sqwi.isEmpty());
+ };
+
+ @Test
+ public void queryJSONseqEmptyEnd () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last.jsonld");
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyEndClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last-class.jsonld");
+ // der{3:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right, class:3)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyEndRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last-repetition.jsonld");
+ // der[]{3,5}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{3, 5}, right)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyStart () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first.jsonld");
+ // [][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyStartClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-class.jsonld");
+ // {2:[]}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyStartRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition.jsonld");
+ // []{2,7}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{2, 7}, left)");
+ };
+
+ @Test
+ public void queryJSONseqEmptyStartRepetition2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition-2.jsonld");
+ // []{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ };
+
+ @Test
+ public void queryJSONseqEmptyMiddle () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle.jsonld");
+ // der[][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left))");
+ };
+
+ @Test
+ public void queryJSONseqEmptyMiddleClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-class.jsonld");
+ // der{1:[]}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:1))");
+ };
+
+ @Test
+ public void queryJSONseqEmptyMiddleRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-repetition.jsonld");
+ // der[]{4,8}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{4, 8}, left))");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurround () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround.jsonld");
+ // [][tt/p=NN][]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right)");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurroundClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class.jsonld");
+ // [][tt/p=NN]{2:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurroundClass2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class-2.jsonld");
+ // {3:[]}[tt/p=NN]{2:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:3), []{1, 1}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurroundRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition.jsonld");
+ // [][tt/p=NN][]{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{2, 7}, right)");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurroundRepetition2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-2.jsonld");
+ // []{3,5}[tt/p=NN][]{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 5}, left), []{2, 7}, right)");
+ };
+
+ @Test
+ public void queryJSONseqEmptySurroundRepetitionClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-class.jsonld");
+ // {1:[]}{3,8}[tt/p=NN]{2:[]{2,7}}
+ // Ist gleichbedeutend mit
+ // {1:[]{3,8}}[tt/p=NN]{2:[]}{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 8}, left, class:1), []{2, 7}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegative () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative.jsonld");
+ // [tt/p!=NN]
+ assertTrue(sqwi.isNegative());
+ };
+
+ @Test
+ public void queryJSONseqNegativeStart () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first.jsonld");
+ // [tt/p!=NN][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEnd () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last.jsonld");
+ // [tt/p=NN][tt/p!=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition.jsonld");
+ // [tt/p!=NN]{4,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-2.jsonld");
+ // [tt/p!=NN]{0,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{0, 5}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition3 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-3.jsonld");
+ // [tt/p!=NN]{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndRepetitionClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]{4,5}}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndRepetitionClass2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition-2.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]}{4,5}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegativelastConstraint () {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-constraint.jsonld");
+ try {
+ sqwi.toQuery().toString(); // expected to throw a QueryException for this fixture
+ fail("Should throw an exception");
+ }
+ catch (QueryException qe) {
+ assertEquals("Distance constraints not supported with empty or negative operands", qe.getMessage());
+ };
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndSequence () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence.jsonld");
+ // [tt/p=NN]([tt/p!=DET][tt/p!=ADJ])
+ assertEquals("spanExpansion(spanExpansion(tokens:tt/p:NN, !tokens:tt/p:DET{1, 1}, right), !tokens:tt/p:ADJ{1, 1}, right)", sqwi.toQuery().toString());
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndSequence2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence-2.jsonld");
+ // [tt/p!=NN]([tt/p!=DET][tt/p=ADJ])
+
+ // Both negative operands are expected as nested left expansions around the ADJ term
+ assertEquals("spanExpansion(spanExpansion(tokens:tt/p:ADJ, !tokens:tt/p:DET{1, 1}, left), !tokens:tt/p:NN{1, 1}, left)", sqwi.toQuery().toString());
+ };
+
+
+
+ // Resolve the fixture name below `path` on the classpath and parse it with getJSONQuery()
+ public SpanQueryWrapper jsonQueryFile (String filename) {
+ return getJSONQuery(getClass().getResource(path + filename).getFile());
+ };
+};
diff --git a/src/test/resources/collections/collection_1.jsonld b/src/test/resources/queries/collections/collection_1.jsonld
similarity index 100%
rename from src/test/resources/collections/collection_1.jsonld
rename to src/test/resources/queries/collections/collection_1.jsonld
diff --git a/src/test/resources/collections/collection_2.jsonld b/src/test/resources/queries/collections/collection_2.jsonld
similarity index 94%
rename from src/test/resources/collections/collection_2.jsonld
rename to src/test/resources/queries/collections/collection_2.jsonld
index 57c40cf..c6c39f4 100644
--- a/src/test/resources/collections/collection_2.jsonld
+++ b/src/test/resources/queries/collections/collection_2.jsonld
@@ -11,7 +11,7 @@
"key" : "pubDate",
"type" : "type:date",
"value" : "1990",
- "match" : "match:gt"
+ "match" : "match:geq"
}, {
"@type" : "korap:doc",
"key" : "pubDate",
diff --git a/src/test/resources/collections/collection_3.jsonld b/src/test/resources/queries/collections/collection_3.jsonld
similarity index 100%
rename from src/test/resources/collections/collection_3.jsonld
rename to src/test/resources/queries/collections/collection_3.jsonld
diff --git a/src/test/resources/collections/readme.txt b/src/test/resources/queries/collections/readme.txt
similarity index 100%
rename from src/test/resources/collections/readme.txt
rename to src/test/resources/queries/collections/readme.txt