Minor fix for subspan deserialization
diff --git a/Changes b/Changes
index e327a80..0fc0bb3 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,7 @@
0.49.3 2014-01-29
- [documentation] Improved documentation for API classes (diewald)
- [documentation] Improved documentation for various queries (margaretha)
- - [feature] Added deserialization of SpanSubSpanQueries (margaretha)
+ - [feature] Added deserialization of SpanSubSpanQueries (margaretha,diewald)
- [bugfix] Null filters are now correctly extended (diewald)
- [cleanup] Refactoring of KorapResult, KorapResponse, KorapQuery,
deprecated operation:or in favor of operation:junction (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index b7c3fcc..9ad167c 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -233,7 +233,6 @@
number = json.get("classRef").get(0).asInt();
-
if (number > MAX_CLASS_NUM)
throw new QueryException(
709,
@@ -244,7 +243,8 @@
// Reference based on spans
else if (json.has("spanRef")) {
JsonNode spanRef = json.get("spanRef");
- int length=0;
+ int length = 0;
+ int startOffset = 0;
if (!spanRef.isArray() || spanRef.size() == 0) {
throw new QueryException(
714,
@@ -252,14 +252,17 @@
" and a length parameter"
);
};
-
- if (!spanRef.get(1).isMissingNode())
- length = spanRef.get(1).asInt();
+
+ if (spanRef.size() > 1)
+ length = spanRef.get(1).asInt(0);
+ startOffset = spanRef.get(0).asInt(0);
+
+ if (DEBUG) log.trace("Wrap span reference {},{}", startOffset, length);
+
return new SpanSubspanQueryWrapper(
- fromJson(operands.get(0)), spanRef.get(0).asInt(),
- length
- );
+ this.fromJson(operands.get(0)), startOffset, length
+ );
};
if (DEBUG) log.trace("Wrap class reference {}", number);
@@ -273,6 +276,7 @@
if (!json.has("wrap"))
return new SpanRepetitionQueryWrapper();
+ // Get wrapped token
return this._segFromJson(json.get("wrap"));
case "korap:span":
@@ -283,6 +287,7 @@
throw new QueryException(713, "Query type is not supported");
};
+
// Deserialize korap:group
private SpanQueryWrapper _groupFromJson (JsonNode json) throws QueryException {
@@ -348,6 +353,7 @@
return ssaq;
};
+
// Deserialize operation:position
private SpanQueryWrapper _operationPositionFromJson (JsonNode json, JsonNode operands)
throws QueryException {
@@ -438,6 +444,7 @@
);
};
+
// Deserialize operation:repetition
private SpanQueryWrapper _operationRepetitionFromJson (JsonNode json, JsonNode operands)
throws QueryException {
@@ -445,8 +452,7 @@
if (operands.size() != 1)
throw new QueryException(705, "Number of operands is not acceptable");
- int min = 0;
- int max = 100;
+ int min = 0, max = 100;
if (json.has("boundary")) {
Boundary b = new Boundary(json.get("boundary"), 0, 100);
@@ -504,6 +510,7 @@
if (operands.size() != 1)
throw new QueryException(705, "Number of operands is not acceptable");
+ // Use class reference
if (json.has("classRef")) {
if (json.has("classRefOp")) {
throw new QueryException(
@@ -514,6 +521,8 @@
number = json.get("classRef").get(0).asInt();
}
+
+ // Use span reference
else if (json.has("spanRef")) {
throw new QueryException(
762,
@@ -599,6 +608,7 @@
// Deserialize operation:sequence
private SpanQueryWrapper _operationSequenceFromJson (JsonNode json, JsonNode operands)
throws QueryException {
+
// Sequence with only one operand
if (operands.size() == 1)
return this.fromJson(operands.get(0));
@@ -710,14 +720,14 @@
// inOrder was set to false without a distance constraint
if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
- sseqqw.withConstraint(1,1,"w");
+ sseqqw.withConstraint(1, 1, "w");
};
return sseqqw;
};
- // Segment
+ // Deserialize korap:token
private SpanQueryWrapper _segFromJson (JsonNode json) throws QueryException {
if (!json.has("@type"))
throw new QueryException(701, "JSON-LD group has no @type attribute");
@@ -727,6 +737,7 @@
if (DEBUG)
log.trace("Wrap new token definition by {}", type);
+ // Branch on type
switch (type) {
case "korap:term":
String match = "match:eq";
@@ -734,13 +745,18 @@
match = json.get("match").asText();
switch (match) {
+
case "match:ne":
if (DEBUG)
log.trace("Term is negated");
+
SpanSegmentQueryWrapper ssqw =
(SpanSegmentQueryWrapper) this._termFromJson(json);
+
ssqw.makeNegative();
+
return this.seg().without(ssqw);
+
case "match:eq":
return this._termFromJson(json);
};
@@ -796,13 +812,23 @@
};
- private SpanQueryWrapper _termFromJson (JsonNode json) throws QueryException {
+ // Deserialize korap:term
+ private SpanQueryWrapper _termFromJson (JsonNode json)
+ throws QueryException {
- if (!json.has("key") || json.get("key").asText().length() < 1)
- throw new QueryException(740, "Key definition is missing in term or span");
+ if (!json.has("key") || json.get("key").asText().length() < 1) {
+ throw new QueryException(
+ 740,
+ "Key definition is missing in term or span"
+ );
+ };
- if (!json.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (!json.has("@type")) {
+ throw new QueryException(
+ 701,
+ "JSON-LD group has no @type attribute"
+ );
+ };
Boolean isTerm = json.get("@type").asText().equals("korap:term") ? true : false;
Boolean isCaseInsensitive = false;
@@ -845,15 +871,16 @@
layer = "c";
break;
- case "cat":
- layer = "c";
- break;
+ /*
+ case "cat":
+ layer = "c";
+ break;
+ */
};
if (isCaseInsensitive && isTerm) {
- if (layer.equals("s")) {
+ if (layer.equals("s"))
layer = "i";
- }
else {
this.addWarning(
767,
@@ -879,13 +906,18 @@
// Regular expression or wildcard
if (isTerm && json.has("type")) {
+
+ // Branch on type
switch (json.get("type").asText()) {
case "type:regex":
return this.seg(this.re(value.toString(), isCaseInsensitive));
+
case "type:wildcard":
return this.seq(this.wc(value.toString(), isCaseInsensitive));
+
case "type:string":
break;
+
default:
this.addWarning(746, "Term type is not supported - treated as a string");
};
@@ -895,40 +927,48 @@
return this.seg(value.toString());
if (json.has("attr")) {
-
this.addWarning(
- 768,
- "Attributes are currently not supported - results may not be correct");
+ 768,
+ "Attributes are currently not supported - results may not be correct"
+ );
// SpanQueryWrapper attrQueryWrapper =
- // handleAttr(json.get("attr"));
+ // _attrFromJson(json.get("attr"));
// if (attrQueryWrapper != null) {
// return seg SpanElementWithAttributeQueryWrapper
// }
- }
+ };
+
return this.tag(value.toString());
};
- private SpanQueryWrapper handleAttr(JsonNode attrNode)
- throws QueryException {
+
+ private SpanQueryWrapper _attrFromJson (JsonNode attrNode)
+ throws QueryException {
if (!attrNode.has("@type")) {
- throw new QueryException(701,
- "JSON-LD group has no @type attribute");
- }
+ throw new QueryException(
+ 701,
+ "JSON-LD group has no @type attribute"
+ );
+ };
if (attrNode.get("@type").asText().equals("korap:term")) {
if (attrNode.has("tokenarity") || attrNode.has("arity")) {
this.addWarning(
- 768,
- "This kind of attributes are currently not supported - results may not be correct");
- }
+ 770,
+ "Arity attributes are currently not supported" +
+ " - results may not be correct"
+ );
+ };
+
if (attrNode.has("root")) {
String rootValue = attrNode.get("root").asText();
if (rootValue.equals("true") || rootValue.equals("false")) {
return new SpanAttributeQueryWrapper(
- new SpanSimpleQueryWrapper("tokens", "@root"),
- Boolean.valueOf(rootValue));
+ new SpanSimpleQueryWrapper("tokens", "@root"),
+ Boolean.valueOf(rootValue)
+ );
}
// wrong root value
}
@@ -941,6 +981,7 @@
return null;
}
+
/**
* Create a query object based on a regular expression.
*
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
index c455396..d276cb1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
@@ -34,16 +34,20 @@
};
public SpanQuery toQuery () throws QueryException {
- if (this.subquery.isNull())
- return (SpanQuery) null;
+ if (this.subquery.isNull())
+ return (SpanQuery) null;
- // TODO: If this.subquery.isNegative(), it may be an Expansion!
- // SpanExpansionQuery(x, y.negative, min, max. direction???, classNumber, true)
+ SpanQuery sq = (SpanQuery) this.subquery.toQuery();
+
+ if (sq == null) return (SpanQuery) null;
+
+ // TODO: If this.subquery.isNegative(), it may be an Expansion!
+ // SpanExpansionQuery(x, y.negative, min, max. direction???, classNumber, true)
- if (this.number == (byte) 0) {
- return new SpanClassQuery((SpanQuery) this.subquery.toQuery());
- };
- return new SpanClassQuery((SpanQuery) this.subquery.toQuery(), (byte) this.number);
+ if (this.number == (byte) 0) {
+ return new SpanClassQuery(sq);
+ };
+ return new SpanClassQuery(sq, (byte) this.number);
};
public boolean isOptional () {
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 4ed1223..9df3c6d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -31,11 +31,12 @@
private ArrayList<DistanceConstraint> constraints;
private final String limitationError =
- "Distance constraints not supported with " +
- "empty or negative operands";
+ "Distance constraints not supported with " +
+ "empty or negative operands";
// Logger
- private final static Logger log = LoggerFactory.getLogger(SpanSequenceQueryWrapper.class);
+ private final static Logger log =
+ LoggerFactory.getLogger(SpanSequenceQueryWrapper.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
@@ -45,12 +46,13 @@
// The sequence is problem solved
private boolean isSolved = false;
+
/**
* Empty constructor.
*/
public SpanSequenceQueryWrapper (String field) {
- this.field = field;
- this.segments = new ArrayList<SpanQueryWrapper>(2);
+ this.field = field;
+ this.segments = new ArrayList<SpanQueryWrapper>(2);
};
@@ -58,15 +60,15 @@
* Constructor accepting term sequences.
*/
public SpanSequenceQueryWrapper (String field, String ... terms) {
- this(field);
- for (int i = 0; i < terms.length; i++) {
- this.segments.add(
+ this(field);
+ for (int i = 0; i < terms.length; i++) {
+ this.segments.add(
new SpanSimpleQueryWrapper(
new SpanTermQuery(new Term(field, terms[i]))
)
);
- };
- this.isNull = false;
+ };
+ this.isNull = false;
};
@@ -74,9 +76,9 @@
* Constructor accepting SpanQuery sequences.
*/
public SpanSequenceQueryWrapper (String field, SpanQuery sq) {
- this(field);
- this.segments.add(new SpanSimpleQueryWrapper(sq));
- this.isNull = false;
+ this(field);
+ this.segments.add(new SpanSimpleQueryWrapper(sq));
+ this.isNull = false;
};
@@ -85,26 +87,26 @@
* These wrappers may be optional, negative or empty.
*/
public SpanSequenceQueryWrapper (String field, SpanQueryWrapper sswq) {
- this(field);
+ this(field);
- // Ignore null queries
- if (sswq.isNull())
- return;
+ // Ignore null queries
+ if (sswq.isNull())
+ return;
- if (DEBUG && !sswq.isEmpty) {
- try {
- log.trace("New span sequence {}", sswq.toQuery().toString());
- }
- catch (QueryException qe) {
- log.trace("Unable to serialize query {}", qe.getMessage());
- };
- };
- /*
- System.err.println("Is negative: ");
- System.err.println(sswq.isNegative());
- */
- this.segments.add(sswq);
- this.isNull = false;
+ if (DEBUG && !sswq.isEmpty) {
+ try {
+ log.trace("New span sequence {}", sswq.toQuery().toString());
+ }
+ catch (QueryException qe) {
+ log.trace("Unable to serialize query {}", qe.getMessage());
+ };
+ };
+ /*
+ System.err.println("Is negative: ");
+ System.err.println(sswq.isNegative());
+ */
+ this.segments.add(sswq);
+ this.isNull = false;
};
@@ -318,125 +320,149 @@
* Serialize Query to Lucene SpanQueries
*/
public SpanQuery toQuery () throws QueryException {
+ int size = this.segments.size();
- int size = this.segments.size();
+ // Nothing to do
+ if (size == 0 || this.isNull())
+ return (SpanQuery) null;
- // Nothing to do
- if (size == 0 || this.isNull())
- return (SpanQuery) null;
+ // No real sequence - only one element
+ if (size == 1) {
- // No real sequence - only one element
- if (size == 1) {
+ // But the element may be expanded
+ if (this.segments.get(0).isExtended() &&
+ (this.hasConstraints() || !this.isInOrder())) {
+ throw new QueryException(613, limitationError);
+ };
- // But the element may be expanded
- if (this.segments.get(0).isExtended() &&
- (this.hasConstraints() || !this.isInOrder())) {
- throw new QueryException(613, limitationError);
- };
+ // Unproblematic single query
+ if (this.segments.get(0).maybeAnchor())
+ return (SpanQuery) this.segments.get(0).toQuery();
- // Unproblematic single query
- if (this.segments.get(0).maybeAnchor())
- return (SpanQuery) this.segments.get(0).toQuery();
+ if (this.segments.get(0).isEmpty())
+ throw new QueryException(613, "Sequence is not allowed to be empty");
- if (this.segments.get(0).isEmpty())
- throw new QueryException(613, "Sequence is not allowed to be empty");
- if (this.segments.get(0).isOptional())
- throw new QueryException(613, "Sequence is not allowed to be optional");
- if (this.segments.get(0).isNegative())
- throw new QueryException(613, "Sequence is not allowed to be negative");
- };
+ if (this.segments.get(0).isOptional())
+ throw new QueryException(613, "Sequence is not allowed to be optional");
- if (!this.isSolved) {
- if (!_solveProblematicSequence()) {
- if (this.segments.get(0).maybeExtension())
- throw new QueryException(
- 613,
- "Sequence contains unresolvable "+
- "empty, optional, or negative segments"
- );
- };
- };
+ if (this.segments.get(0).isNegative())
+ throw new QueryException(613, "Sequence is not allowed to be negative");
+ };
- // The element may be expanded
- if (this.segments.size() == 1 &&
- this.segments.get(0).isExtended() &&
- (this.hasConstraints() || !this.isInOrder())) {
- throw new QueryException(613, limitationError);
- };
+ if (!this.isSolved) {
+ if (!_solveProblematicSequence()) {
+ if (this.segments.get(0).maybeExtension()) {
+ throw new QueryException(
+ 613,
+ "Sequence contains unresolvable " +
+ "empty, optional, or negative segments"
+ );
+ };
+ };
+ };
- // Create the initial query
- SpanQuery query = this.segments.get(0).toQuery();
+ // The element may be expanded
+ if (this.segments.size() == 1 &&
+ this.segments.get(0).isExtended() &&
+ (this.hasConstraints() || !this.isInOrder())) {
+ throw new QueryException(613, limitationError);
+ };
- // NextQueries:
- if (!this.hasConstraints() && this.isInOrder()) {
- for (int i = 1; i < this.segments.size(); i++) {
- query = new SpanNextQuery(
- query,
- this.segments.get(i).toQuery()
- );
- };
- return (SpanQuery) query;
- };
+ // Create the initial query
+ SpanQuery query = null;// = this.segments.get(0).toQuery();
+ int i = 0;
+ while (query == null && i < this.segments.size()) {
+ query = this.segments.get(i).toQuery();
+ i++;
+ };
- // DistanceQueries
- if (this.constraints.size() == 1) {
- DistanceConstraint constraint = this.constraints.get(0);
+ if (query == null)
+ return (SpanQuery) null;
- // Create spanElementDistance query
- if (!constraint.getUnit().equals("w")) {
- for (int i = 1; i < this.segments.size(); i++) {
+ // NextQueries:
+ if (!this.hasConstraints() && this.isInOrder()) {
+ for (; i < this.segments.size(); i++) {
- // No support for extended spans in constraints
- if (this.segments.get(i).isExtended())
- throw new QueryException(613, limitationError);
+ SpanQuery second = this.segments.get(i).toQuery();
+ if (second == null)
+ continue;
- SpanDistanceQuery sdquery = new SpanDistanceQuery(
- query,
- this.segments.get(i).toQuery(),
- constraint,
- true
- );
- query = (SpanQuery) sdquery;
- };
- }
+ query = new SpanNextQuery(
+ query,
+ second
+ );
+ };
+ return (SpanQuery) query;
+ };
+
+ // DistanceQueries
+ if (this.constraints.size() == 1) {
+ DistanceConstraint constraint = this.constraints.get(0);
- // Create spanDistance query
- else {
- for (int i = 1; i < this.segments.size(); i++) {
+ // Create spanElementDistance query
+ if (!constraint.getUnit().equals("w")) {
+ for (i = 1; i < this.segments.size(); i++) {
- // No support for extended spans in constraints
- if (this.segments.get(i).isExtended())
- throw new QueryException(613, limitationError);
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
- SpanDistanceQuery sdquery = new SpanDistanceQuery(
- query,
- this.segments.get(i).toQuery(),
- constraint,
- true
- );
- query = (SpanQuery) sdquery;
- };
- };
+ SpanQuery sq = (SpanQuery) this.segments.get(i).toQuery();
+ if (sq == null) continue;
- return (SpanQuery) query;
- };
+ SpanDistanceQuery sdquery = new SpanDistanceQuery(
+ query,
+ sq,
+ constraint,
+ true
+ );
+ query = (SpanQuery) sdquery;
+ };
+ }
- // MultipleDistanceQueries
- for (int i = 1; i < this.segments.size(); i++) {
+ // Create spanDistance query
+ else {
+ for (i = 1; i < this.segments.size(); i++) {
- // No support for extended spans in constraints
- if (this.segments.get(i).isExtended())
- throw new QueryException(613, limitationError);
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
- query = new SpanMultipleDistanceQuery(
- query,
- this.segments.get(i).toQuery(),
- this.constraints,
- isInOrder,
- true
- );
- };
- return (SpanQuery) query;
+ SpanQuery sq = (SpanQuery) this.segments.get(i).toQuery();
+ if (sq == null) continue;
+
+ SpanDistanceQuery sdquery = new SpanDistanceQuery(
+ query,
+ sq,
+ constraint,
+ true
+ );
+ query = (SpanQuery) sdquery;
+ };
+ };
+
+ return (SpanQuery) query;
+ };
+
+ // MultipleDistanceQueries
+ for (i = 1; i < this.segments.size(); i++) {
+
+ // No support for extended spans in constraints
+ if (this.segments.get(i).isExtended())
+ throw new QueryException(613, limitationError);
+
+ SpanQuery sq = (SpanQuery) this.segments.get(i).toQuery();
+ if (sq == null) continue;
+
+ query = new SpanMultipleDistanceQuery(
+ query,
+ sq,
+ this.constraints,
+ isInOrder,
+ true
+ );
+ };
+ return (SpanQuery) query;
};
/*
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
index 2af6c19..25cf0d6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
@@ -9,7 +9,7 @@
import de.ids_mannheim.korap.util.QueryException;
/**
- * @author margaretha
+ * @author margaretha, diewald
*
*/
public class SpanSubspanQueryWrapper extends SpanQueryWrapper {
@@ -17,34 +17,114 @@
private SpanQueryWrapper subquery;
private int startOffset, length;
- private Logger log = LoggerFactory.getLogger(SpanSubspanQueryWrapper.class);
+ private final static Logger log =
+ LoggerFactory.getLogger(SpanSubspanQueryWrapper.class);
- public SpanSubspanQueryWrapper(SpanQueryWrapper sqw, int startOffset,
- int length) {
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
+ public SpanSubspanQueryWrapper(SpanQueryWrapper sqw,
+ int startOffset,
+ int length) {
this.subquery = sqw;
+ if (sqw == null) {
+ this.isNull = true;
+ return;
+ }
+ else {
+ this.isNull = false;
+ };
+
this.startOffset = startOffset;
this.length = length;
+
+ // The embedded class is empty,
+ // but probably in a valid range
+ // - optimize
+ // subspan([]{,5}, 2) -> subspan([]{2,5}, 2)
+ // subspan([]{2,}, 2,5) -> subspan([]{2,5}, 2,5)
+ if (subquery.isEmpty()) {
+
+ // Todo: Is there a possible way to deal with that?
+ if (startOffset < 0) {
+ this.isNull = true;
+ return;
+ };
+
+ // e.g., subspan([]{0,6}, 8)
+ if (subquery.getMax() < startOffset) {
+ this.isNull = true;
+ return;
+ };
+
+ // Readjust the minimum of the subquery
+ if (startOffset > 0) {
+ subquery.setMin(startOffset);
+ subquery.isOptional = false;
+ };
+
+ // Readjust the maximum,
+ // although the following case may be somehow disputable:
+ // subspan([]{2,8}, 2, 1) -> subspan([]{2,5},2,1)
+ if (length > 0) {
+ int newMax = subquery.getMin() + startOffset + length;
+ if (subquery.getMax() > newMax) {
+ subquery.setMax(subquery.getMin() + length);
+ };
+ };
+ };
+
+ // Todo: What happens with negative queries?
+ // submatch([base!=tree],3)
}
@Override
public SpanQuery toQuery() throws QueryException {
- if (subquery == null) {
- log.warn("Subquery of SpanSubspanquery is null.");
+
+ if (this.isNull() || subquery.isNull()) {
+ if (DEBUG)
+ log.warn("Subquery of SpanSubspanquery is null.");
+ return null;
+ };
+
+ if (startOffset == 0 && length == 0) {
+ if (DEBUG)
+ log.warn("Not SpanSubspanQuery. Creating only the subquery.");
+ return subquery.toQuery();
+ };
+
+ // The embedded subquery may be null
+ SpanQuery sq = subquery.toQuery();
+ if (sq == null) return null;
+
+ if (sq instanceof SpanTermQuery) {
+
+ // No relevant subspan
+ if ((startOffset == 0 || startOffset == -1) &&
+ (length <= 1)) {
+ if (DEBUG)
+ log.warn("Not SpanSubspanQuery. " +
+ "Creating only the subquery.");
+ return sq;
+ };
+
+ // Subspanquery can't match (always out of scope)
return null;
}
- if (length == 0) {
- log.warn("Not SpanSubspanQuery. Creating only the subquery.");
- return subquery.toQuery();
- }
-
- SpanQuery sq = subquery.toQuery();
- if (sq instanceof SpanTermQuery) {
- log.warn("Not SpanSubspanQuery. Creating only the subquery.");
- return sq;
- }
-
- return new SpanSubspanQuery(subquery.toQuery(), startOffset, length,
+ return new SpanSubspanQuery(sq, startOffset, length,
true);
}
+
+ @Override
+ public boolean isNegative () {
+ return this.subquery.isNegative();
+ };
+
+ @Override
+ public boolean isOptional () {
+ if (startOffset > 0)
+ return false;
+ return this.subquery.isOptional();
+ };
}
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
index 14da689..b568fbe 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
@@ -16,107 +16,106 @@
public class TestMultiTerm {
@Test
public void multiTermSimple () {
- MultiTerm mt = new MultiTerm("test");
- assertEquals(mt.term, "test");
- assertNull(mt.payload);
- assertEquals(mt.start, 0);
- assertEquals(mt.end, 0);
+ MultiTerm mt = new MultiTerm("test");
+ assertEquals(mt.term, "test");
+ assertNull(mt.payload);
+ assertEquals(mt.start, 0);
+ assertEquals(mt.end, 0);
};
@Test
public void multiTermPayload () {
- MultiTerm mt = new MultiTerm("test$5");
- assertEquals("test", mt.term);
- assertEquals(new BytesRef("5"), mt.payload);
- assertEquals(mt.start, 0);
- assertEquals(mt.end, 0);
+ MultiTerm mt = new MultiTerm("test$5");
+ assertEquals("test", mt.term);
+ assertEquals(new BytesRef("5"), mt.payload);
+ assertEquals(mt.start, 0);
+ assertEquals(mt.end, 0);
};
@Test
public void multiTermOffset () {
- MultiTerm mt = new MultiTerm("versuch#2-34");
- assertEquals(mt.term, "versuch");
- assertNull(mt.payload);
- assertEquals(mt.start, 2);
- assertEquals(mt.end, 34);
+ MultiTerm mt = new MultiTerm("versuch#2-34");
+ assertEquals(mt.term, "versuch");
+ assertNull(mt.payload);
+ assertEquals(mt.start, 2);
+ assertEquals(mt.end, 34);
};
@Test
public void multiTermOffsetPayload () {
- MultiTerm mt = new MultiTerm("example#6-42$hihi");
- assertEquals(mt.term, "example");
- assertEquals(new BytesRef("hihi"), mt.payload);
- assertEquals(mt.start,6);
- assertEquals(mt.end, 42);
+ MultiTerm mt = new MultiTerm("example#6-42$hihi");
+ assertEquals(mt.term, "example");
+ assertEquals(new BytesRef("hihi"), mt.payload);
+ assertEquals(mt.start,6);
+ assertEquals(mt.end, 42);
};
@Test
public void multiTermString () {
- MultiTerm mt = new MultiTerm("example#6-42$hihi");
- assertEquals("example#6-42$hihi", mt.toString());
- mt.term = "spassmacher";
- assertEquals("spassmacher#6-42$hihi", mt.toString());
+ MultiTerm mt = new MultiTerm("example#6-42$hihi");
+ assertEquals("example#6-42$hihi", mt.toString());
+ mt.term = "spassmacher";
+ assertEquals("spassmacher#6-42$hihi", mt.toString());
};
@Test
public void multiTermStringPayloadType () {
- MultiTerm mt = new MultiTerm("example$<i>4000");
- assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>4000");
+ assertEquals("example$<?>[0,0,f,a0]", mt.toString());
- mt = new MultiTerm("example$<l>757574643438");
- assertEquals("example$<?>[0,0,0,b0,62,f7,ae,ee]", mt.toString());
+ mt = new MultiTerm("example$<l>757574643438");
+ assertEquals("example$<?>[0,0,0,b0,62,f7,ae,ee]", mt.toString());
};
-
+
@Test
public void multiTermStringPayloadType2 () {
- MultiTerm mt = new MultiTerm();
- mt.setTerm("beispiel");
- mt.setStart(40);
- assertEquals(mt.getStart(), mt.start);
- mt.setEnd(50);
- assertEquals(mt.getEnd(), mt.end);
- mt.setPayload((int) 4000);
- assertEquals("beispiel#40-50$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm();
+ mt.setTerm("beispiel");
+ mt.setStart(40);
+ assertEquals(mt.getStart(), mt.start);
+ mt.setEnd(50);
+ assertEquals(mt.getEnd(), mt.end);
+ mt.setPayload((int) 4000);
+ assertEquals("beispiel#40-50$<?>[0,0,f,a0]", mt.toString());
};
@Test
public void multiTermStringPayloadType3 () {
- MultiTerm mt = new MultiTerm("example$<b>120");
- assertEquals("example$x", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<b>120");
+ assertEquals("example$x", mt.toString());
};
@Test
public void multiTermStringPayloadType4 () {
- MultiTerm mt = new MultiTerm("example$<i>420<b>120");
- assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>420<b>120");
+ assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
};
-
@Test
public void multiTermStringPayloadType5 () {
- MultiTerm mt = new MultiTerm("example$<i>4000");
- assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>4000");
+ assertEquals("example$<?>[0,0,f,a0]", mt.toString());
- mt = new MultiTerm("example$<i>4000<b>120");
- assertEquals("example$<?>[0,0,f,a0,78]", mt.toString());
+ mt = new MultiTerm("example$<i>4000<b>120");
+ assertEquals("example$<?>[0,0,f,a0,78]", mt.toString());
- mt = new MultiTerm("example$<l>4000<b>120");
- assertEquals("example$<?>[0,0,0,0,0,0,f,a0,78]", mt.toString());
+ mt = new MultiTerm("example$<l>4000<b>120");
+ assertEquals("example$<?>[0,0,0,0,0,0,f,a0,78]", mt.toString());
};
@Test
public void multiTermStringFail () {
- MultiTerm mt = new MultiTerm("example#56-66");
- assertEquals(56, mt.getStart());
- assertEquals(66,mt.getEnd());
+ MultiTerm mt = new MultiTerm("example#56-66");
+ assertEquals(56, mt.getStart());
+ assertEquals(66,mt.getEnd());
- mt = new MultiTerm("example#56-66$<i>a");
- assertEquals(56, mt.getStart());
- assertEquals(66, mt.getEnd());
+ mt = new MultiTerm("example#56-66$<i>a");
+ assertEquals(56, mt.getStart());
+ assertEquals(66, mt.getEnd());
- mt = new MultiTerm("example#56$<i>a");
- assertEquals(mt.getPayload(), null);
- assertEquals(mt.getStart(), 0);
- assertEquals(mt.getEnd(), 0);
+ mt = new MultiTerm("example#56$<i>a");
+ assertEquals(mt.getPayload(), null);
+ assertEquals(mt.getStart(), 0);
+ assertEquals(mt.getEnd(), 0);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
index c3ab4c0..f88b194 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
@@ -14,27 +14,34 @@
@RunWith(JUnit4.class)
public class TestMultiTermToken {
+
@Test
public void multiTermTokenSimple () {
- MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
- assertEquals("[t:test|a:abbruch]", mtt.toString());
- mtt.add("b:banane");
- assertEquals("[t:test|a:abbruch|b:banane]", mtt.toString());
- mtt.add("c:chaos#21-26");
- assertEquals("[(21-26)t:test|a:abbruch|b:banane|c:chaos#21-26]", mtt.toString());
- mtt.add("d:dadaismus#21-28$vergleich");
- assertEquals("[(21-28)t:test|a:abbruch|b:banane|c:chaos#21-26|d:dadaismus#21-28$vergleich]", mtt.toString());
+ MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
+ assertEquals("[t:test|a:abbruch]", mtt.toString());
+ mtt.add("b:banane");
+ assertEquals("[t:test|a:abbruch|b:banane]", mtt.toString());
+ mtt.add("c:chaos#21-26");
+ assertEquals("[(21-26)t:test|a:abbruch|b:banane|c:chaos#21-26]",
+ mtt.toString());
+ mtt.add("d:dadaismus#21-28$vergleich");
+ assertEquals(
+ "[(21-28)t:test|a:abbruch|b:banane|c:chaos#21-26|" +
+ "d:dadaismus#21-28$vergleich]",
+ mtt.toString()
+ );
};
@Test
public void multiTermTokenOffsets () {
- MultiTermToken mtt = new MultiTermToken("t:test#23-27");
- assertEquals("[(23-27)t:test#23-27]", mtt.toString());
- mtt.add("b:baum#34-45");
- assertEquals("[(23-45)t:test#23-27|b:baum#34-45]", mtt.toString());
- mtt.add("c:cannonball#34-45$tatsache");
- assertEquals("[(23-45)t:test#23-27|b:baum#34-45|c:cannonball#34-45$tatsache]", mtt.toString());
- assertEquals(23, mtt.start);
- assertEquals(45, mtt.end);
+ MultiTermToken mtt = new MultiTermToken("t:test#23-27");
+ assertEquals("[(23-27)t:test#23-27]", mtt.toString());
+ mtt.add("b:baum#34-45");
+ assertEquals("[(23-45)t:test#23-27|b:baum#34-45]", mtt.toString());
+ mtt.add("c:cannonball#34-45$tatsache");
+ assertEquals("[(23-45)t:test#23-27|b:baum#34-45|" +
+ "c:cannonball#34-45$tatsache]", mtt.toString());
+ assertEquals(23, mtt.start);
+ assertEquals(45, mtt.end);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
index c5b15c0..ff20192 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
Binary files differ
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index 6e85f27..1a28f7d 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -27,357 +27,353 @@
@Test
public void checkBenchmark1 () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ /// cosmas20.json!!!
+ String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
- int rounds = 100;
+ int rounds = 100;
- KorapResult kr = new KorapResult();
+ KorapResult kr = new KorapResult();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- // assertEquals("TotalResults", 30751, kr.getTotalResults());
- assertEquals("TotalResults", kr.getTotalResults(), 4803739);
+ // assertEquals("TotalResults", 30751, kr.getTotalResults());
+ assertEquals("TotalResults", kr.getTotalResults(), 4803739);
- // System.err.println(kr.toJSON());
-
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ // System.out.println("It took " + seconds + " seconds");
- // 100 times:
- // 43,538 sec
- // 4.874
+ // 100 times:
+ // 43,538 sec
+ // 4.874
+
+ // 1000 times:
+ // 36.613 sec
- // 1000 times:
- // 36.613 sec
-
-
- // After refactoring
- // 100 times
- // 273.58114372 seconds
-
- // After intro of attributes
- // 100 times
- // 350.171506379 seconds
+ // After refactoring
+ // 100 times
+ // 273.58114372 seconds
+
+ // After intro of attributes
+ // 100 times
+ // 350.171506379 seconds
};
@Test
public void checkBenchmark2JSON () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ long t1 = 0, t2 = 0;
+ /// cosmas20.json!!!
+ String json = getString(getClass().getResource("/queries/benchmark2.jsonld").getFile());
+
+ int rounds = 10000;
+
+ KorapResult kr = new KorapResult();
+ String result = new String("");
+
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ length += kr.toJsonString().length();
+ };
+ t2 = System.nanoTime();
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark2.jsonld").getFile());
+ // assertEquals("TotalResults", 30751, kr.getTotalResults());
- int rounds = 10000;
+ // System.err.println(kr.toJSON());
- KorapResult kr = new KorapResult();
- String result = new String("");
-
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- length += kr.toJsonString().length();
- };
- t2 = System.nanoTime();
-
- // assertEquals("TotalResults", 30751, kr.getTotalResults());
-
- // System.err.println(kr.toJSON());
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ // System.out.println("It took " + seconds + " seconds");
- // 10000 times:
- // 77.167124985 sec
+ // 10000 times:
+ // 77.167124985 sec
};
@Test
public void checkBenchmarkSentences () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark4.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ /// cosmas20.json!!!
+ String json = getString(getClass().getResource("/queries/benchmark4.jsonld").getFile());
- int rounds = 10;
+ int rounds = 10;
- KorapResult kr = new KorapResult();
+ KorapResult kr = new KorapResult();
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- // System.err.println(kr.getMatch(0).toJSON());
+ // System.err.println(kr.getMatch(0).toJSON());
+
+ assertEquals("TotalResults1", kr.getTotalResults(), 4116282);
+ assertEquals("TotalResults2", kr.getTotalResults(), ki.numberOf("sentences"));
- assertEquals("TotalResults1", kr.getTotalResults(), 4116282);
- assertEquals("TotalResults2", kr.getTotalResults(), ki.numberOf("sentences"));
-
- double seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds");
- // 100 rounds
- // 56.253 secs
+ double seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds");
+ // 100 rounds
+ // 56.253 secs
};
@Test
public void checkBenchmarkClasses () throws IOException {
- // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
+ // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- // Without classes
- String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ // Without classes
+ String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
- int rounds = 2000;
+ int rounds = 2000;
+
+ KorapResult kr = new KorapResult();
- KorapResult kr = new KorapResult();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ double seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds without classes");
- double seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds without classes");
+ t1 = 0;
+ t2 = 0;
+ // With classes
+ json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
+
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = 0;
- t2 = 0;
- // With classes
- json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds with classes");
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = 0;
+ t2 = 0;
+ // With submatch
+ json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
- seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds with classes");
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = 0;
- t2 = 0;
- // With submatch
- json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds with submatches");
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ /** HERE IS A BUG! */
+
+ // System.err.println(kr.toJsonString());
- seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds with submatches");
+ // System.err.println(kr.toJSON());
- /** HERE IS A BUG! */
-
- System.err.println(kr.toJsonString());
+ // System.err.println(kr.getMatch(3).getSnippetBrackets());
+ // 2000 rounds:
+ // It took 10.872934435 seconds without classes
+ // It took 22.581117396 seconds with classes
- // System.err.println(kr.toJSON());
+ // It took 10.703933598 seconds without classes
+ // It took 19.354674517 seconds with classes
- // System.err.println(kr.getMatch(3).getSnippetBrackets());
+ // It took 10.939948726 seconds without classes
+ // It took 16.998470662 seconds with classes
+ // It took 10.900975837 seconds without classes
+ // It took 14.902590949 seconds with classes
- // 2000 rounds:
- // It took 10.872934435 seconds without classes
- // It took 22.581117396 seconds with classes
+ // It took 10.365989238 seconds without classes
+ // It took 13.833405885 seconds with classes
- // It took 10.703933598 seconds without classes
- // It took 19.354674517 seconds with classes
-
- // It took 10.939948726 seconds without classes
- // It took 16.998470662 seconds with classes
-
- // It took 10.900975837 seconds without classes
- // It took 14.902590949 seconds with classes
-
- // It took 10.365989238 seconds without classes
- // It took 13.833405885 seconds with classes
-
- // It took 15.368675425 seconds without classes
- // It took 18.347603186 seconds with classes
- // It took 15.941057294 seconds with submatches
-
- // It took 15.241253549 seconds without classes
- // It took 17.30375624 seconds with classes
- // It took 15.367171254 seconds with submatches
+ // It took 15.368675425 seconds without classes
+ // It took 18.347603186 seconds with classes
+ // It took 15.941057294 seconds with submatches
+
+ // It took 15.241253549 seconds without classes
+ // It took 17.30375624 seconds with classes
+ // It took 15.367171254 seconds with submatches
};
-
-
-
@Test
public void checkBenchmarkIndexDocuments () throws IOException {
- long t1 = 0, t2 = 0;
+ long t1 = 0, t2 = 0;
- int rounds = 10;
+ int rounds = 10;
- ArrayList<String> docs = new ArrayList<String>(700);
+ ArrayList<String> docs = new ArrayList<String>(700);
- for (int a = 0; a < 50; a++) {
- for (String d : new String[] {"00001", "00002", "00003",
- "00004", "00005", "00006", "02439"}) {
- docs.add(d);
- };
- };
+ for (int a = 0; a < 50; a++) {
+ for (String d : new String[] {
+ "00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ docs.add(d);
+ };
+ };
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- // Construct index
- KorapIndex ki = new KorapIndex();
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+
+ // Indexing test files
+ for (String d : docs) {
+ FieldDocument fd = ki.addDocFile(
+ getClass().getResource("/wiki/" + d + ".json.gz").getFile(),
+ true
+ );
+ };
+ ki.commit();
+ };
+ t2 = System.nanoTime();
- // Indexing test files
- for (String d : docs) {
- FieldDocument fd = ki.addDocFile(
- getClass().getResource("/wiki/" + d + ".json.gz").getFile(),
- true
- );
- };
- ki.commit();
- };
- t2 = System.nanoTime();
+ double seconds = (double)(t2-t1) / 1000000000.0;
+ // System.out.println("It took " + seconds + " seconds");
- double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
-
- // 10 times / 350 docs:
- // 36.26158006 seconds
- // 32.52575097 seconds
- // 31.818091536 seconds
- // 32.055321123 seconds
- // 32.32125959 seconds
- // 31.726277979 seconds
- // 31.65826188 seconds
- // 31.287057537 seconds
+ // 10 times / 350 docs:
+ // 36.26158006 seconds
+ // 32.52575097 seconds
+ // 31.818091536 seconds
+ // 32.055321123 seconds
+ // 32.32125959 seconds
+ // 31.726277979 seconds
+ // 31.65826188 seconds
+ // 31.287057537 seconds
};
@Test
public void checkBenchmark3 () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ /// cosmas20.json!!!
+ String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());
- int rounds = 500;
+ int rounds = 500;
+
+ KorapResult kr = new KorapResult();
- KorapResult kr = new KorapResult();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ assertEquals("TotalResults", kr.getTotalResults(), 70229);
- assertEquals("TotalResults", kr.getTotalResults(), 70229);
+ // System.err.println(kr.toJSON());
- // System.err.println(kr.toJSON());
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ // System.out.println("It took " + seconds + " seconds");
- // 500 times:
- // 71.715862716 seconds
+ // 500 times:
+ // 71.715862716 seconds
};
public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
- };
- in.close();
- } catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
- };
-
- public static SpanQueryWrapper jsonQuery (String jsonFile) {
- SpanQueryWrapper sqwi;
-
- try {
- String json = getString(jsonFile);
- sqwi = new KorapQuery("tokens").fromJson(json);
- }
- catch (QueryException e) {
- fail(e.getMessage());
- sqwi = new KorapQuery("tokens").seg("???");
- };
- return sqwi;
+ // Read the file line by line into one string; line terminators
+ // are dropped, as readLine() strips them and none are re-added.
+ StringBuilder contentBuilder = new StringBuilder();
+ // try-with-resources closes the reader even if readLine() throws
+ try (BufferedReader in = new BufferedReader(new FileReader(path))) {
+ String str;
+ while ((str = in.readLine()) != null) {
+ contentBuilder.append(str);
+ };
+ } catch (IOException e) {
+ // An unreadable resource fails the calling test
+ fail(e.getMessage());
+ }
+ return contentBuilder.toString();
};
+ /**
+ * Deserialize the JSON query stored in the given file into
+ * a query wrapper on the "tokens" field.
+ * A QueryException is reported through fail().
+ */
+ public static SpanQueryWrapper jsonQuery (String jsonFile) {
+ try {
+ return new KorapQuery("tokens").fromJson(getString(jsonFile));
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ };
+
+ // Fallback wrapper for the case that fail() returns normally
+ return new KorapQuery("tokens").seg("???");
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
index c9f6d56..342a90f 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
@@ -10,14 +10,13 @@
import de.ids_mannheim.korap.util.QueryException;
/**
- * @author margaretha
- *
+ * @author margaretha, diewald
*/
public class TestSpanSubspanQueryJSON {
@Test
public void testCase1() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch.jsonld")
+ String filepath = getClass().getResource("/queries/submatch/1.jsonld")
.getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
@@ -27,20 +26,58 @@
@Test
public void testCase2() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch2.jsonld")
- .getFile();
+ String filepath = getClass().getResource("/queries/submatch/2.jsonld")
+ .getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
assertEquals(sq.toString(), "subspan(<tokens:s />,1,4)");
}
+ @Test
public void testCase3() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch3.jsonld")
+ String filepath = getClass().getResource("/queries/submatch/3.jsonld")
.getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
assertEquals(sq.toString(), "subspan(<tokens:s />,1,0)");
-
}
+ // Focus on class 129 around a sentence-distance sequence whose
+ // second operand is a span reference (0,1) on <s>: the reference
+ // is expected to show up as a subspan inside its class.
+ @Test
+ public void testCaseWrapped() throws QueryException {
+ SpanQuery query = getJSONQuery(
+ getClass().getResource("/queries/submatch/wrapped.jsonld").getFile()
+ ).toQuery();
+ String serialized = query.toString();
+ assertEquals(serialized, "shrink(129: spanElementDistance({129: tokens:s:der},"+
+ " {129: subspan(<tokens:s />,0,1)}, [(s[0:0], ordered, notExcluded)]))");
+ }
+
+
+ // Span reference (2,3) on a sequence of "der" followed by a
+ // repetition (min 1) of empty tokens, embedded in a class that
+ // follows the class around "die".
+ @Test
+ public void testCaseEmbedded() throws QueryException {
+ SpanQuery query = getJSONQuery(
+ getClass().getResource("/queries/submatch/embedded.jsonld").getFile()
+ ).toQuery();
+ String serialized = query.toString();
+ assertEquals(serialized, "spanNext({1: tokens:s:die},"+
+ " {1: subspan(spanExpansion(tokens:s:der, []{1, 100}, right),2,3)})");
+ }
+
+ // Span reference (5,6) on a single token without a wrapped term:
+ // only the preceding "die" token is expected to remain in the
+ // serialized query.
+ @Test
+ public void testCaseEmbeddedNull() throws QueryException {
+ SpanQuery query = getJSONQuery(
+ getClass().getResource("/queries/submatch/embedded-null.jsonld").getFile()
+ ).toQuery();
+ String serialized = query.toString();
+ assertEquals(serialized, "tokens:s:die");
+ }
+
+ // Span reference with a start offset only ([2]) on a repetition
+ // (min 0, max 5) of tokens without a wrapped term, following
+ // the token "die".
+ // NOTE(review): the expected string below is a placeholder, not a
+ // real query serialization - presumably this test marks a case
+ // that is not yet handled; confirm the intended result or whether
+ // the test should be skipped until then.
+ @Test
+ public void testCaseEmbeddedValidEmpty() throws QueryException {
+ String filepath = getClass().getResource("/queries/submatch/embedded-valid-empty.jsonld")
+ .getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "??? (Known issue)");
+ }
}
diff --git a/src/test/resources/queries/submatch.jsonld b/src/test/resources/queries/submatch/1.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch.jsonld
rename to src/test/resources/queries/submatch/1.jsonld
diff --git a/src/test/resources/queries/submatch2.jsonld b/src/test/resources/queries/submatch/2.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch2.jsonld
rename to src/test/resources/queries/submatch/2.jsonld
diff --git a/src/test/resources/queries/submatch3.jsonld b/src/test/resources/queries/submatch/3.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch3.jsonld
rename to src/test/resources/queries/submatch/3.jsonld
diff --git a/src/test/resources/queries/submatch/embedded-null.jsonld b/src/test/resources/queries/submatch/embedded-null.jsonld
new file mode 100644
index 0000000..4672473
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded-null.jsonld
@@ -0,0 +1,39 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operation":"operation:sequence",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 5,
+ 6
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/submatch/embedded-valid-empty.jsonld b/src/test/resources/queries/submatch/embedded-valid-empty.jsonld
new file mode 100644
index 0000000..a6f22bd
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded-valid-empty.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operation":"operation:sequence",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:reference",
+ "spanRef" : [2],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 5,
+ "min" : 0
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/submatch/embedded.jsonld b/src/test/resources/queries/submatch/embedded.jsonld
new file mode 100644
index 0000000..e09bef5
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded.jsonld
@@ -0,0 +1,71 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ },
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "min" : 1
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ],
+ "operation" : "operation:repetition"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 2,
+ 3
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/submatch/wrapped.jsonld b/src/test/resources/queries/submatch/wrapped.jsonld
new file mode 100644
index 0000000..8bc7b0c
--- /dev/null
+++ b/src/test/resources/queries/submatch/wrapped.jsonld
@@ -0,0 +1,73 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:reference",
+ "classRef" : [
+ 129
+ ],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "distances" : [
+ {
+ "@type" : "cosmas:distance",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 0,
+ "min" : 0
+ },
+ "key" : "s",
+ "max" : 0,
+ "min" : 0
+ }
+ ],
+ "inOrder" : true,
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "class" : 129,
+ "classOut" : 129,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ },
+ {
+ "@type" : "korap:group",
+ "class" : 129,
+ "classOut" : 129,
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 0,
+ 1
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+ ],
+ "operation" : "operation:focus"
+ }
+}