Fixed query deserialization for sequences with multiple non-anchors
diff --git a/CHANGES b/CHANGES
index b0d8311..f9690c3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,10 @@
-0.48 2014-11-06
+0.48 2014-11-07
- [feature] Retrieval of token lists (diewald)
- [bugfix] Classes can now be highlighted to 127.
+ - [bugfix] Sequences with distances are no longer flattened in
+ SpanSequenceQueryWrapper (diewald)
+ - [bugfix] Sequences with [problem][problem][anchor] can now
+ be deserialized (diewald)
0.47 2014-11-05
- [feature] Support new index format with more metadata (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index a649c35..7278877 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap;
+// TODO: Add word count as a metadata field
+
// Java classes
import java.util.*;
import java.util.zip.GZIPInputStream;
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index b49e2f6..a34c46f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -97,7 +97,7 @@
this.max = defaultMax;
if (DEBUG)
- log.trace("Found korap:boundary with {}:{}");
+ log.trace("Found korap:boundary with {}:{}", min, max);
};
};
@@ -245,11 +245,10 @@
case "operation:sequence":
- if (operands.size() < 2)
- throw new QueryException(
- 612,
- "SpanSequenceQuery needs at least two operands"
- );
+ if (operands.size() == 1) {
+ this.addWarning("Sequences with less than two operands are ignored");
+ return this.fromJSON(operands.get(0));
+ };
SpanSequenceQueryWrapper sseqqw = this.seq();
@@ -292,6 +291,7 @@
// Support cosmas distances
else if (firstDistance.get("@type").asText().equals("cosmas:distance"))
distances = json.get("distances");
+
else
throw new QueryException(612, "No valid distances defined");
@@ -308,8 +308,6 @@
);
*/
-
-
int min = 0, max = 100;
if (constraint.has("boundary")) {
Boundary b = new Boundary(constraint.get("boundary"), 0,100);
@@ -341,6 +339,9 @@
if (max < min)
max = min;
+ if (DEBUG)
+ log.trace("Add distance constraint of '{}': {}-{}", unit, min, max);
+
sseqqw.withConstraint(min, max, unit);
};
};
@@ -350,7 +351,7 @@
sseqqw.append(this.fromJSON(operand));
};
- // inOrder was set without a distance constraint
+ // inOrder was set to false without a distance constraint
if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
sseqqw.withConstraint(1,1,"w");
};
@@ -358,9 +359,8 @@
return sseqqw;
case "operation:class":
- number = 0;
+ number = 1;
-
if (json.has("classOut")) {
number = json.get("classOut").asInt(0);
}
@@ -370,7 +370,12 @@
};
if (json.has("classRefCheck"))
- this.addWarning("classRefCheck is not yet supported. Results may not be correct");
+ this.addWarning("classRefCheck is not yet supported - " +
+ "results may not be correct");
+
+ if (json.has("classRefOp"))
+ this.addWarning("classRefOp is not yet supported - " +
+ "results may not be correct");
if (number > 0) {
if (operands.size() != 1)
@@ -380,9 +385,7 @@
);
if (DEBUG)
- log.trace("Found Class definition for {}", json.get("class").asInt(0));
-
- number = json.get("class").asInt(0);
+ log.trace("Found Class definition for {}", number);
if (number > MAX_CLASS_NUM) {
throw new QueryException(
@@ -446,6 +449,9 @@
return sqw.setMin(min).setMax(max);
return new SpanRepetitionQueryWrapper(sqw, min, max);
+
+ case "operation:relation":
+ throw new QueryException(613, "Relations are not yet supported");
};
throw new QueryException(613, "Unknown group operation");
@@ -650,7 +656,8 @@
return this.seg(value.toString());
if (json.has("attr"))
- throw new QueryException(613, "Attributes not yet supported in spans");
+ this.addWarning("Attributes are not yet supported - " +
+ "results may not be correct");
return this.tag(value.toString());
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 935635a..b56a6d0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -83,9 +83,16 @@
this.error = "No query defined";
};
- // Report warning coming from the request
+ // Legacy code: Report warning coming from the request
if (this.request.has("warning"))
this.addWarning(this.request.get("warning").asText());
+ if (this.request.has("warnings")) {
+ JsonNode warnings = this.request.get("warnings");
+ for (JsonNode node : warnings)
+ this.addWarning(node.asText());
+ };
+ // end of legacy code
+
// virtual collections
if (this.request.has("collections") ||
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index fe671ec..4ed1223 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -137,7 +137,11 @@
// Embed a sequence
if (ssq instanceof SpanSequenceQueryWrapper) {
+ if (DEBUG)
+ log.trace("Add SpanSequenceQueryWrapper to sequence");
+
// There are no constraints - just next spans
+ // Flatten!
SpanSequenceQueryWrapper ssqw = (SpanSequenceQueryWrapper) ssq;
if (!this.hasConstraints() &&
!ssqw.hasConstraints() &&
@@ -145,6 +149,11 @@
for (int i = 0; i < ssqw.segments.size(); i++) {
this.append(ssqw.segments.get(i));
};
+ }
+
+ // No flattening
+ else {
+ this.segments.add(ssq);
};
}
@@ -187,6 +196,7 @@
if (ssq instanceof SpanSequenceQueryWrapper) {
// There are no constraints - just next spans
+ // Flatten!
SpanSequenceQueryWrapper ssqw = (SpanSequenceQueryWrapper) ssq;
if (!this.hasConstraints() &&
!ssqw.hasConstraints() &&
@@ -194,6 +204,11 @@
for (int i = ssqw.segments.size() - 1; i >= 0; i--) {
this.prepend(ssqw.segments.get(i));
};
+ }
+
+ // No flattening
+ else {
+ this.segments.add(0, ssq);
};
}
@@ -453,6 +468,8 @@
// [problem][anchor]
if (i < (size-1) && this.segments.get(i+1).maybeAnchor()) {
+ if (DEBUG)
+ log.trace("Situation is [problem][anchor]");
// Insert the solution
try {
@@ -471,10 +488,16 @@
if (DEBUG)
log.trace("Remove segment {} - now size {}", i, size);
+
+ // Restart checking
+ i = 0;
}
// [anchor][problem]
else if (i >= 1 && this.segments.get(i-1).maybeAnchor()) {
+ if (DEBUG)
+ log.trace("Situation is [anchor][problem]");
+
// Insert the solution
try {
this.segments.set(
@@ -492,13 +515,21 @@
if (DEBUG)
log.trace("Remove segment {} - now size {}", i, size);
+
+ // Restart checking
+ i = 0;
}
+ // [problem][problem]
else {
+ if (DEBUG)
+ log.trace("Situation is [problem][problem]");
noRemainingProblem = false;
i++;
};
}
else {
+ if (DEBUG)
+ log.trace("segment {} can be an anchor", i);
i++;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
index d46c9e2..766d6ee 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
@@ -224,6 +224,7 @@
assertEquals("spanExpansion(spanExpansion(tokens:tt/p:NN, !tokens:tt/p:DET{1, 1}, right), !tokens:tt/p:ADJ{1, 1}, right)", sqwi.toQuery().toString());
};
+
@Test
public void queryJSONseqNegativeEndSequence2 () throws QueryException {
SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence-2.jsonld");
@@ -233,6 +234,13 @@
assertEquals("spanExpansion(spanExpansion(tokens:tt/p:ADJ, !tokens:tt/p:DET{1, 1}, left), !tokens:tt/p:NN{1, 1}, left)", sqwi.toQuery().toString());
};
+ @Test
+ public void queryJSONseqMultipleDistances () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQueryFile("multiple-distances.jsonld");
+ // er []{,10} kann []{1,10} sagte
+
+ assertEquals("spanDistance(tokens:s:er, spanDistance(tokens:s:kann, tokens:s:sagte, [(w[2:11], ordered, notExcluded)]), [(w[1:11], ordered, notExcluded)])", sqwi.toQuery().toString());
+ };
// get query wrapper based on json file
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 36632e6..8c407ad 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -747,6 +747,48 @@
assertEquals(345, res.at("/matches/0/tokens/2/1").asInt());
};
+ @Test
+ public void searchJSONmultitermRewriteBug () throws IOException {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ ki.addDocFile(
+ 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
+ );
+ ki.addDocFile(
+ 2,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
+ );
+
+ ki.commit();
+
+ String json = getString(
+ getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile()
+ );
+
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(kr.getQuery(),"");
+
+
+ /*
+
+ assertEquals(
+ kr.getQuery(),
+ "{4: spanNext({1: spanNext({2: tokens:s:ins}, {3: tokens:s:Leben})}, tokens:s:gerufen)}"
+ );
+ assertEquals(
+ kr.getMatch(0).getSnippetBrackets(),
+ "... sozialistischen Initiative\" eine neue politische Gruppierung " +
+ "[{4:{1:{2:ins} {3:Leben}} gerufen}] hatten. " +
+ "Pressemeldungen zufolge haben sich in ..."
+ );
+
+ assertEquals(2, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ */
+ };
+
+
@Test
public void searchJSONCollection () throws IOException {
diff --git a/src/test/resources/queries/bsp-class.jsonld b/src/test/resources/queries/bsp-class.jsonld
index 075f927..b4fd65e 100644
--- a/src/test/resources/queries/bsp-class.jsonld
+++ b/src/test/resources/queries/bsp-class.jsonld
@@ -3,7 +3,7 @@
"query":{
"@type":"korap:group",
"operation" : "operation:class",
- "class":0,
+ "class":1,
"operands":[
{
"@type":"korap:group",
diff --git a/src/test/resources/queries/bugs/cosmas_classrefcheck.jsonld b/src/test/resources/queries/bugs/cosmas_classrefcheck.jsonld
new file mode 100644
index 0000000..bd0c08b
--- /dev/null
+++ b/src/test/resources/queries/bugs/cosmas_classrefcheck.jsonld
@@ -0,0 +1,65 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "warnings":[
+ "This is a warning coming from the serialization"
+ ],
+ "query": {
+ "@type" : "korap:reference",
+ "classRef" : [
+ 130
+ ],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "class" : 131,
+ "classIn" : [
+ 129,
+ 130
+ ],
+ "classOut" : 131,
+ "classRefCheck" : "classRefCheck:includes",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "frame" : "frame:contains",
+ "frames" : [],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "class" : 129,
+ "classOut" : 129,
+ "operands" : [
+ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }
+ ],
+ "operation" : "operation:class"
+ },
+ {
+ "@type" : "korap:group",
+ "class" : 130,
+ "classOut" : 130,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "wegen",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:position"
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:focus"
+ }
+}
diff --git a/src/test/resources/queries/bugs/multiterm_rewrite.jsonld b/src/test/resources/queries/bugs/multiterm_rewrite.jsonld
new file mode 100644
index 0000000..c07f88c
--- /dev/null
+++ b/src/test/resources/queries/bugs/multiterm_rewrite.jsonld
@@ -0,0 +1,64 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "errors":[],
+ "warnings":[],
+ "announcements":[
+ "Deprecated 2014-07-24: 'min' and 'max' to be supported until 3 months from deprecation date."
+ ],
+ "collection":{},
+ "meta":{
+ "startPage":1,
+ "context":"paragraph"
+ },
+ "query":{
+ "@type":"korap:group",
+ "operation":"operation:sequence",
+ "operands":[
+ {
+ "@type":"korap:group",
+ "operation":"operation:repetition",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry":"tt",
+ "layer":"p",
+ "type":"type:regex",
+ "key":"A.*",
+ "match":"match:eq"
+ }
+ }
+ ],
+ "boundary":{
+ "@type":"korap:boundary",
+ "min":0,
+ "max":3
+ },
+ "min":0,
+ "max":3
+ },
+ {
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry":"tt",
+ "layer":"p",
+ "type":"type:regex",
+ "key":"N.*",
+ "match":"match:eq"
+ }
+ }
+ ]
+ },
+ "collections":[
+ {
+ "@type":"korap:meta-filter",
+ "@value":{
+ "@type":"korap:term",
+ "@field":"korap:field#corpusID",
+ "@value":"WPD"
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/sequence/multiple-distances.jsonld b/src/test/resources/queries/sequence/multiple-distances.jsonld
new file mode 100644
index 0000000..4187ded
--- /dev/null
+++ b/src/test/resources/queries/sequence/multiple-distances.jsonld
@@ -0,0 +1,70 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type" : "korap:group",
+ "distances" : [
+ {
+ "@type" : "korap:distance",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 11,
+ "min" : 1
+ },
+ "key" : "w",
+ "max" : 11,
+ "min" : 1
+ }
+ ],
+ "inOrder" : true,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "er",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "distances" : [
+ {
+ "@type" : "korap:distance",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 11,
+ "min" : 2
+ },
+ "key" : "w",
+ "max" : 11,
+ "min" : 2
+ }
+ ],
+ "inOrder" : true,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "kann",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "sagte",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}