More tests on deserialized sequences
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index e15eeef..b8a3e52 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -382,8 +382,9 @@
SpanQueryWrapper sqw = this.fromJSON(operands.get(0));
- if (sqw.maybeExtension())
+ if (sqw.maybeExtension()) {
return sqw.setMin(min).setMax(max);
+ };
return new SpanRepetitionQueryWrapper(sqw, min, max);
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
index 3022adb..f9eb3ea 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
@@ -21,7 +21,8 @@
protected boolean isNull = true,
isOptional = false,
isNegative = false,
- isEmpty = false;
+ isEmpty = false,
+ isExtendedToTheRight;
// Serialize query to Lucene SpanQuery
public SpanQuery toQuery () throws QueryException {
@@ -39,6 +40,8 @@
// like in
// "the [pos=ADJ]{0} tree"
public boolean isNull () {
+ if (this.getMin() == 0 && this.getMax() == 0)
+ return true;
return this.isNull;
};
@@ -54,6 +57,15 @@
return this.isEmpty;
};
+ // The subquery may exceed the right text offset due to an empty extension,
+ // e.g. [base=tree][]{3,4}.
+ // This makes it necessary to check the last position of the span
+ // for match testing
+ public boolean isExtendedToTheRight () {
+ return this.isExtendedToTheRight;
+ };
+
+
// Check, if the query may be an anchor
// in a SpanSequenceQueryWrapper
public boolean maybeAnchor () {
@@ -69,6 +81,7 @@
return true;
};
+ // Opposite of maybeAnchor - means "it is complicated"
public boolean maybeExtension () {
return !this.maybeAnchor();
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
index 3b9763d..a6c5640 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
@@ -63,6 +63,7 @@
min = 1;
if (max == 0)
this.isNull = true;
+ System.err.println("++++++++++++++++++++++++++++++");
};
this.min = min;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 297281e..1180aeb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -28,6 +28,10 @@
TODO:
Make isNegative work!
Make isEmpty work!
+ Make isExtendedToTheRight work!
+
+ Probably the problem solving should be done in the attribute checks,
+ not in toQuery().
*/
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
index 2f177bd..a05f837 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
@@ -336,6 +336,13 @@
};
@Test
+ public void queryJSONseqEmptyStartRepetition2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-first-repetition-2.jsonld").getFile());
+ // []{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ };
+
+ @Test
public void queryJSONseqEmptyMiddle () throws QueryException {
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/empty-middle.jsonld").getFile());
// der[][tt/p=NN]
@@ -400,6 +407,78 @@
assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 8}, left, class:1), []{2, 7}, right, class:2)");
};
+ @Test
+ public void queryJSONseqNegative () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative.jsonld").getFile());
+
+ // [tt/p!=NN]
+ assertTrue(sqwi.isNegative());
+ };
+
+ @Test
+ public void queryJSONseqNegativeStart () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first.jsonld").getFile());
+
+ // [tt/p!=NN][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEnd () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last.jsonld").getFile());
+
+ // [tt/p=NN][tt/p!=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition.jsonld").getFile());
+
+ // [tt/p!=NN]{4,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition-2.jsonld").getFile());
+
+ // [tt/p!=NN]{0,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{0, 5}, left)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeStartRepetition3 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-first-repetition-3.jsonld").getFile());
+
+ // [tt/p!=NN]{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class.jsonld").getFile());
+
+ // [tt/p=NN]{2:[tt/p!=NN]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndRepetitionClass () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class-repetition.jsonld").getFile());
+
+ // [tt/p=NN]{2:[tt/p!=NN]{4,5}}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ };
+
+ @Test
+ public void queryJSONseqNegativeEndRepetitionClass2 () throws QueryException {
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/sequence/negative-last-class-repetition-2.jsonld").getFile());
+
+ // [tt/p=NN]{2:[tt/p!=NN]}{4,5}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ };
+
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
try {
diff --git a/src/test/resources/queries/sequence/empty-first-repetition-2.jsonld b/src/test/resources/queries/sequence/empty-first-repetition-2.jsonld
new file mode 100644
index 0000000..c84cc4c
--- /dev/null
+++ b/src/test/resources/queries/sequence/empty-first-repetition-2.jsonld
@@ -0,0 +1,43 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "collections" : [
+ {
+ "@type" : "korap:meta-filter",
+ "@value" : {
+ "@field" : "korap:field#corpusID",
+ "@type" : "korap:term",
+ "@value" : "WPD"
+ }
+ }
+ ],
+ "meta" : {},
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 0,
+ "max" : 0
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }]
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-first-repetition-2.jsonld b/src/test/resources/queries/sequence/negative-first-repetition-2.jsonld
new file mode 100644
index 0000000..ded4793
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-first-repetition-2.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 0,
+ "max" : 5
+ },
+ "operands" : [
+ {
+
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }]
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-first-repetition-3.jsonld b/src/test/resources/queries/sequence/negative-first-repetition-3.jsonld
new file mode 100644
index 0000000..7ab3c24
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-first-repetition-3.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 0,
+ "max" : 0
+ },
+ "operands" : [
+ {
+
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }]
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-first-repetition.jsonld b/src/test/resources/queries/sequence/negative-first-repetition.jsonld
new file mode 100644
index 0000000..63df1bf
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-first-repetition.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 4,
+ "max" : 5
+ },
+ "operands" : [
+ {
+
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }]
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-first.jsonld b/src/test/resources/queries/sequence/negative-first.jsonld
new file mode 100644
index 0000000..5f4a130
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-first.jsonld
@@ -0,0 +1,29 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-last-class-repetition-2.jsonld b/src/test/resources/queries/sequence/negative-last-class-repetition-2.jsonld
new file mode 100644
index 0000000..4d93449
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-last-class-repetition-2.jsonld
@@ -0,0 +1,47 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 4,
+ "max" : 5
+ },
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "class" : 2,
+ "operation" : "operation:class",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-last-class-repetition.jsonld b/src/test/resources/queries/sequence/negative-last-class-repetition.jsonld
new file mode 100644
index 0000000..372e390
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-last-class-repetition.jsonld
@@ -0,0 +1,47 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "class" : 2,
+ "operation" : "operation:class",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary": {
+ "@type" : "korap:boundary",
+ "min" : 4,
+ "max" : 5
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-last-class.jsonld b/src/test/resources/queries/sequence/negative-last-class.jsonld
new file mode 100644
index 0000000..41e0d45
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-last-class.jsonld
@@ -0,0 +1,36 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "class" : 2,
+ "operation" : "operation:class",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }
+ ]
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative-last.jsonld b/src/test/resources/queries/sequence/negative-last.jsonld
new file mode 100644
index 0000000..7b29c8b
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative-last.jsonld
@@ -0,0 +1,29 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/sequence/negative.jsonld b/src/test/resources/queries/sequence/negative.jsonld
new file mode 100644
index 0000000..c0f5c21
--- /dev/null
+++ b/src/test/resources/queries/sequence/negative.jsonld
@@ -0,0 +1,13 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "tt",
+ "key" : "NN",
+ "layer" : "p",
+ "match" : "match:ne"
+ }
+ }
+}