Wrapping of extension queries
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
index 2a04554..809a325 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
@@ -3,8 +3,11 @@
import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSimpleQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
@@ -15,7 +18,7 @@
public class SpanAlterQueryWrapper extends SpanQueryWrapper {
private String field;
private SpanQuery query;
- private List<SpanQuery> alternatives;
+ private List<SpanQueryWrapper> alternatives;
public SpanAlterQueryWrapper (String field) {
this.field = field;
@@ -25,6 +28,14 @@
public SpanAlterQueryWrapper (String field, SpanQuery query) {
this.field = field;
this.alternatives = new ArrayList<>();
+ this.alternatives.add(
+ new SpanSimpleQueryWrapper(query)
+ );
+ };
+
+ public SpanAlterQueryWrapper (String field, SpanQueryWrapper query) {
+ this.field = field;
+ this.alternatives = new ArrayList<>();
this.alternatives.add(query);
};
@@ -33,16 +44,26 @@
this.alternatives = new ArrayList<>();
for (String term : terms) {
this.isNull = false;
- this.alternatives.add(new SpanTermQuery(new Term(this.field, term)));
+ this.alternatives.add(
+ new SpanSimpleQueryWrapper(
+ new SpanTermQuery(
+ new Term(this.field, term)
+ )
+ )
+ );
};
};
public SpanAlterQueryWrapper or (String term) {
- return this.or(new SpanTermQuery(new Term(this.field, term)));
+ return this.or(
+ new SpanTermQuery(new Term(this.field, term))
+ );
};
public SpanAlterQueryWrapper or (SpanQuery query) {
- this.alternatives.add(query);
+ this.alternatives.add(
+ new SpanSimpleQueryWrapper(query)
+ );
this.isNull = false;
return this;
};
@@ -59,35 +80,35 @@
if (term.isOptional())
this.isOptional = true;
- this.alternatives.add( term.toQuery() );
+ this.alternatives.add( term );
this.isNull = false;
return this;
};
public SpanAlterQueryWrapper or (SpanRegexQueryWrapper term) {
- this.alternatives.add( term.toQuery() );
+ this.alternatives.add( term );
this.isNull = false;
return this;
};
public SpanAlterQueryWrapper or (SpanWildcardQueryWrapper wc) {
- this.alternatives.add( wc.toQuery() );
+ this.alternatives.add( wc );
this.isNull = false;
return this;
};
- public SpanQuery toQuery() {
+ public SpanQuery toQuery() throws QueryException {
if (this.isNull || this.alternatives.size() == 0)
return (SpanQuery) null;
if (this.alternatives.size() == 1) {
- return (SpanQuery) this.alternatives.get(0);
+ return (SpanQuery) this.alternatives.get(0).toQuery();
};
- Iterator<SpanQuery> clause = this.alternatives.iterator();
- SpanOrQuery soquery = new SpanOrQuery( clause.next() );
+ Iterator<SpanQueryWrapper> clause = this.alternatives.iterator();
+ SpanOrQuery soquery = new SpanOrQuery( clause.next().toQuery() );
while (clause.hasNext()) {
- soquery.addClause( clause.next() );
+ soquery.addClause( clause.next().toQuery() );
};
return (SpanQuery) soquery;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
index 865c61e..c455396 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
@@ -4,6 +4,7 @@
import de.ids_mannheim.korap.query.SpanClassQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
import java.util.*;
@@ -32,7 +33,7 @@
this.number = (byte) 0;
};
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
if (this.subquery.isNull())
return (SpanQuery) null;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
index 92a7090..68b844c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
@@ -4,6 +4,7 @@
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
public class SpanElementQueryWrapper extends SpanQueryWrapper {
String element;
@@ -14,7 +15,7 @@
this.element = element;
};
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
return (SpanQuery) new SpanElementQuery(this.field, this.element);
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanEmptyTokenWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanEmptyTokenWrapper.java
deleted file mode 100644
index a4691f0..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanEmptyTokenWrapper.java
+++ /dev/null
@@ -1,12 +0,0 @@
-package de.ids_mannheim.korap.query.wrap;
-
-import org.apache.lucene.search.spans.SpanQuery;
-import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
-
-import java.util.*;
-
-/**
- * Implement an empty token
- */
-public class SpanEmptyTokenWrapper extends SpanQueryWrapper {
-};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
index 8e9a812..7faadf8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
@@ -2,6 +2,8 @@
import org.apache.lucene.search.spans.SpanQuery;
+import de.ids_mannheim.korap.util.QueryException;
+
import de.ids_mannheim.korap.query.SpanMatchModifyClassQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
@@ -32,7 +34,7 @@
this.number = (byte) 0;
};
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
if (this.subquery.isNull())
return (SpanQuery) null;
return new SpanMatchModifyClassQuery(this.subquery.toQuery(), this.number);
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQuantifierQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQuantifierQueryWrapper.java
deleted file mode 100644
index 1926310..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQuantifierQueryWrapper.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package de.ids_mannheim.korap.query.wrap;
-
-import java.util.*;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Query;
-
-import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
-import org.apache.lucene.search.spans.SpanQuery;
-
-// This might be irrelevant now with repetition!
-
-public class SpanQuantifierQueryWrapper extends SpanQueryWrapper {
- private String field;
-
- public SpanQuantifierQueryWrapper (String field) {
- this.field = field;
- };
-
- public SpanQuery toQuery () {
- return (SpanQuery) null;
- };
-
- public boolean isNull () {
- return false;
- };
-
- /*
-
-Only support spans with minimal one occurrence and then
-flag spans with NOT_NECESSARY.
-This unfortunately means to support this in at least spanNextQuery
-Problem: Queries without context:
-
-[]*[s:tree]? -> matches everything!
-
-The any segment is special, it shuld be supported by a special
-spanNextQuery, where it adds a position (or more) to the matching span.
-spanNext(Query1, ANY)
-
- API idea:
- opt();
- star();
- plus();
- occ(2);
- occ(2, this.UNLIMITED);
- occ(0, 4);
- occ(5, 8);
-
- Implementation idea:
- This query should work similar to NextSpans with looking at all matching spans
- in order per document, returning matching positions for all sequences in the boundary.
- All actions should be translated to {x,y} boundaries.
- ? -> {0,1}
- + -> {1,UNL}
- * -> {0,UNL}
- (2) -> {2,2}
- (,3) -> {0,3}
- (3,) -> {3,UNL}
- (3,4) -> {3,4}
-
- oldSpanEnd = X;
- For (i = 0; i < orderedSpans.length; i) {
- # ...
- };
-
- */
-};
-
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
index 30fe7b9..3022adb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.wrap;
import org.apache.lucene.search.spans.SpanQuery;
+import de.ids_mannheim.korap.util.QueryException;
// TODO: Add warning and error
@@ -11,14 +12,19 @@
* @author Nils Diewald
*/
public class SpanQueryWrapper {
- protected boolean isNull = true,
- isOptional = false,
- isNegative = false;
protected int min = 1,
max = 1;
+ protected byte number = (byte) 0;
+ protected boolean hasClass = false;
+
+ protected boolean isNull = true,
+ isOptional = false,
+ isNegative = false,
+ isEmpty = false;
+
// Serialize query to Lucene SpanQuery
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
return (SpanQuery) null;
};
@@ -42,15 +48,95 @@
return this.isNegative;
};
+ // The subquery should match everything, like in
+ // "the []"
+ public boolean isEmpty () {
+ return this.isEmpty;
+ };
+
+ // Tell whether this query can serve as an anchor
+ // in a SpanSequenceQueryWrapper. A query qualifies only
+ // if it is neither negative, nor optional, nor empty.
+ public boolean maybeAnchor () {
+     return !(this.isNegative()
+              || this.isOptional()
+              || this.isEmpty());
+ };
+
+ public boolean maybeExtension () {
+ return !this.maybeAnchor();
+ };
+
// Repetition queries may be more specific regarding repetition
- // This is a minimum repetition value
- public int min () {
+ // Get minimum repetition value
+ public int getMin () {
return this.min;
};
// Repetition queries may be more specific regarding repetition
- // This is a maximum repetition value
- public int max () {
+ // Get maximum repetition value
+ public int getMax () {
return this.max;
};
+
+ // Set minimum repetition value
+ public SpanQueryWrapper setMin (int min) {
+ this.min = min;
+ return this;
+ };
+
+ // Set maximum repetition value
+ public SpanQueryWrapper setMax (int max) {
+ this.max = max;
+ return this;
+ };
+
+
+ // Empty tokens may have class information
+ public boolean hasClass () {
+ return this.hasClass;
+ };
+
+ public SpanQueryWrapper hasClass (boolean value) {
+ this.hasClass = value;
+ return this;
+ };
+
+ // Get class number
+ public byte getClassNumber () {
+ return this.number;
+ };
+
+ // Set class number
+ public SpanQueryWrapper setClassNumber (byte number) {
+ this.hasClass = true;
+ this.number = number;
+ return this;
+ };
+
+ // Set class number from a short; the value is narrowed to a byte by the cast below
+ public SpanQueryWrapper setClassNumber (short number) {
+ return this.setClassNumber((byte) number);
+ };
+
+ // Set class number from an int; the value is narrowed to a byte by the cast below
+ public SpanQueryWrapper setClassNumber (int number) {
+ return this.setClassNumber((byte) number);
+ };
+
+ // Render the null, empty and optional flags as a dash-separated string
+ public String toString () {
+     return (this.isNull() ? "isNull" : "notNull")
+         + "-" + (this.isEmpty() ? "isEmpty" : "notEmpty")
+         + "-" + (this.isOptional() ? "isOptional" : "notOptional");
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
index b46d929..3b9763d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
@@ -4,14 +4,30 @@
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+// DEAL WITH NEGATIVITY
public class SpanRepetitionQueryWrapper extends SpanQueryWrapper {
private SpanQueryWrapper subquery;
+ // Logger, registered under this wrapper's own class (not SpanSequenceQueryWrapper)
+ private final static Logger log = LoggerFactory.getLogger(SpanRepetitionQueryWrapper.class);
+
+ public SpanRepetitionQueryWrapper () {
+ this.isEmpty = true;
+ this.isNull = false;
+ };
+
// This is for exact enumbered repetition, like in a{3}
public SpanRepetitionQueryWrapper (SpanQueryWrapper subquery, int exact) {
- this.subquery = subquery;
+ // Always keep the subquery: the checks below dereference this.subquery
+ this.subquery = subquery;
+ if (subquery.isEmpty())
+     this.isEmpty = true;
if (exact < 1 || this.subquery.isNull()) {
this.isNull = true;
@@ -28,7 +44,10 @@
// This is for a range of repetitions, like in a{2,3}, a{,4}, a{3,}, a+, a*, a?
public SpanRepetitionQueryWrapper (SpanQueryWrapper subquery, int min, int max) {
- this.subquery = subquery;
+ // Always keep the subquery: the checks below dereference this.subquery
+ this.subquery = subquery;
+ if (subquery.isEmpty())
+     this.isEmpty = true;
// Subquery may be an empty token
if (this.subquery.isNull()) {
@@ -52,12 +71,17 @@
// Serialize to Lucene SpanQuery
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
// The query is null
if (this.isNull)
return (SpanQuery) null;
+ if (this.isEmpty) {
+ log.error("You can't queryize an empty query");
+ return (SpanQuery) null;
+ };
+
// The query is not a repetition query at all, but may be optional
if (this.min == 1 && this.max == 1)
return this.subquery.toQuery();
@@ -72,6 +96,8 @@
};
public boolean isNegative () {
+ if (this.subquery == null)
+ return false;
return this.subquery.isNegative();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
index 21032a9..a434abb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
@@ -13,9 +13,11 @@
import de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper;
import de.ids_mannheim.korap.query.SpanSegmentQuery;
+import de.ids_mannheim.korap.util.QueryException;
+
+
/**
* @author Nils Diewald
- * @version 0.01
*
* Creates a query object for segments, i.e. terms in a term vector
* sharing the same position. A SpanSegment can include simple string terms,
@@ -23,8 +25,8 @@
*/
public class SpanSegmentQueryWrapper extends SpanQueryWrapper {
- public ArrayList<SpanQuery> inclusive;
- public ArrayList<SpanQuery> exclusive;
+ public ArrayList<SpanQueryWrapper> inclusive;
+ public ArrayList<SpanQueryWrapper> exclusive;
private String field;
/**
@@ -34,8 +36,8 @@
*/
public SpanSegmentQueryWrapper (String field) {
this.field = field;
- this.inclusive = new ArrayList<SpanQuery>();
- this.exclusive = new ArrayList<SpanQuery>();
+ this.inclusive = new ArrayList<SpanQueryWrapper>();
+ this.exclusive = new ArrayList<SpanQueryWrapper>();
};
/**
@@ -47,14 +49,14 @@
public SpanSegmentQueryWrapper (String field, String ... terms) {
this(field);
for (int i = 0; i < terms.length; i++) {
- this.inclusive.add((SpanQuery) new SpanTermQuery(new Term(field, terms[i])));
+ this.inclusive.add(new SpanSimpleQueryWrapper(field, terms[i]));
this.isNull = false;
};
};
public SpanSegmentQueryWrapper (String field, SpanRegexQueryWrapper re) {
this(field);
- this.inclusive.add((SpanQuery) re.toQuery());
+ this.inclusive.add(re);
this.isNull = false;
};
@@ -63,7 +65,7 @@
if (!alter.isNull()) {
if (alter.isNegative())
this.isNegative = true;
- this.inclusive.add((SpanQuery) alter.toQuery());
+ this.inclusive.add(alter);
this.isNull = false;
};
};
@@ -72,33 +74,35 @@
this(field);
if (!ssq.isNull()) {
- Iterator<SpanQuery> clause = ssq.inclusive.iterator();
+ Iterator<SpanQueryWrapper> clause = ssq.inclusive.iterator();
while (clause.hasNext()) {
- this.inclusive.add( (SpanQuery) clause.next().clone() );
+ this.inclusive.add( (SpanQueryWrapper) clause.next() );
+ // .clone()
};
clause = ssq.exclusive.iterator();
while (clause.hasNext()) {
- this.exclusive.add( (SpanQuery) clause.next().clone() );
+ this.exclusive.add( (SpanQueryWrapper) clause.next() );
+ // .clone()
};
this.isNull = false;
};
};
public SpanSegmentQueryWrapper with (String term) {
- this.inclusive.add(new SpanTermQuery(new Term(field, term)));
+ this.inclusive.add(new SpanSimpleQueryWrapper(field, term));
this.isNull = false;
return this;
};
- public SpanSegmentQueryWrapper with (SpanRegexQueryWrapper re) {
- this.inclusive.add((SpanQuery) re.toQuery());
+ public SpanSegmentQueryWrapper with (SpanQueryWrapper re) {
+ this.inclusive.add((SpanQueryWrapper) re);
this.isNull = false;
return this;
};
public SpanSegmentQueryWrapper with (SpanWildcardQueryWrapper wc) {
- this.inclusive.add((SpanQuery) wc.toQuery());
+ this.inclusive.add((SpanQueryWrapper) wc);
this.isNull = false;
return this;
};
@@ -107,7 +111,7 @@
if (!alter.isNull()) {
if (alter.isNegative())
this.isNegative = true;
- this.inclusive.add((SpanQuery) alter.toQuery());
+ this.inclusive.add(alter);
this.isNull = false;
};
return this;
@@ -116,10 +120,10 @@
// Identical to without
public SpanSegmentQueryWrapper with (SpanSegmentQueryWrapper seg) {
if (!seg.isNull()) {
- for (SpanQuery sq : seg.inclusive) {
+ for (SpanQueryWrapper sq : seg.inclusive) {
this.inclusive.add(sq);
};
- for (SpanQuery sq : seg.exclusive) {
+ for (SpanQueryWrapper sq : seg.exclusive) {
this.exclusive.add(sq);
};
this.isNull = false;
@@ -128,19 +132,21 @@
};
public SpanSegmentQueryWrapper without (String term) {
- this.exclusive.add(new SpanTermQuery(new Term(field, term)));
+ this.exclusive.add(new SpanSimpleQueryWrapper(field, term));
this.isNull = false;
return this;
};
+ // TODO: These overloads may no longer be necessary:
+
public SpanSegmentQueryWrapper without (SpanRegexQueryWrapper re) {
- this.exclusive.add((SpanQuery) re.toQuery());
+ this.exclusive.add(re);
this.isNull = false;
return this;
};
public SpanSegmentQueryWrapper without (SpanWildcardQueryWrapper wc) {
- this.exclusive.add((SpanQuery) wc.toQuery());
+ this.exclusive.add(wc);
this.isNull = false;
return this;
};
@@ -148,10 +154,10 @@
public SpanSegmentQueryWrapper without (SpanAlterQueryWrapper alter) {
if (!alter.isNull()) {
if (alter.isNegative()) {
- this.inclusive.add((SpanQuery) alter.toQuery());
+ this.inclusive.add(alter);
}
else {
- this.exclusive.add((SpanQuery) alter.toQuery());
+ this.exclusive.add(alter);
};
this.isNull = false;
};
@@ -166,7 +172,7 @@
return this;
};
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
if (this.isNull || (this.inclusive.size() + this.exclusive.size() == 0)) {
return (SpanQuery) null;
}
@@ -188,25 +194,25 @@
return (SpanQuery) null;
};
- private SpanQuery _listToQuery (ArrayList<SpanQuery> list) {
- SpanQuery query = list.get(0);
+ private SpanQuery _listToQuery (ArrayList<SpanQueryWrapper> list) throws QueryException {
+ SpanQuery query = list.get(0).toQuery();
for (int i = 1; i < list.size(); i++) {
- query = new SpanSegmentQuery(query, list.get(i));
+ query = new SpanSegmentQuery(query, list.get(i).toQuery());
};
return (SpanQuery) query;
};
- private SpanQuery _listToOrQuery (ArrayList<SpanQuery> list) {
+ private SpanQuery _listToOrQuery (ArrayList<SpanQueryWrapper> list) throws QueryException {
if (list.size() == 1) {
- return (SpanQuery) list.get(0);
+ return (SpanQuery) list.get(0).toQuery();
};
- Iterator<SpanQuery> clause = list.iterator();
- SpanOrQuery soquery = new SpanOrQuery( clause.next() );
+ Iterator<SpanQueryWrapper> clause = list.iterator();
+ SpanOrQuery soquery = new SpanOrQuery( clause.next().toQuery() );
while (clause.hasNext()) {
- soquery.addClause( clause.next() );
+ soquery.addClause( clause.next().toQuery() );
};
return (SpanQuery) soquery;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 2afb15b..297281e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -9,11 +9,14 @@
import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSimpleQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -22,7 +25,9 @@
import org.slf4j.LoggerFactory;
/*
-TODO: Make isNegative work!
+ TODO:
+ Make isNegative work!
+ Make isEmpty work!
*/
@@ -30,311 +35,162 @@
* Deserialize complexe sequence queries to Lucene SpanQueries.
*
* @author Nils Diewald
+ * @version 0.02
*/
public class SpanSequenceQueryWrapper extends SpanQueryWrapper {
private String field;
- private ArrayList<SpanQuery> segments;
+ private ArrayList<SpanQueryWrapper> segments;
private ArrayList<DistanceConstraint> constraints;
// Logger
private final static Logger log = LoggerFactory.getLogger(SpanSequenceQueryWrapper.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
-
- private boolean
- isInOrder = true,
- isOptional = true,
- lastIsOptional = false,
- firstIsOptional = false;
+ public static final boolean DEBUG = false;
+ private boolean isInOrder = true;
+
+ /**
+ * Empty constructor.
+ */
public SpanSequenceQueryWrapper (String field) {
this.field = field;
- this.segments = new ArrayList<SpanQuery>(2);
+ this.segments = new ArrayList<SpanQueryWrapper>(2);
};
+
+ /**
+ * Constructor accepting term sequences.
+ */
public SpanSequenceQueryWrapper (String field, String ... terms) {
this(field);
for (int i = 0; i < terms.length; i++) {
- this.segments.add((SpanQuery) new SpanTermQuery(new Term(field, terms[i])));
- this.isOptional = false;
+ this.segments.add(
+ new SpanSimpleQueryWrapper(
+ new SpanTermQuery(new Term(field, terms[i]))
+ )
+ );
};
this.isNull = false;
};
+
+ /**
+ * Constructor accepting SpanQuery sequences.
+ */
public SpanSequenceQueryWrapper (String field, SpanQuery sq) {
this(field);
- if (DEBUG)
- log.trace("New spanquery sequence " + sq.toString());
- this.segments.add((SpanQuery) sq);
- this.isOptional = false;
+ this.segments.add(new SpanSimpleQueryWrapper(sq));
this.isNull = false;
};
+
+ /**
+ * Constructor accepting SpanQueryWrapper sequences.
+ * These wrappers may be optional, negative or empty.
+ */
public SpanSequenceQueryWrapper (String field, SpanQueryWrapper sswq) {
this(field);
- if (!sswq.isNull()) {
+ // Ignore null queries
+ if (sswq.isNull())
+ return;
- if (sswq.isNegative())
- this.isNegative = true;
-
- this.segments.add((SpanQuery) sswq.toQuery());
- this.isNull = false;
- if (sswq.isOptional()) {
- if (DEBUG)
- log.trace("New optional query sequence " +
- sswq.toQuery().toString());
- this.isOptional = true;
- this.lastIsOptional = true;
- this.firstIsOptional = true;
+ if (DEBUG && !sswq.isEmpty) {
+ try {
+ log.trace("New span sequence {}", sswq.toQuery().toString());
}
- else {
- this.isOptional = false;
- if (DEBUG)
- log.trace("New non-optional query sequence " +
- sswq.toQuery().toString());
+ catch (QueryException qe) {
+ log.trace("Unable to serialize query {}", qe.getMessage());
};
};
+
+ this.segments.add(sswq);
+ this.isNull = false;
};
- public SpanSequenceQueryWrapper (String field, SpanRegexQueryWrapper re) {
- this(field);
- if (!re.isNull()) {
- this.segments.add((SpanQuery) re.toQuery());
- this.isNull = false;
- this.isOptional = false;
- };
- };
- public SpanSequenceQueryWrapper (String field, SpanWildcardQueryWrapper wc) {
- this(field);
- if (!wc.isNull()) {
- this.segments.add((SpanQuery) wc.toQuery());
- this.isOptional = false;
- this.isNull = false;
- };
- };
-
- public SpanQuery get (int index) {
- return this.segments.get(index);
- };
-
- public void set (int index, SpanQuery sq) {
- this.segments.set(index, sq);
- };
-
+ /**
+ * Append a term to the sequence.
+ */
public SpanSequenceQueryWrapper append (String term) {
return this.append(
- (SpanQuery) new SpanTermQuery(new Term(field, term))
+ new SpanSimpleQueryWrapper(
+ new SpanTermQuery(new Term(field, term))
+ )
);
};
-
+
+
+ /**
+ * Append a SpanQuery to the sequence.
+ */
public SpanSequenceQueryWrapper append (SpanQuery query) {
- this.isNull = false;
- this.isOptional = false;
-
- // Check if there has to be alternation magic in action
- if (this.lastIsOptional) {
- if (DEBUG)
- log.trace("Append non-opt query to opt query " +
- query.toString());
-
- SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(field, query);
- SpanSequenceQueryWrapper ssqw = new SpanSequenceQueryWrapper(field, query);
-
- // Remove last element of the list and prepend it
- ssqw.prepend(this.segments.remove(this.segments.size() - 1));
- saqw.or(ssqw);
-
- // Update boundary optionality
- if (this.firstIsOptional && this.segments.size() == 0)
- this.firstIsOptional = false;
-
- this.lastIsOptional = false;
-
- this.segments.add((SpanQuery) saqw.toQuery());
- }
- else {
- if (DEBUG)
- log.trace("Append non-opt query to non-opt query " +
- query.toString());
-
- this.segments.add(query);
- };
-
- return this;
+ return this.append(new SpanSimpleQueryWrapper(query));
};
+
+ /**
+ * Append a SpanQueryWrapper to the sequence.
+ */
public SpanSequenceQueryWrapper append (SpanQueryWrapper ssq) {
+ if (ssq.isNull())
+ return this;
- if (DEBUG)
- log.trace("Try to append query {}", ssq.toString());
+ this.isNull = false;
+ this.segments.add(ssq);
- if (!ssq.isNull()) {
-
- if (DEBUG)
- log.trace("THe query {} is not null", ssq.toString());
-
- if (ssq.isNegative())
- this.isNegative = true;
-
- SpanQuery appendQuery = ssq.toQuery();
- if (!ssq.isOptional()) {
- if (DEBUG)
- log.trace("Append non-opt query to non-opt query {}",
- appendQuery.toString());
- return this.append(appendQuery);
- };
-
- // Situation is ab? or a?b?
- if (this.segments.size() != 0) {
-
- // Remove last element of the list and prepend it
- SpanQuery lastQuery = this.segments.remove(this.segments.size() - 1);
- SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(field, lastQuery);
- SpanSequenceQueryWrapper ssqw = new SpanSequenceQueryWrapper(field, appendQuery);
- ssqw.prepend(lastQuery);
-
- // Situation is a?b?
- if (this.lastIsOptional) {
- saqw.or(appendQuery);
- // last stays optional
- if (DEBUG)
- log.trace("Append opt query to opt query {}",
- appendQuery.toString());
-
- }
- else if (DEBUG) {
- log.trace("Append opt query to non-opt query {}",
- appendQuery.toString());
- };
-
- saqw.or(ssqw);
- this.segments.add((SpanQuery) saqw.toQuery());
- }
-
- // Situation is b?
- else {
- this.segments.add(appendQuery);
-
- if (DEBUG)
- log.trace("Append opt query {}",
- appendQuery.toString());
-
- // Update boundary optionality
- this.firstIsOptional = true;
- this.isOptional = true;
- this.lastIsOptional = true;
- };
- this.isNull = false;
- };
return this;
};
- public SpanSequenceQueryWrapper append (SpanRegexQueryWrapper srqw) {
- if (!srqw.isNull()) {
- return this.append((SpanQuery) srqw.toQuery());
- };
- return this;
- };
-
- public SpanSequenceQueryWrapper append (SpanWildcardQueryWrapper swqw) {
- if (!swqw.isNull()) {
- return this.append((SpanQuery) swqw.toQuery());
- };
- return this;
- };
-
+ /**
+ * Prepend a term to the sequence.
+ */
public SpanSequenceQueryWrapper prepend (String term) {
- return this.prepend(new SpanTermQuery(new Term(field, term)));
+ return this.prepend(
+ new SpanSimpleQueryWrapper(
+ new SpanTermQuery(new Term(field, term))
+ )
+ );
};
+
+ /**
+ * Prepend a SpanQuery to the sequence.
+ */
public SpanSequenceQueryWrapper prepend (SpanQuery query) {
- this.isNull = false;
- this.isOptional = false;
-
- // Check if there has to be alternation magic in action
- if (this.firstIsOptional) {
- SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(field, query);
- SpanSequenceQueryWrapper ssqw = new SpanSequenceQueryWrapper(field, query);
- // Remove last element of the list and prepend it
- ssqw.append(this.segments.remove(0));
- saqw.or(ssqw);
-
- // Update boundary optionality
- if (this.lastIsOptional && this.segments.size() == 0)
- this.lastIsOptional = false;
- this.firstIsOptional = false;
-
- this.segments.add(0, (SpanQuery) saqw.toQuery());
- }
- else {
- this.segments.add(0,query);
- };
- return this;
+ return this.prepend(
+ new SpanSimpleQueryWrapper(query)
+ );
};
+ /**
+ * Prepend a SpanQueryWrapper to the sequence.
+ */
public SpanSequenceQueryWrapper prepend (SpanQueryWrapper ssq) {
- if (!ssq.isNull()) {
+ if (ssq.isNull())
+ return this;
- if (ssq.isNegative())
- this.isNegative = true;
-
- SpanQuery prependQuery = ssq.toQuery();
- if (!ssq.isOptional()) {
- return this.prepend(prependQuery);
- };
-
- // Situation is b?a or b?a?
- if (this.segments.size() != 0) {
- // Remove first element of the list and append it
- SpanQuery firstQuery = this.segments.remove(0);
- SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(field, firstQuery);
- SpanSequenceQueryWrapper ssqw = new SpanSequenceQueryWrapper(field, prependQuery);
- ssqw.append(firstQuery);
+ this.isNull = false;
+ this.segments.add(0, ssq);
- // Situation is b?a?
- if (this.firstIsOptional)
- saqw.or(prependQuery);
- // first stays optional
-
- saqw.or(ssqw);
- this.segments.add(0, (SpanQuery) saqw.toQuery());
- }
-
- // Situation is b?
- else {
- this.segments.add(prependQuery);
-
- // Update boundary optionality
- this.firstIsOptional = true;
- this.isOptional = true;
- this.lastIsOptional = true;
- };
- this.isNull = false;
- };
return this;
};
- public SpanSequenceQueryWrapper prepend (SpanRegexQueryWrapper re) {
- if (!re.isNull()) {
- return this.prepend(re.toQuery());
- };
- return this;
- };
- public SpanSequenceQueryWrapper prepend (SpanWildcardQueryWrapper swqw) {
- if (!swqw.isNull()) {
- return this.prepend(swqw.toQuery());
- };
- return this;
- };
-
+ /**
+ * Add a sequence constraint to the sequence for tokens,
+ * aka distance constraints.
+ */
public SpanSequenceQueryWrapper withConstraint (int min, int max) {
return this.withConstraint(min, max, false);
};
+
+ /**
+ * Add a sequence constraint to the sequence for tokens,
+ * aka distance constraints (with exclusion).
+ */
public SpanSequenceQueryWrapper withConstraint (int min, int max, boolean exclusion) {
if (this.constraints == null)
this.constraints = new ArrayList<DistanceConstraint>(1);
@@ -342,10 +198,20 @@
return this;
};
+
+ /**
+ * Add a sequence constraint to the sequence for various units,
+ * aka distance constraints.
+ */
public SpanSequenceQueryWrapper withConstraint (int min, int max, String unit) {
return this.withConstraint(min, max, unit, false);
};
+
+ /**
+ * Add a sequence constraint to the sequence for various units,
+ * aka distance constraints (with exclusion).
+ */
public SpanSequenceQueryWrapper withConstraint (int min,
int max,
String unit,
@@ -362,13 +228,63 @@
return this;
};
-
- public SpanQuery toQuery () {
- if (this.segments.size() == 0 || this.isNull) {
+ /**
+ * Respect the order of distances.
+ */
+ public void setInOrder (boolean isInOrder) {
+ this.isInOrder = isInOrder;
+ };
+
+ /**
+ * Check if the order is relevant.
+ */
+ public boolean isInOrder () {
+ return this.isInOrder;
+ };
+
+ /**
+ * Tell whether at least one constraint has been
+ * registered for the sequence.
+ */
+ public boolean hasConstraints () {
+     return this.constraints != null
+         && this.constraints.size() > 0;
+ };
+
+
+ /**
+ * Serialize Query to Lucene SpanQueries
+ */
+ public SpanQuery toQuery () throws QueryException {
+
+ int size = this.segments.size();
+
+ // Nothing to do
+ if (size == 0 || this.isNull)
return (SpanQuery) null;
+
+ if (size == 1) {
+ if (this.segments.get(0).maybeAnchor())
+ return (SpanQuery) this.segments.get(0).toQuery();
+
+ if (this.segments.get(0).isEmpty())
+ throw new QueryException("Sequence is not allowed to be empty");
+ if (this.segments.get(0).isOptional())
+ throw new QueryException("Sequence is not allowed to be optional");
+ if (this.segments.get(0).isNegative())
+ throw new QueryException("Sequence is not allowed to be negative");
};
- SpanQuery query = this.segments.get(0);
+ if (!_solveProblematicSequence(size)) {
+ if (this.segments.get(0).maybeExtension()) // still negative, optional or empty
+ throw new QueryException("Sequence contains unresolvable "+
+ "empty, optional, or negative segments");
+ };
+
+ // Create the initial query
+ SpanQuery query = this.segments.get(0).toQuery();
// NextQueries:
if (this.constraints == null || this.constraints.size() == 0 ||
@@ -378,7 +294,7 @@
for (int i = 1; i < this.segments.size(); i++) {
query = new SpanNextQuery(
query,
- this.segments.get(i) // Todo: Maybe payloads are not necessary
+ this.segments.get(i).toQuery()
);
};
return (SpanQuery) query;
@@ -393,9 +309,9 @@
for (int i = 1; i < this.segments.size(); i++) {
SpanDistanceQuery sdquery = new SpanDistanceQuery(
query,
- this.segments.get(i),
- constraint,
- true
+ this.segments.get(i).toQuery(),
+ constraint,
+ true
);
query = (SpanQuery) sdquery;
};
@@ -405,8 +321,8 @@
else {
for (int i = 1; i < this.segments.size(); i++) {
SpanDistanceQuery sdquery = new SpanDistanceQuery(
- query,
- this.segments.get(i),
+ query,
+ this.segments.get(i).toQuery(),
constraint,
true
);
@@ -421,29 +337,206 @@
for (int i = 1; i < this.segments.size(); i++) {
query = new SpanMultipleDistanceQuery(
query,
- this.segments.get(i),
+ this.segments.get(i).toQuery(),
this.constraints,
isInOrder,
true
);
};
-
return (SpanQuery) query;
};
- public void setInOrder (boolean isInOrder) {
- this.isInOrder = isInOrder;
+ /**
+  * Try to resolve problematic segments, i.e. segments for which
+  * maybeAnchor() is false (negative, optional or empty ones):
+  *   - look for an anchor directly next to the problematic segment
+  *   - merge the problematic segment with the anchor
+  *   - go on
+  *
+  * Every merge removes one entry from this.segments. A problem that
+  * had no anchor as a neighbour during one pass may have one after
+  * other merges took place, so the pass is repeated as long as it
+  * still merged something.
+  *
+  * @param size the number of segments to examine; the visible
+  *        callers pass this.segments.size()
+  * @return true if a problematic segment remains that could not be
+  *         merged, false if no problematic segment is left
+  * @throws QueryException propagated from _merge()
+  */
+ private boolean _solveProblematicSequence (int size) throws QueryException {
+
+     SpanQueryWrapper underScrutiny;
+     boolean noRemainingProblem = true;
+
+     // "size" is decremented in lockstep with every removal below,
+     // so it always equals this.segments.size() and can not tell
+     // whether this pass changed anything - track merges explicitly
+     boolean merged = false;
+     int i = 0;
+
+     if (DEBUG)
+         log.trace("Try to solve a query of {} segments", size);
+
+     while (i < size) {
+         underScrutiny = this.segments.get(i);
+
+         // No problem here - go on
+         if (underScrutiny.maybeAnchor()) {
+             i++;
+             continue;
+         };
+
+         if (DEBUG)
+             log.trace("segment {} is problematic", i);
+
+         // [problem][anchor]
+         if (i < (size-1) && this.segments.get(i+1).maybeAnchor()) {
+
+             // Insert the solution
+             this.segments.set(
+                 i+1,
+                 _merge(this.segments.get(i+1), underScrutiny, false)
+             );
+
+             // Remove the problem - the merged segment moves to
+             // index i and is checked in the next iteration
+             this.segments.remove(i);
+             size--;
+             merged = true;
+
+             if (DEBUG)
+                 log.trace("Remove segment {} - now size {}", i, size);
+         }
+
+         // [anchor][problem]
+         else if (i >= 1 && this.segments.get(i-1).maybeAnchor()) {
+
+             // Insert the solution
+             this.segments.set(
+                 i-1,
+                 _merge(this.segments.get(i-1), underScrutiny, true)
+             );
+
+             // Remove the problem - the following segment moves to
+             // index i and is checked in the next iteration
+             this.segments.remove(i);
+             size--;
+             merged = true;
+
+             if (DEBUG)
+                 log.trace("Remove segment {} - now size {}", i, size);
+         }
+
+         // No anchor next to the problem (yet)
+         else {
+             noRemainingProblem = false;
+             i++;
+         };
+     };
+
+     // Everything is resolved
+     if (noRemainingProblem)
+         return false;
+
+     // The sequence has changed - retry, as a remaining problem
+     // may have an anchor as a neighbour now. Each retry requires
+     // at least one removed segment, so the recursion terminates.
+     if (merged)
+         return _solveProblematicSequence(this.segments.size());
+
+     // No progress - the remaining problems are unresolvable
+     return true;
};
- public boolean isInOrder () {
- return this.isInOrder;
- };
- public boolean hasConstraints () {
- if (this.constraints == null)
- return false;
- if (this.constraints.size() <= 0)
- return false;
- return true;
+ // Todo: Deal with negative and optional!
+ // [base=der][base!=Baum]?
+ /**
+  * Merge a problematic segment into a neighbouring anchor and
+  * return a wrapper for the combined query.
+  *
+  *   - An empty problem becomes an expansion of the anchor.
+  *   - A negative problem becomes an expansion of the anchor that
+  *     additionally receives the query of the problem.
+  *   - Any other problem is treated as optional: the result is the
+  *     alternation of the anchor alone and the sequence of anchor
+  *     and problem.
+  *
+  * @param anchor the segment the problem is merged into
+  * @param problem the empty, negative or optional segment
+  * @param mergeLeft true if the anchor precedes the problem (the
+  *        anchor is extended to the right), false if the anchor
+  *        follows the problem
+  * @return the merged wrapper
+  * @throws QueryException declared for the toQuery() calls
+  */
+ public SpanQueryWrapper _merge (
+     SpanQueryWrapper anchor,
+     SpanQueryWrapper problem,
+     boolean mergeLeft) throws QueryException {
+
+     // Merging to the left extends the anchor to the right (1),
+     // merging to the right extends it to the left (-1)
+     final int direction = mergeLeft ? 1 : -1;
+
+     if (DEBUG)
+         log.trace("Will merge two spans to {}", mergeLeft ? "left" : "right");
+
+     // Make empty extension to anchor
+     if (problem.isEmpty()) {
+
+         if (DEBUG)
+             log.trace("Problem is empty");
+
+         SpanQuery emptyExpansion;
+         if (!problem.hasClass) {
+
+             if (DEBUG)
+                 log.trace("Problem has no class");
+
+             emptyExpansion = new SpanExpansionQuery(
+                 anchor.toQuery(),
+                 problem.getMin(),
+                 problem.getMax(),
+                 direction,
+                 true
+             );
+         }
+         else {
+
+             if (DEBUG)
+                 log.trace("Problem has class {}", problem.getClassNumber());
+
+             emptyExpansion = new SpanExpansionQuery(
+                 anchor.toQuery(),
+                 problem.getMin(),
+                 problem.getMax(),
+                 direction,
+                 problem.getClassNumber(),
+                 true
+             );
+         };
+         return new SpanSimpleQueryWrapper(emptyExpansion);
+     };
+
+     // Make negative extension to anchor
+     if (problem.isNegative()) {
+
+         if (DEBUG)
+             log.trace("Problem is negative");
+
+         SpanQuery negativeExpansion;
+         if (!problem.hasClass) {
+
+             if (DEBUG)
+                 log.trace("Problem has no class");
+
+             negativeExpansion = new SpanExpansionQuery(
+                 anchor.toQuery(),
+                 problem.toQuery(),
+                 problem.getMin(),
+                 problem.getMax(),
+                 direction,
+                 true
+             );
+         }
+         else {
+
+             if (DEBUG)
+                 log.trace("Problem has class {}", problem.getClassNumber());
+
+             negativeExpansion = new SpanExpansionQuery(
+                 anchor.toQuery(),
+                 problem.toQuery(),
+                 problem.getMin(),
+                 problem.getMax(),
+                 direction,
+                 problem.getClassNumber(),
+                 true
+             );
+         };
+         return new SpanSimpleQueryWrapper(negativeExpansion);
+     };
+
+     if (DEBUG)
+         log.trace("Problem is optional");
+
+     // [base=der][base=baum]? is rewritten to the alternation
+     // of [base=der] and the full sequence
+
+     // [base=der]
+     SpanAlterQueryWrapper alternation =
+         new SpanAlterQueryWrapper(this.field, anchor);
+
+     // [base=der]
+     SpanSequenceQueryWrapper sequence =
+         new SpanSequenceQueryWrapper(this.field, anchor);
+
+     SpanSimpleQueryWrapper optionalPart =
+         new SpanSimpleQueryWrapper(problem.toQuery());
+
+     if (mergeLeft) {
+         // [base=der][base=baum]
+         sequence.append(optionalPart);
+     }
+     else {
+         // [base=baum][base=der]
+         sequence.prepend(optionalPart);
+     };
+
+     alternation.or(sequence);
+
+     return alternation;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java
new file mode 100644
index 0000000..84cc259
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSimpleQueryWrapper.java
@@ -0,0 +1,23 @@
+package de.ids_mannheim.korap.query.wrap;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
+/**
+ * Minimal SpanQueryWrapper that holds a ready-made Lucene SpanQuery
+ * and hands it back unchanged on serialization.
+ */
+public class SpanSimpleQueryWrapper extends SpanQueryWrapper {
+    private SpanQuery query;
+
+    /**
+     * Wrap a SpanTermQuery for a single term.
+     *
+     * @param field the name of the field the term is searched in
+     * @param term the text of the term
+     */
+    public SpanSimpleQueryWrapper (String field, String term) {
+        this(new SpanTermQuery(new Term(field, term)));
+    };
+
+    /**
+     * Wrap an existing SpanQuery and mark the wrapper as not null.
+     *
+     * @param query the query handed back by toQuery()
+     */
+    public SpanSimpleQueryWrapper (SpanQuery query) {
+        this.isNull = false;
+        this.query = query;
+    };
+
+    /**
+     * Serialize the wrapper.
+     *
+     * @return the wrapped SpanQuery as stored by the constructor
+     */
+    public SpanQuery toQuery () {
+        return this.query;
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
index 0707882..3dd1482 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
@@ -6,12 +6,18 @@
import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
import java.util.*;
import org.apache.lucene.search.spans.SpanQuery;
/*
Todo:
+
+ contains(token,token) and matches(token, token) -> termGroup
+
+
- Exclusivity has to be supported
- In case the wrap is negative,
the query has to be interpreted as being exclusive!
@@ -78,7 +84,7 @@
this.isNull = false;
};
- public SpanQuery toQuery () {
+ public SpanQuery toQuery () throws QueryException {
if (this.isNull)
return (SpanQuery) null;