Introducing potential sorting flag
diff --git a/Changes b/Changes
index ffe30ea..75bd931 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.49.4 2015-02-06
+0.49.4 2015-02-13
- [documentation] Improved documentation for API classes,
improved test coverage for utility classes (diewald)
- [performance] Updated Lucene dependency from 4.5.1 to 4.10.3,
@@ -20,6 +20,8 @@
fixed a lot of wrong tests for WithinSpans (diewald)
- [feature] Improved deserialization of SpanSubSpanQueries
(margaretha)
+ - [feature] Introducing the potential need for resorting queries
+ on focussing (set by relations) (diewald)
0.49.3 2015-02-03
- [documentation] Improved documentation for API classes (diewald)
diff --git a/Readme.md b/Readme.md
index 021b28d..d71659f 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,6 +1,6 @@
# Krill
-Corpus Retrieval Index looking up with Lucene
+A Corpus Retrieval Index using Lucene for Look-Ups
## Synopsis
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
index 809a325..28e9c72 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
@@ -21,95 +21,102 @@
private List<SpanQueryWrapper> alternatives;
public SpanAlterQueryWrapper (String field) {
- this.field = field;
- this.alternatives = new ArrayList<>();
+ this.field = field;
+ this.alternatives = new ArrayList<>();
};
public SpanAlterQueryWrapper (String field, SpanQuery query) {
- this.field = field;
- this.alternatives = new ArrayList<>();
- this.alternatives.add(
+ this.field = field;
+ this.alternatives = new ArrayList<>();
+ this.alternatives.add(
new SpanSimpleQueryWrapper(query)
);
};
public SpanAlterQueryWrapper (String field, SpanQueryWrapper query) {
- this.field = field;
- this.alternatives = new ArrayList<>();
- this.alternatives.add(query);
+ this.field = field;
+ this.alternatives = new ArrayList<>();
+ if (query.maybeUnsorted())
+ this.maybeUnsorted = true;
+ this.alternatives.add(query);
};
public SpanAlterQueryWrapper (String field, String ... terms) {
- this.field = field;
- this.alternatives = new ArrayList<>();
- for (String term : terms) {
- this.isNull = false;
- this.alternatives.add(
- new SpanSimpleQueryWrapper(
+ this.field = field;
+ this.alternatives = new ArrayList<>();
+ for (String term : terms) {
+ this.isNull = false;
+ this.alternatives.add(
+ new SpanSimpleQueryWrapper(
new SpanTermQuery(
new Term(this.field, term)
- )
+ )
)
);
- };
+ };
};
public SpanAlterQueryWrapper or (String term) {
- return this.or(
- new SpanTermQuery(new Term(this.field, term))
+ return this.or(
+ new SpanTermQuery(new Term(this.field, term))
);
};
public SpanAlterQueryWrapper or (SpanQuery query) {
- this.alternatives.add(
+ this.alternatives.add(
new SpanSimpleQueryWrapper(query)
- );
- this.isNull = false;
- return this;
+ );
+ this.isNull = false;
+ return this;
};
public SpanAlterQueryWrapper or (SpanQueryWrapper term) {
- if (term.isNull())
- return this;
+ if (term.isNull())
+ return this;
- if (term.isNegative())
- this.isNegative = true;
+ if (term.isNegative())
+ this.isNegative = true;
- // If one operand is optional, the whole group can be optional
- // a | b* | c
- if (term.isOptional())
- this.isOptional = true;
+ // If one operand is optional, the whole group can be optional
+ // a | b* | c
+ if (term.isOptional())
+ this.isOptional = true;
- this.alternatives.add( term );
- this.isNull = false;
- return this;
+ this.alternatives.add( term );
+
+ if (term.maybeUnsorted())
+ this.maybeUnsorted = true;
+
+
+ this.isNull = false;
+ return this;
};
public SpanAlterQueryWrapper or (SpanRegexQueryWrapper term) {
- this.alternatives.add( term );
- this.isNull = false;
- return this;
+ this.alternatives.add( term );
+ this.isNull = false;
+ return this;
};
public SpanAlterQueryWrapper or (SpanWildcardQueryWrapper wc) {
- this.alternatives.add( wc );
- this.isNull = false;
- return this;
+ this.alternatives.add( wc );
+ this.isNull = false;
+ return this;
};
public SpanQuery toQuery() throws QueryException {
- if (this.isNull || this.alternatives.size() == 0)
- return (SpanQuery) null;
+ if (this.isNull || this.alternatives.size() == 0)
+ return (SpanQuery) null;
- if (this.alternatives.size() == 1) {
- return (SpanQuery) this.alternatives.get(0).toQuery();
- };
+ if (this.alternatives.size() == 1) {
+ return (SpanQuery) this.alternatives.get(0).toQuery();
+ };
- Iterator<SpanQueryWrapper> clause = this.alternatives.iterator();
- SpanOrQuery soquery = new SpanOrQuery( clause.next().toQuery() );
- while (clause.hasNext()) {
- soquery.addClause( clause.next().toQuery() );
- };
- return (SpanQuery) soquery;
+ Iterator<SpanQueryWrapper> clause = this.alternatives.iterator();
+ SpanOrQuery soquery = new SpanOrQuery( clause.next().toQuery() );
+ while (clause.hasNext()) {
+ soquery.addClause( clause.next().toQuery() );
+ };
+ return (SpanQuery) soquery;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
index c21a968..52e2036 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
@@ -15,8 +15,11 @@
this.subquery = sqw;
if (!inclusion) {
this.isNegation = true;
- }
- }
+ };
+
+ if (sqw.maybeUnsorted())
+ this.maybeUnsorted = true;
+ };
@Override
public SpanQuery toQuery() throws QueryException {
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
index 1c54f89..a10670a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanClassQueryWrapper.java
@@ -31,22 +31,37 @@
this(subquery, (byte) 0);
};
+
+ @Override
public boolean isEmpty () {
return this.subquery.isEmpty();
};
+
+ @Override
public boolean isOptional () {
return this.subquery.isOptional();
};
+
+ @Override
public boolean isNull () {
return this.subquery.isNull();
};
+
+ @Override
public boolean isNegative () {
return this.subquery.isNegative();
};
+
+ @Override
+ public boolean maybeUnsorted () {
+ return this.subquery.maybeUnsorted();
+ };
+
+
public SpanQuery toQuery () throws QueryException {
if (this.subquery.isNull())
return (SpanQuery) null;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
index 68b844c..26853db 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanElementQueryWrapper.java
@@ -11,15 +11,15 @@
String field;
public SpanElementQueryWrapper (String field, String element) {
- this.field = field;
- this.element = element;
+ this.field = field;
+ this.element = element;
};
public SpanQuery toQuery () throws QueryException {
- return (SpanQuery) new SpanElementQuery(this.field, this.element);
+ return (SpanQuery) new SpanElementQuery(this.field, this.element);
};
public boolean isNull () {
- return false;
+ return false;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
index bfdcdbf..4c57320 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanMatchModifyQueryWrapper.java
@@ -9,6 +9,16 @@
import java.util.*;
+// Support maybeUnsorted!
+// Rename this to SpanFocusQueryWrapper
+// Support multiple classes
+
+// Sorting:
+// - Sort with a buffer of matches, e.g. 25/50,
+// So gather 50 hits, sort them, return the first 25,
+// Add new 25, sort the last 50, return 25 etc.
+// On processing, there should be an ability to raise
+// a warning, in case an unordered result bubbles up.
public class SpanMatchModifyQueryWrapper extends SpanQueryWrapper {
private SpanQueryWrapper subquery;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
index 06f71aa..7ed43eb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanQueryWrapper.java
@@ -33,7 +33,8 @@
isNegative = false,
isEmpty = false,
isExtended = false,
- isExtendedToTheRight = false;
+ isExtendedToTheRight = false,
+ maybeUnsorted = false;
/**
* Serialize the wrapped query and return a SpanQuery.
@@ -214,6 +215,24 @@
/**
+ * Check, if the wrapped query may need to be sorted
+ * on focussing on a specific class.
+ *
+ * Normally spans are always sorted, but in case of
+ * a wrapped relation query, classed operands may
+ * be in arbitrary order. When focussing on these
+ * classes, the span has to be reordered.
+ *
+ * @return <tt>true</tt> in case the wrapped query
+ * has to be sorted on focussing,
+ * otherwise <tt>false</tt>.
+ */
+ public boolean maybeUnsorted () {
+ return this.maybeUnsorted;
+ };
+
+
+ /**
* Get the minimum number of repetitions of the
* wrapped query.
*
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
index 3b9763d..3e829be 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
@@ -18,86 +18,93 @@
private final static Logger log = LoggerFactory.getLogger(SpanSequenceQueryWrapper.class);
public SpanRepetitionQueryWrapper () {
- this.isEmpty = true;
- this.isNull = false;
+ this.isEmpty = true;
+ this.isNull = false;
};
// This is for exact enumbered repetition, like in a{3}
public SpanRepetitionQueryWrapper (SpanQueryWrapper subquery, int exact) {
- if (!subquery.isEmpty())
- this.subquery = subquery;
- else
- this.isEmpty = true;
+ if (!subquery.isEmpty()) {
+ this.subquery = subquery;
+ if (subquery.maybeUnsorted())
+ this.maybeUnsorted = true;
+ }
+ else
+ this.isEmpty = true;
- if (exact < 1 || this.subquery.isNull()) {
- this.isNull = true;
- this.isOptional = true;
- this.min = 0;
- this.max = 0;
- return;
- };
+ if (exact < 1 || this.subquery.isNull()) {
+ this.isNull = true;
+ this.isOptional = true;
+ this.min = 0;
+ this.max = 0;
+ return;
+ };
- this.min = exact;
- this.max = exact;
+ this.min = exact;
+ this.max = exact;
};
// This is for a range of repetitions, like in a{2,3}, a{,4}, a{3,}, a+, a*, a?
public SpanRepetitionQueryWrapper (SpanQueryWrapper subquery, int min, int max) {
- if (!subquery.isEmpty())
- this.subquery = subquery;
- else
- this.isEmpty = true;
+ if (!subquery.isEmpty()) {
+ this.subquery = subquery;
- // Subquery may be an empty token
- if (this.subquery.isNull()) {
- this.isNull = true;
- return;
- }
- else {
- this.isNull = false;
- };
+ if (subquery.maybeUnsorted())
+ this.maybeUnsorted = true;
+ }
+ else
+ this.isEmpty = true;
+
+ // Subquery may be an empty token
+ if (this.subquery.isNull()) {
+ this.isNull = true;
+ return;
+ }
+ else {
+ this.isNull = false;
+ };
- if (min == 0) {
- this.isOptional = true;
- min = 1;
- if (max == 0)
- this.isNull = true;
- };
-
- this.min = min;
- this.max = max;
+ if (min == 0) {
+ this.isOptional = true;
+ min = 1;
+ if (max == 0)
+ this.isNull = true;
+ };
+
+ this.min = min;
+ this.max = max;
};
// Serialize to Lucene SpanQuery
public SpanQuery toQuery () throws QueryException {
- // The query is null
- if (this.isNull)
- return (SpanQuery) null;
+ // The query is null
+ if (this.isNull)
+ return (SpanQuery) null;
+
+ if (this.isEmpty) {
+ log.error("You can't queryize an empty query");
+ return (SpanQuery) null;
+ };
- if (this.isEmpty) {
- log.error("You can't queryize an empty query");
- return (SpanQuery) null;
- };
+ // The query is not a repetition query at all, but may be optional
+ if (this.min == 1 && this.max == 1)
+ return this.subquery.toQuery();
- // The query is not a repetition query at all, but may be optional
- if (this.min == 1 && this.max == 1)
- return this.subquery.toQuery();
-
- // That's a fine repetition query
- return new SpanRepetitionQuery(
- this.subquery.toQuery(),
- this.min,
- this.max,
- true
- );
+ // That's a fine repetition query
+ return new SpanRepetitionQuery(
+ this.subquery.toQuery(),
+ this.min,
+ this.max,
+ true
+ );
};
public boolean isNegative () {
- if (this.subquery == null)
- return false;
- return this.subquery.isNegative();
+ if (this.subquery == null)
+ return false;
+ return this.subquery.isNegative();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 9891459..e3cc0d2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -127,6 +127,9 @@
if (sswq.isNull())
return;
+ if (sswq.maybeUnsorted())
+ this.maybeUnsorted = true;
+
// Some debugging on initiating new sequences
if (DEBUG) {
if (!sswq.isEmpty()) {
@@ -190,6 +193,9 @@
if (ssq.isNull())
return this;
+ if (ssq.maybeUnsorted())
+ this.maybeUnsorted = true;
+
// As the spanQueryWrapper is not null,
// the sequence can't be null as well
this.isNull = false;
@@ -278,6 +284,9 @@
// The sequence may be problematic
this.isSolved = false;
+ if (ssq.maybeUnsorted())
+ this.maybeUnsorted = true;
+
// Embed a nested sequence
if (ssq instanceof SpanSequenceQueryWrapper) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
index fba22e7..c907f5a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
@@ -138,4 +138,10 @@
return false;
return this.subquery.isOptional();
};
+
+
+ @Override
+ public boolean maybeUnsorted () {
+ return this.subquery.maybeUnsorted();
+ };
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
index 3dd1482..bb15807 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
@@ -63,39 +63,57 @@
private byte flag;
public SpanWithinQueryWrapper (SpanQueryWrapper element, SpanQueryWrapper wrap) {
- this.element = element;
- this.wrap = wrap;
+ this.element = element;
+ this.wrap = wrap;
- // TODO: if (wrap.isNegative())
+ // TODO: if (wrap.isNegative())
- this.flag = (byte) SpanWithinQuery.WITHIN;
- if (!element.isNull() && !wrap.isNull())
- this.isNull = false;
+ this.flag = (byte) SpanWithinQuery.WITHIN;
+ if (!element.isNull() && !wrap.isNull())
+ this.isNull = false;
};
- public SpanWithinQueryWrapper (SpanQueryWrapper element, SpanQueryWrapper wrap, byte flag) {
- this.element = element;
- this.wrap = wrap;
- this.flag = flag;
- // TODO: if (wrap.isNegative())
+ public SpanWithinQueryWrapper
+ (SpanQueryWrapper element, SpanQueryWrapper wrap, byte flag) {
+ this.element = element;
+ this.wrap = wrap;
+ this.flag = flag;
- if (!element.isNull() && !wrap.isNull())
- this.isNull = false;
+ // TODO: if (wrap.isNegative())
+
+ if (!element.isNull() && !wrap.isNull())
+ this.isNull = false;
};
+
public SpanQuery toQuery () throws QueryException {
- if (this.isNull)
- return (SpanQuery) null;
+ if (this.isNull)
+ return (SpanQuery) null;
- // TODO: if (wrap.isNegative())
+ // TODO: if (wrap.isNegative())
- return new SpanWithinQuery(this.element.toQuery(), this.wrap.toQuery(), this.flag);
+ return new SpanWithinQuery(this.element.toQuery(), this.wrap.toQuery(), this.flag);
};
+
+ @Override
+ public boolean maybeUnsorted () {
+ if (this.wrap.maybeUnsorted())
+ return true;
+
+ // Todo: This is only true in case of non-exclusivity!
+ if (this.element.maybeUnsorted())
+ return true;
+
+ return this.maybeUnsorted;
+ };
+
+
+ @Override
public boolean isNegative () {
- if (this.element.isNegative())
- return true;
- return false;
+ if (this.element.isNegative())
+ return true;
+ return false;
};
};