Bugfixes for negative segments, focus() and some logging issues
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index 6df320e..11616ea 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -61,6 +61,7 @@
STARTSWITH = SpanWithinQuery.STARTSWITH,
MATCH = SpanWithinQuery.MATCH;
+ private static final int MAX_CLASS_NUM = 127;
/**
* Constructs a new base object for query generation.
@@ -78,6 +79,7 @@
public int min, max;
public Boundary (JsonNode json, int defaultMin, int defaultMax) throws QueryException {
+
if (!json.has("@type") ||
!json.get("@type").asText().equals("korap:boundary")) {
throw new QueryException("Boundary definition is not valid");
@@ -85,6 +87,9 @@
min = json.get("min").asInt(defaultMin);
max = json.get("max").asInt(defaultMax);
+
+ if (DEBUG)
+ log.trace("Found korap:boundary with {}:{}", min, max);
};
};
@@ -110,6 +115,8 @@
// TODO: Rename this span context!
public SpanQueryWrapperInterface fromJSON (JsonNode json) throws QueryException {
+ int number = 0;
+
if (!json.has("@type")) {
throw new QueryException("JSON-LD group has no @type attribute");
};
@@ -126,9 +133,15 @@
String operation = json.get("operation").asText();
+ if (DEBUG)
+ log.trace("Found {} group", operation);
+
// Get all operands
JsonNode operands = json.get("operands");
+ if (DEBUG)
+ log.trace("Operands are {}", operands);
+
if (!json.has("operands") || !operands.isArray())
throw new QueryException("Operation needs operand list");
@@ -143,15 +156,19 @@
return ssaq;
case "operation:position":
- if (!json.has("frame"))
- throw new QueryException("Operation needs frame specification");
if (operands.size() != 2)
throw new QueryException("Operation needs exactly two operands");
// TODO: Check for operands
- String frame = json.has("frame") ? json.get("frame").asText() : "contains";
+ String frame = json.has("frame") ?
+ json.get("frame").asText() :
+ "frame:contains";
+
+ if (DEBUG)
+ log.trace("Position frame is '{}'", frame);
+
byte flag = WITHIN;
switch (frame) {
case "frame:contains":
@@ -183,7 +200,9 @@
// Check for exclusion modificator
Boolean exclude;
if (json.has("exclude") && json.get("exclude").asBoolean())
- throw new QueryException("Exclusion is currently not supported in position operations");
+ throw new QueryException(
+ "Exclusion is currently not supported in position operations"
+ );
return new SpanWithinQueryWrapper(
this.fromJSON(operands.get(0)),
@@ -193,7 +212,6 @@
// TODO: This is DEPRECATED and should be communicated that way
case "operation:submatch":
- int number = 0;
if (operands.size() != 1)
throw new QueryException("Operation needs exactly two operands");
@@ -213,10 +231,6 @@
);
case "operation:sequence":
- if (DEBUG) {
- log.trace("Found new sequence");
- System.err.println("----");
- };
if (operands.size() < 2)
throw new QueryException(
@@ -260,7 +274,9 @@
unit = constraint.get("key").asText();
if (unit.equals("t"))
- throw new QueryException("Text based distances are not supported yet");
+ throw new QueryException(
+ "Text based distances are not supported yet"
+ );
int min, max;
if (constraint.has("boundary")) {
@@ -285,27 +301,47 @@
return sseqqw;
case "operation:class":
+
if (json.has("class")) {
if (operands.size() != 1)
throw new QueryException(
"Class group expects exactly one operand in list"
);
- return new SpanClassQueryWrapper(this.fromJSON(operands.get(0)), json.get("class").asInt(0));
+
+ if (DEBUG)
+ log.trace("Found Class definition for {}", json.get("class").asInt(0));
+
+ number = json.get("class").asInt(0);
+
+ if (number > MAX_CLASS_NUM)
+ throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
+ return new SpanClassQueryWrapper(
+ this.fromJSON(operands.get(0)), number
+ );
};
throw new QueryException("Class group expects class attribute");
case "operation:repetition":
- int min, max;
+ int min = 0;
+ int max = 100;
if (json.has("boundary")) {
Boundary b = new Boundary(json.get("boundary"), 0, 100);
min = b.min;
max = b.max;
}
else {
- min = json.get("min").asInt(0);
- max = json.get("max").asInt(100);
+ if (json.has("min"))
+ min = json.get("min").asInt(0);
+ if (json.has("max"))
+ max = json.get("max").asInt(100);
+
+ if (DEBUG)
+ log.trace(
+ "Boundary is set by deprecated {}-{}",
+ min,
+ max);
};
// Sanitize max
@@ -322,11 +358,7 @@
// Check relation between min and max
if (min > max)
- throw new QueryException("The maximum repetition value has to " +
- "be greater or equal to the minimum repetition value");
-
- if (DEBUG)
- log.trace("Found new repetition %d-%d", min, max);
+ max = min; // sanitize: an inverted range collapses to exactly min repetitions
return new SpanRepetitionQueryWrapper(
this.fromJSON(operands.get(0)), min, max
@@ -336,15 +368,17 @@
throw new QueryException("Unknown group operation");
case "korap:reference":
- if (json.has("operation") && !json.get("operation").asText().equals("operation:focus"))
- throw new QueryException("Reference operation " + json.get("operation").asText() + " not supported yet");
-
- int number = 0;
+ if (json.has("operation") &&
+ !json.get("operation").asText().equals("operation:focus"))
+ throw new QueryException("Reference operation " +
+ json.get("operation").asText() +
+ " not supported yet");
operands = json.get("operands");
- if (operands.size() == 0)
+ if (operands.size() == 0) {
throw new QueryException("Focus with peripheral references is not supported yet");
+ };
if (operands.size() != 1)
throw new QueryException("Operation needs exactly two operands");
@@ -355,11 +389,18 @@
throw new QueryException("Class reference operators not supported yet");
number = json.get("classRef").get(0).asInt();
+
+ if (number > MAX_CLASS_NUM)
+ throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
+
}
else if (json.has("spanRef")) {
throw new QueryException("Span references not supported yet");
};
+ if (DEBUG)
+ log.trace("Wrap class reference {}", number);
+
return new SpanMatchModifyQueryWrapper(
this.fromJSON(operands.get(0)), number
);
@@ -383,6 +424,10 @@
private SpanQueryWrapperInterface _segFromJSON (JsonNode json) throws QueryException {
String type = json.get("@type").asText();
+
+ if (DEBUG)
+ log.trace("Wrap new token definition by {}", type);
+
switch (type) {
case "korap:term":
@@ -392,7 +437,12 @@
switch (match) {
case "match:ne":
- return this.seg().without((SpanSegmentQueryWrapper) this._termFromJSON(json));
+ if (DEBUG)
+ log.trace("Term is negated");
+ SpanSegmentQueryWrapper ssqw =
+ (SpanSegmentQueryWrapper) this._termFromJSON(json);
+ ssqw.makeNegative();
+ return this.seg().without(ssqw);
case "match:eq":
return this._termFromJSON(json);
};
@@ -488,7 +538,6 @@
if (value.length() == 0 && (layer.equals("l") || layer.equals("p")))
value.append(defaultFoundry);
-
value.append(layer).append(':');
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index c86c0d4..c882ae4 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -59,6 +59,9 @@
this.query = queryIface.toQuery();
if (queryIface.isOptional())
this.addWarning("Optionality of query is ignored");
+ if (queryIface.isNegative())
+ this.addWarning("Exclusivity of query is ignored");
+
}
catch (QueryException q) {
this.error = q.getMessage();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index b64d253..66b16f3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -37,6 +37,9 @@
private boolean isFinish;
protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
public AttributeSpans(SpanAttributeQuery simpleSpanQuery,
AtomicReaderContext context, Bits acceptDocs,
@@ -73,7 +76,8 @@
return true;
}
else{
- logger.info("Setting candidate list");
+ if (DEBUG)
+ logger.info("Setting candidate list");
setCandidateList();
// for (CandidateAttributeSpan cs: candidateList){
// logger.info("cs ref "+cs.getElementRef());
@@ -95,7 +99,8 @@
firstSpans.start() == currentPosition){
short elementRef = retrieveElementRef(firstSpans);
- logger.info("ElementRef: "+elementRef);
+ if (DEBUG)
+ logger.info("ElementRef: "+elementRef);
candidateList.add(new CandidateAttributeSpan(firstSpans,elementRef));
hasMoreSpans = firstSpans.next();
}
@@ -112,7 +117,8 @@
ByteBuffer wrapper = ByteBuffer.wrap(payload.get(0));
short num = wrapper.getShort();
long e = System.nanoTime();
- logger.info("Bytebuffer runtime "+ (e-s));
+ if (DEBUG)
+ logger.info("Bytebuffer runtime "+ (e-s));
return num;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 6de3fb7..d8cd79b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -19,6 +19,10 @@
import java.nio.ByteBuffer;
+/**
+ * @author diewald
+ */
+
public class ClassSpans extends Spans {
private List<byte[]> highlightedPayload;
private Collection<byte[]> payload;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
index cb83e5c..40ff4ca 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
@@ -21,6 +21,9 @@
private boolean isOrdered;
private boolean hasMoreSecondSpans;
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
public DistanceExclusionSpan(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts, boolean isOrdered)
@@ -106,8 +109,11 @@
setMatchFirstSpan(new CandidateSpan(firstSpans));
//setMatchSecondSpan(new CandidateSpan(secondSpans));
- log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
- matchEndPosition);
+ if (DEBUG)
+ log.trace("doc# {}, start {}, end {}",
+ matchDocNumber,
+ matchStartPosition,
+ matchEndPosition);
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index d963772..9a1c13b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -32,7 +32,10 @@
private List<AttributeSpans> notAttributeList;
protected Logger logger = LoggerFactory.getLogger(ElementAttributeSpans.class);
-
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
public ElementAttributeSpans(SpanElementAttributeQuery simpleSpanQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
@@ -73,7 +76,8 @@
private boolean advance() throws IOException {
while (hasMoreSpans && computeElementPosition()){
- logger.info("element: " + elements.start() + ","+ elements.end() +
+ if (DEBUG)
+ logger.info("element: " + elements.start() + ","+ elements.end() +
" ref:"+elements.getElementRef());
if (checkElementRef() && checkNotElementRef()){
@@ -82,7 +86,8 @@
this.matchEndPosition = elements.end();
this.matchPayload = elements.getPayload();
hasMoreSpans = attributeList.get(0).next();
- logger.info("MATCH "+matchDocNumber);
+ if (DEBUG)
+ logger.info("MATCH "+matchDocNumber);
hasMoreSpans = elements.next();
return true;
@@ -98,7 +103,8 @@
for (AttributeSpans attribute: attributeList){
if (elements.getElementRef() != attribute.getElementRef()){
- logger.info("attribute ref doesn't match");
+ if (DEBUG)
+ logger.info("attribute ref doesn't match");
if (elements.getElementRef() < attribute.getElementRef())
hasMoreSpans = attribute.next();
else {
@@ -118,7 +124,8 @@
for (AttributeSpans notAttribute: notAttributeList){
if (elements.start() == notAttribute.start() &&
elements.getElementRef() == notAttribute.getElementRef()){
- logger.info("not attribute ref exists");
+ if (DEBUG)
+ logger.info("not attribute ref exists");
hasMoreSpans = elements.next();
return false;
}
@@ -136,13 +143,15 @@
if (elements.getElementRef() < 1){ // the element does not have an attribute
elements.isElementRef = true; // dummy setting enabling reading elementRef
hasMoreSpans = elements.next();
- logger.info("skip");
+ if (DEBUG)
+ logger.info("skip");
continue;
}
if (checkAttributeListPosition() &&
checkNotAttributeListPosition()){
- logger.info("element is found: "+ elements.start());
+ if (DEBUG)
+ logger.info("element is found: "+ elements.start());
return true;
}
}
@@ -159,7 +168,8 @@
for (AttributeSpans a : notAttributeList){
// advance the doc# of not AttributeSpans
- logger.info("a "+a.start());
+ if (DEBUG)
+ logger.info("a "+a.start());
while (!a.isFinish() && a.doc() <= elements.doc()){
if (a.doc() == elements.doc() &&
@@ -183,8 +193,8 @@
for (AttributeSpans a : attributeList){
if(!ensureSamePosition(elements, a)) return false;
-
- logger.info("pos:" + elements.start());
+ if (DEBUG)
+ logger.info("pos:" + elements.start());
if (isFirst){
isFirst = false;
currentPosition = elements.start();
@@ -195,7 +205,8 @@
}
}
- logger.info("same pos: "+isSame+ ", pos "+elements.start());
+ if (DEBUG)
+ logger.info("same pos: "+isSame+ ", pos "+elements.start());
return isSame;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index a9aa08b..5879946 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -33,7 +33,10 @@
public boolean isElementRef = false; // A dummy flag
- protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
+ protected Logger logger = LoggerFactory.getLogger(ElementSpans.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
public ElementSpans(SpanElementQuery spanElementQuery,
AtomicReaderContext context, Bits acceptDocs,
@@ -70,7 +73,8 @@
return true;
}
else{
- logger.info("Setting candidate list");
+ if (DEBUG)
+ logger.info("Setting candidate list");
setCandidateList();
currentDoc = termSpans.doc();
currentPosition = termSpans.start();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index 8fa1636..a45e1ee 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -19,46 +19,52 @@
import java.nio.ByteBuffer;
+/**
+ * Modify matches to, for example, return only certain class or span ranges.
+ *
+ * @author diewald
+ */
+
public class MatchModifyClassSpans extends Spans {
- private List<byte[]> highlightedPayload;
+ private List<byte[]> wrappedPayload;
private Collection<byte[]> payload;
private final Spans spans;
private byte number;
private boolean divide;
private ByteBuffer bb;
- private SpanQuery highlight;
+ private SpanQuery wrapQuery;
private final Logger log = LoggerFactory.getLogger(MatchModifyClassSpans.class);
+
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
-
private int start = -1, end;
private int tempStart = 0, tempEnd = 0;
-
- public MatchModifyClassSpans (SpanQuery highlight,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts,
- byte number,
- boolean divide) throws IOException {
- spans = highlight.getSpans(context, acceptDocs, termContexts);
- this.number = number;
- this.divide = divide;
- this.highlight = highlight;
- this.highlightedPayload = new ArrayList<byte[]>(6);
- bb = ByteBuffer.allocate(9);
+ public MatchModifyClassSpans (
+ SpanQuery wrapQuery,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts,
+ byte number,
+ boolean divide) throws IOException {
+ this.spans = wrapQuery.getSpans(context, acceptDocs, termContexts);
+ this.number = number;
+ this.divide = divide;
+ this.wrapQuery = wrapQuery;
+ this.bb = ByteBuffer.allocate(9);
+ this.wrappedPayload = new ArrayList<byte[]>(6);
};
@Override
public Collection<byte[]> getPayload() throws IOException {
- return highlightedPayload;
+ return wrappedPayload;
};
@Override
public boolean isPayloadAvailable() {
- return highlightedPayload.isEmpty() == false;
+ return wrappedPayload.isEmpty() == false;
};
public int doc() { return spans.doc(); }
@@ -75,18 +81,24 @@
// inherit javadocs
@Override
public boolean next() throws IOException {
+ /* TODO:
+ * In case of a split() (instead of a submatch())
+ * Is the cache empty?
+ * Otherwise: Next from list
+ */
+
if (DEBUG)
- log.trace("||> Forward next");
+ log.trace("Forward next match");
- highlightedPayload.clear();
+ // Next span
+ while (spans.next()) {
- /*
- Bei divide:
- Ist der Speicher leer?
- Sonst der nächste Treffer vom Speicher!
- */
+ if (DEBUG)
+ log.trace("Forward next inner span");
- if (spans.next()) {
+ // No classes stored
+ wrappedPayload.clear();
+
start = -1;
if (spans.isPayloadAvailable()) {
end = 0;
@@ -95,10 +107,10 @@
for (byte[] payload : spans.getPayload()) {
bb.clear();
bb.put(payload);
- // bb = ByteBuffer.wrap(payload, 0, 10);
bb.position(8);
// Todo: Implement Divide
+ // Found class payload of structure <i>start<i>end<b>class
if (payload.length == 9 && bb.get() == this.number) {
bb.rewind();
tempStart = bb.getInt();
@@ -107,87 +119,61 @@
if (DEBUG)
log.trace("Found matching class {}-{}", tempStart, tempEnd);
+ // Set start position
if (start == -1)
start = tempStart;
else if (tempStart < start)
start = tempStart;
+ // Set end position
if (tempEnd > end)
end = tempEnd;
}
- // Doesn't mark an important payload - but should be kept
+ // No class payload - but keep!
else {
if (DEBUG)
log.trace("Remember old payload {}", payload);
- highlightedPayload.add(payload);
+ wrappedPayload.add(payload);
};
};
-
- /*
-
- if (DEBUG)
- log.trace("All payload processed, now clean up");
-
- // We have a payload found that is a class for modification!
- if (start != -1) {
- int i = highlightedPayload.size() - 1;
-
- for (; i >= 0; i--) {
- bb.clear();
- bb.put(highlightedPayload.get(i),0,8);
- bb.rewind();
- if (bb.getInt() < start || bb.getInt() > end) {
- bb.rewind();
- if (DEBUG)
- log.trace("Remove highlight {} with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
- highlightedPayload.remove(i);
- continue;
- };
-
- if (DEBUG) {
- bb.rewind();
- log.trace("Highlight {} will stay with {}-{} for {}-{}", i, bb.getInt(), bb.getInt(), start, end);
- };
- };
- // Todo: SPLIT
- // Vorsicht! Bei divide könnten Payloads mehrmals vergeben werden
- // müssen!
-
- };
- */
};
+ // Class not found
+ if (start == -1)
+ continue;
- if (start == -1) {
- start = spans.start();
- end = spans.end();
- }
- else if (DEBUG) {
- log.trace("Start to shrink to {} - {} class: {}",
- start, end, number);
- };
+ if (DEBUG)
+ log.trace(
+ "Start to focus on class {} from {} to {}",
+ number,
+ start,
+ end
+ );
return true;
};
+
+ // No more spans
return false;
};
+
// inherit javadocs
@Override
- public boolean skipTo(int target) throws IOException {
+ public boolean skipTo (int target) throws IOException {
return spans.skipTo(target);
};
@Override
- public String toString() {
- return getClass().getName() + "(" + this.highlight.toString() + ")@" +
+ public String toString () {
+ return getClass().getName() + "(" + this.wrapQuery.toString() + ")@" +
(doc() + ":" + start() + "-" + end());
};
@Override
- public long cost() {
+ public long cost () {
return spans.cost();
- }
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index e435d50..6a1c3d8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -21,6 +21,9 @@
public abstract class NonPartialOverlappingSpans extends SimpleSpans{
private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
AtomicReaderContext context, Bits acceptDocs,
@@ -52,9 +55,11 @@
while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)){
int matchCase = findMatch();
if (matchCase == 0){
+ if (DEBUG) {
log.trace("Match doc#: {}",matchDocNumber);
log.trace("Match positions: {}-{}", matchStartPosition,
- matchEndPosition);
+ matchEndPosition);
+ };
doCollectPayloads();
return true;
}
@@ -78,15 +83,18 @@
/** Collecting available payloads from the current first and second spans */
private void doCollectPayloads() throws IOException {
if (collectPayloads){
+ if (DEBUG)
log.trace("Collect payloads");
if (firstSpans.isPayloadAvailable()) {
Collection<byte[]> payload = firstSpans.getPayload();
- log.trace("Found {} payloads in firstSpans", payload.size());
+ if (DEBUG)
+ log.trace("Found {} payloads in firstSpans", payload.size());
matchPayload.addAll(payload);
}
if (secondSpans.isPayloadAvailable()) {
Collection<byte[]> payload = secondSpans.getPayload();
- log.trace("Found {} payloads in secondSpans", payload.size());
+ if (DEBUG)
+ log.trace("Found {} payloads in secondSpans", payload.size());
matchPayload.addAll(payload);
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index 94413ed..56724b4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -32,10 +32,11 @@
/*
TODO: Use the flag in KorapQuery!
+ TODO: Support exclusivity
*/
/**
- * Compare two spans and check, if the second one relates to the first one.
+ * Compare two spans and check how they relate positionally.
*
* @author Nils Diewald
*/
@@ -43,6 +44,7 @@
// Logger
private final Logger log = LoggerFactory.getLogger(WithinSpans.class);
+
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
@@ -72,7 +74,7 @@
/*
Supported flags are currently:
- ov -> 0 | overlap: A & B != empty)
+ ov -> 0 | overlap: A & B != empty
rov -> 2 | real overlap: A & B != empty and
((A | B) != A or
(A | B) != B)
@@ -84,6 +86,8 @@
sw -> 10 | startswith: A | B = A and
A.end = B.end
m -> 12 | A = B
+
+ This may change in case the system switches to a 16-bit vector
*/
public static final byte
OVERLAP = (byte) 0,
@@ -96,11 +100,12 @@
private byte flag;
+ // Contains the query
private SpanWithinQuery query;
+ // Contains the spans
private final Spans embeddedSpans, wrapSpans;
-
private boolean tryMatch = true;
private LinkedList<KorapLongSpan>
@@ -151,11 +156,11 @@
// Initialize spans
if (!this.init()) {
- this.more = false;
- this.inSameDoc = false;
- this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
+ this.more = false;
+ this.inSameDoc = false;
+ this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
- this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
+ this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
};
@@ -682,11 +687,6 @@
};
};
- /*
- if (DEBUG)
- log.trace("Match is {}", match);
- */
-
try {
this.todo(currentCase);
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
index 91dbcc4..e72c566 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
@@ -159,10 +159,8 @@
return this;
};
- // Identical to with
public SpanSegmentQueryWrapper without (SpanSegmentQueryWrapper seg) {
if (!seg.isNull()) {
- // TODO!!!
this.with(seg);
this.isNull = false;
};
@@ -180,16 +178,10 @@
);
}
+ // These are now identical but may be negative
else if (this.inclusive.size() == 0 && this.exclusive.size() >= 1) {
-
- // Not supported anymore
- // TODO: Raise error
- return (SpanQuery) new SpanNotQuery(
- new SpanTermQuery(new Term(this.field, "T")),
- this._listToOrQuery(this.exclusive)
- );
+ return (SpanQuery) this._listToQuery(this.exclusive);
}
-
else if (this.inclusive.size() >= 1 && this.exclusive.size() == 0) {
return (SpanQuery) this._listToQuery(this.inclusive);
};
@@ -197,7 +189,6 @@
return (SpanQuery) null;
};
-
private SpanQuery _listToQuery (ArrayList<SpanQuery> list) {
SpanQuery query = list.get(0);
@@ -208,7 +199,6 @@
return (SpanQuery) query;
};
-
private SpanQuery _listToOrQuery (ArrayList<SpanQuery> list) {
if (list.size() == 1) {
return (SpanQuery) list.get(0);
@@ -240,5 +230,15 @@
};
return false;
};
+
+ public void makeNegative () {
+ /*
+ TODO: THIS IS A BIT MORE COMPLICATED!
+ AND and OR groups have to be switched
+ */
+
+ this.exclusive.addAll(this.inclusive);
+ this.inclusive.clear();
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
index 47face4..d67505c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithinQueryWrapper.java
@@ -11,10 +11,43 @@
import org.apache.lucene.search.spans.SpanQuery;
/*
- Document: Optionality of operands will be ignored - while the optionality of the wrap is herited!
+ Todo:
+ - Exclusivity has to be supported
+ - In case the wrap is negative,
+ the query has to be interpreted as being exclusive!
+ - within(<s>,[base=term]) -> all <s> including [base=term]
+ - within(<s>,[base!=term]) -> all <s> not including [base=term]
+ - !within(<s>,[base=term]) -> all <s> not including [base=term]
+ - within(!<s>,[base!=term]) -> failure - embedding span has to be positive
+ -> Exception: It is an Overlap!
+ -> BUT! This becomes weird with classes, as
+ - within(<s>, {2:[base!=term]}) will match quite often!
+ -> so this is no valid solution!
- Idea:
- - Maybe inherit the optionality when it is in an element and rewrite the query to an alternation if the wrap is
+ Better - Exclusivity and Negation:
+ - within(<s>,[base!=term]) -> all <s>, hitting only [base!=term] tokens
+ -> is this technically doable? NO!
+ - !within(<s>,[base=term]) -> all <s>, not containing [base=term]
+ - within(!<s>,[base=term]) -> failure
+
+
+ - Optionality:
+ - At the moment:
+ - Optionality of operands will be ignored
+ while the optionality of the wrap is inherited!
+ - within(<s>?, [base=term]) -> opt
+ - within(<s>, {2:[base=term]*}) -> (<s>|within(<s>, {2:[base=term]+}))
+ - within(<s>?, {2:[base=term]*}) -> (<s>|within(<s>, {2:[base=term]+})) and opt
+
+ - Speed improvement:
+ - Check for classes!
+ - within(<s>, [base=term]*) -> <s>
+ - within(<s>, {2:[base=term]*}) -> (<s>|within(<s>, {2:[base=term]+}))
+
+ - Special case overlaps(), overlapsStrictly():
+ - overlaps(<s>, <p>) == overlaps(<p>, <s>)
+ - overlaps(<s>?, <p>) -> optionality is always inherited!
+
*/
@@ -27,6 +60,9 @@
public SpanWithinQueryWrapper (SpanQueryWrapperInterface element, SpanQueryWrapperInterface wrap) {
this.element = element;
this.wrap = wrap;
+
+ // TODO: if (wrap.isNegative())
+
this.flag = (byte) SpanWithinQuery.WITHIN;
if (!element.isNull() && !wrap.isNull())
this.isNull = false;
@@ -37,6 +73,8 @@
this.wrap = wrap;
this.flag = flag;
+ // TODO: if (wrap.isNegative())
+
if (!element.isNull() && !wrap.isNull())
this.isNull = false;
};
@@ -45,6 +83,8 @@
if (this.isNull)
return (SpanQuery) null;
+ // TODO: if (wrap.isNegative())
+
return new SpanWithinQuery(this.element.toQuery(), this.wrap.toQuery(), this.flag);
};
@@ -57,9 +97,8 @@
};
public boolean isNegative () {
- if (this.element.isNegative() || this.wrap.isNegative()) {
+ if (this.element.isNegative())
return true;
- };
return false;
};
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 3619d79..6de0cdd 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -4,6 +4,8 @@
# Spans:
#log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.AttributeSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.ElementAttributeSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout