Removed KorapSpans
diff --git a/Changes b/Changes
index 38fca92..edeae96 100644
--- a/Changes
+++ b/Changes
@@ -9,7 +9,8 @@
Warning: This is a breaking change!
- [performance] Payloads in ElementSpans can now be lazily loaded (diewald)
- [cleanup] Renamed /filter to /collection,
- merge KorapHTML and KorapString (diewald)
+ merged KorapHTML and KorapString,
+ removed KorapSpan, KorapTermSpan and KorapLongSpan (diewald)
0.49.3 2015-02-03
- [documentation] Improved documentation for API classes (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 4b087cd..411a50b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -17,11 +17,16 @@
import org.slf4j.LoggerFactory;
/**
+ * Adds a payload to the span, consisting of an identification number
+ * (a class) and the start and end position of the span, so this
+ * information can bubble up for later processing (similar to captures
+ * in regular expressions).
+ *
* @author diewald
*/
public class ClassSpans extends Spans {
- private List<byte[]> highlightedPayload;
+ private List<byte[]> classedPayload;
private final Spans spans;
private byte number;
private SpanQuery operand;
@@ -30,114 +35,128 @@
private ByteBuffer bb = ByteBuffer.allocate(9);
private final static Logger log = LoggerFactory.getLogger(ClassSpans.class);
+
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
+ /**
+ * Construct a new ClassSpans object.
+ *
+ * @param operand An arbitrary nested {@link SpanQuery}.
+ * @param context The {@link AtomicReaderContext}.
+ * @param acceptDocs Bit vector representing the documents
+ * to be searched in.
+ * @param termContexts A map managing {@link TermState TermStates}.
+ * @param number The identifying class number.
+ */
public ClassSpans (SpanQuery operand,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts,
- byte number) throws IOException {
- spans = operand.getSpans(context, acceptDocs, termContexts);
- this.number = number;
- this.operand = operand;
- this.highlightedPayload = new ArrayList<byte[]>(6);
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts,
+ byte number) throws IOException {
+ spans = operand.getSpans(context, acceptDocs, termContexts);
+
+ // The number of the class
+ this.number = number;
+
+ // The current operand
+ this.operand = operand;
+
+ // Payloads of the nested spans plus the class payload
+ this.classedPayload = new ArrayList<byte[]>(3);
};
+
@Override
- public Collection<byte[]> getPayload() throws IOException {
- return highlightedPayload;
+ public Collection<byte[]> getPayload () throws IOException {
+ return classedPayload;
};
+
@Override
- public boolean isPayloadAvailable() {
- return true;
+ public boolean isPayloadAvailable () {
+ // We set payloads here - so it's always true
+ return true;
};
- public int doc() { return spans.doc(); }
- // inherit javadocs
@Override
- public int start() { return spans.start(); }
+ public int doc () {
+ return spans.doc();
+ };
- // inherit javadocs
+
@Override
- public int end() { return spans.end(); }
+ public int start () {
+ return spans.start();
+ };
- // inherit javadocs
@Override
- public boolean next() throws IOException {
- if (DEBUG)
- log.trace("Forward next");
+ public int end () {
+ return spans.end();
+ };
- if (spans.next()) {
- addClassPayload();
- return true;
- };
- hasmorespans = false;
- return false;
+
+ @Override
+ public boolean next () throws IOException {
+ if (DEBUG) log.trace("Forward next");
+
+ if (spans.next())
+ return this.addClassPayload();
+
+ hasmorespans = false;
+ return false;
};
- private void addClassPayload() throws IOException {
+ private boolean addClassPayload () throws IOException {
hasmorespans = true;
- highlightedPayload.clear();
+ classedPayload.clear();
+ // Subquery has payloads
if (spans.isPayloadAvailable()) {
- highlightedPayload.addAll(spans.getPayload());
- if (DEBUG)
- log.trace("Found payload");
+ classedPayload.addAll(spans.getPayload());
+ if (DEBUG) log.trace("Found payload in nested SpanQuery");
};
- if (DEBUG)
- log.trace("Start to create class {} with span {} - {}",
- number,
- spans.start(),
- spans.end());
+ if (DEBUG) {
+ log.trace(
+ "Wrap class {} around span {} - {}",
+ number,
+ spans.start(),
+ spans.end()
+ );
+ };
// Todo: Better allocate using a Factory!
-
- //private
bb.clear();
bb.putInt(spans.start()).putInt(spans.end()).put(number);
- // System.out.println(number+":"+spans.start()+","+spans.end());
- /*
- if (DEBUG)
- log.trace("Results in {} with {}", bb.toString(), bb.array());
- */
- // Add highlight information as byte after offsets
- highlightedPayload.add(bb.array());
- /*
- if (DEBUG) {
- bb.rewind();
- log.trace("That was a class from {}-{} of class {}", bb.getInt(), bb.getInt(), bb.get());
- };
- */
-
- }
- // inherit javadocs
+ // Add class information (start, end, class number) as byte array
+ classedPayload.add(bb.array());
+ return true;
+ };
+
+
@Override
- public boolean skipTo(int target) throws IOException {
- highlightedPayload.clear();
- if (hasmorespans && spans.doc() < target &&
- spans.skipTo(target)){
- addClassPayload();
- return true;
- }
- return false;
+ public boolean skipTo (int target) throws IOException {
+ classedPayload.clear();
+ if (hasmorespans && spans.doc() < target && spans.skipTo(target))
+ return this.addClassPayload();
+ return false;
};
+
@Override
- public String toString() {
- return getClass().getName() + "(" + this.operand.toString() + ")@" +
- (doc() + ":" + start() + "-" + end());
+ public String toString () {
+ return getClass().getName() + "(" + this.operand.toString() + ")@" +
+ (doc() + ":" + start() + "-" + end());
};
@Override
public long cost() {
- return spans.cost();
- }
+ return spans.cost();
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index fc2408e..cc9d064 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -20,8 +20,6 @@
/**
* Enumeration of special spans which length is stored in their payload,
* representing elements such as phrases, sentences and paragraphs.
- *
- * Payloads are
*
* @author margaretha
* @author diewald
@@ -33,10 +31,11 @@
/**
* Constructs ElementSpans for the given {@link SpanElementQuery}.
*
- * @param spanElementQuery a SpanElementQuery
- * @param context
- * @param acceptDocs
- * @param termContexts
+ * @param spanElementQuery A {@link SpanElementQuery}.
+ * @param context The {@link AtomicReaderContext}.
+ * @param acceptDocs Bit vector representing the documents
+ * to be searched in.
+ * @param termContexts A map managing {@link TermState TermStates}.
* @throws IOException
*/
public ElementSpans(SpanElementQuery spanElementQuery,
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapLongSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapLongSpan.java
deleted file mode 100644
index 26a1367..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapLongSpan.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package de.ids_mannheim.korap.query.spans;
-
-import de.ids_mannheim.korap.query.spans.KorapSpan;
-
-import java.util.Collection;
-
-public class KorapLongSpan extends KorapSpan {
- public Collection<byte[]> payload;
-
- @Override
- public Object clone() {
- KorapLongSpan span = new KorapLongSpan();
- span.start = this.start;
- span.end = this.end;
- span.doc = this.doc;
- span.payload.addAll(this.payload);
- return span;
- };
-
- public KorapSpan copyFrom (KorapLongSpan o) {
- super.copyFrom((KorapSpan) o);
- this.payload.addAll(o.payload);
- return this;
- };
-
- @Override
- public void clearPayload () {
- if (this.payload != null)
- this.payload.clear();
- };
-
- @Override
- public void initPayload () {
- };
-
- @Override
- public String toString () {
- StringBuilder sb = new StringBuilder("[");
- return sb.append(this.start).append('-')
- .append(this.end)
- .append('(').append(this.doc).append(')')
- .append(']')
- .toString();
- };
-
-};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
deleted file mode 100644
index 3905996..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
+++ /dev/null
@@ -1,71 +0,0 @@
-package de.ids_mannheim.korap.query.spans;
-
-import java.lang.StringBuilder;
-
-public abstract class KorapSpan implements Comparable<KorapSpan>, Cloneable {
- public int
- start = -1,
- end = -1,
- doc = -1;
-
- public short elementRef = -1;
-
- public KorapSpan () {
- initPayload();
- };
-
- public void clear () {
- this.start = -1;
- this.end = -1;
- this.doc = -1;
- clearPayload();
- };
-
- public KorapSpan copyFrom (KorapSpan o) {
- this.start = o.start;
- this.end = o.end;
- this.doc = o.doc;
- this.clearPayload();
- return this;
- };
-
- public abstract void clearPayload ();
- public abstract void initPayload ();
-
- @Override
- public int compareTo (KorapSpan o) {
- /* optimizable for short numbers to return o.end - this.end */
- if (this.doc < o.doc) {
- return -1;
- }
- else if (this.doc == o.doc) {
- if (this.start < o.start) {
- return -1;
- }
- else if (this.start == o.start) {
- if (this.end < o.end)
- return -1;
- };
- };
- return 1;
- };
-
- public String toString () {
- StringBuilder sb = new StringBuilder("[");
- return sb.append(this.start).append('-')
- .append(this.end)
- .append('(').append(this.doc).append(')')
- .append(']')
- .toString();
- }
-
- public short getElementRef() {
- return elementRef;
- }
-
- public void setElementRef(short elementRef) {
- this.elementRef = elementRef;
- };
-
- // equals und hashcode implementieren
-};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
deleted file mode 100644
index 184ee4b..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
+++ /dev/null
@@ -1,112 +0,0 @@
-package de.ids_mannheim.korap.query.spans;
-
-import java.nio.ByteBuffer;
-import de.ids_mannheim.korap.query.spans.KorapSpan;
-
-// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
-
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class KorapTermSpan extends KorapSpan {
-
- public ByteBuffer payload;
- public boolean isPayloadRead = false;
-
- private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
- @Override
- public Object clone() {
- KorapTermSpan span = new KorapTermSpan();
- span.start = this.start;
- span.end = this.end;
- span.doc = this.doc;
- span.isPayloadRead = this.isPayloadRead;
- span.elementRef = this.elementRef;
-
- if (this.payload != null) {
- this.payload.rewind();
- span.payload.put(this.payload);
-
- if (DEBUG) {
- log.trace("[TS] Clone payload {} to payload {} ...",
- this.payload.toString(),
- span.payload.toString());
- log.trace("[TS] ... from {}-{} to {}-{}",
- this.startChar(),
- this.endChar(),
- span.startChar(),
- span.endChar());
- };
- };
-
- return span;
- };
-
- public KorapSpan copyFrom (KorapTermSpan o) {
- this.start = o.start;
- this.end = o.end;
- this.doc = o.doc;
- this.payload.rewind();
- this.payload.put(o.payload.array());
- return this;
- };
-
- public KorapSpan shallowCopyFrom (KorapTermSpan o) {
- this.start = o.start;
- this.end = o.end;
- this.doc = o.doc;
- this.payload = o.payload;
- return this;
- };
-
-
- @Override
- public void clearPayload () {
- if (this.payload != null) {
- this.payload.clear();
- // this.payload.rewind();
- };
- };
-
- @Override
- public void initPayload () {
- this.payload = ByteBuffer.allocate(128);
- };
-
- @Override
- public String toString () {
- StringBuilder sb = new StringBuilder("[");
- return sb.append(this.start).append('-')
- .append(this.end)
- .append("#")
- .append(this.startChar()).append('-').append(this.endChar())
- .append('(').append(this.doc).append(')')
- .append('$').append(this.payload.toString())
- .append(']')
- .toString();
- };
-
- public int startChar () {
- return this.payload.getInt(0);
- };
-
- public int endChar () {
- return this.payload.getInt(4);
- };
-
- public short elementRef(){
- return this.payload.getShort(8);
- }
-
- public void reset () {
- this.clearPayload();
- this.start = -1;
- this.end = -1;
- this.doc = -1;
- this.isPayloadRead = false;
- };
-};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 44045ef..cfd7694 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -21,7 +21,8 @@
*
* The implementation allows multiple matches at the same firstspan position.
*
- * @author margaretha, diewald
+ * @author margaretha
+ * @author diewald
*/
public class NextSpans extends SimpleSpans {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index 219fcb5..dc68911 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -103,7 +103,7 @@
private boolean tryMatch = true;
// Two buffers for storing candidates
- private LinkedList<KorapLongSpan>
+ private LinkedList<WithinSpan>
spanStore1,
spanStore2;
@@ -114,7 +114,8 @@
* @param context The {@link AtomicReaderContext}.
* @param acceptDocs Bit vector representing the documents
* to be searched in.
- * @param termContexts A map managing {@link TermState}
+ * @param termContexts A map managing {@link TermState TermStates}.
+ * @param flag A byte flag indicating the positional condition of the sub spans.
*/
public WithinSpans (SpanWithinQuery spanWithinQuery,
AtomicReaderContext context,
@@ -143,8 +144,8 @@
this.flag = flag;
// SpanStores for backtracking
- this.spanStore1 = new LinkedList<KorapLongSpan>();
- this.spanStore2 = new LinkedList<KorapLongSpan>();
+ this.spanStore1 = new LinkedList<WithinSpan>();
+ this.spanStore2 = new LinkedList<WithinSpan>();
// kept for toString() only.
this.query = spanWithinQuery;
@@ -215,7 +216,7 @@
if (DEBUG)
log.trace("In the next embedded branch");
- KorapLongSpan current = null;
+ WithinSpan current = null;
// New - fetch until theres a span in the correct doc or bigger
while (!this.spanStore2.isEmpty()) {
@@ -342,7 +343,7 @@
if (!this.spanStore1.isEmpty()) {
if (DEBUG) {
log.trace("Move everything from SpanStore 1 to SpanStore 2:");
- for (KorapLongSpan i : this.spanStore1) {
+ for (WithinSpan i : this.spanStore1) {
log.trace(" | {}", i.toString());
};
};
@@ -350,13 +351,13 @@
// Move everything to spanStore2
this.spanStore2.addAll(
0,
- (LinkedList<KorapLongSpan>) this.spanStore1.clone()
+ (LinkedList<WithinSpan>) this.spanStore1.clone()
);
this.spanStore1.clear();
if (DEBUG) {
log.trace("SpanStore 2 now is:");
- for (KorapLongSpan i : this.spanStore2) {
+ for (WithinSpan i : this.spanStore2) {
log.trace(" | {}", i.toString());
};
};
@@ -800,16 +801,16 @@
};
- private KorapLongSpan _currentWrap () {
- KorapLongSpan _wrap = new KorapLongSpan();
+ private WithinSpan _currentWrap () {
+ WithinSpan _wrap = new WithinSpan();
_wrap.start = this.wrapStart != -1 ? this.wrapStart : this.wrapSpans.start();
_wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
_wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
return _wrap;
};
- private KorapLongSpan _currentEmbedded () {
- KorapLongSpan _embedded = new KorapLongSpan();
+ private WithinSpan _currentEmbedded () {
+ WithinSpan _embedded = new WithinSpan();
_embedded.start = this.embeddedStart != -1 ?
this.embeddedStart : this.embeddedSpans.start();
_embedded.end = this.embeddedEnd != -1 ?
@@ -892,7 +893,7 @@
private void storeEmbedded () throws IOException {
// Create a current copy
- KorapLongSpan embedded = new KorapLongSpan();
+ WithinSpan embedded = new WithinSpan();
embedded.start = this.embeddedStart != -1 ?
this.embeddedStart : this.embeddedSpans.start();
embedded.end = this.embeddedEnd != -1 ?
@@ -1102,4 +1103,84 @@
return getClass().getName() + "("+query.toString()+")@"+
(embeddedDoc <= 0?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
};
+
+
+ // Candidate span class, formerly provided by the standalone classes
+ // KorapSpan and KorapLongSpan, which were merged into WithinSpans
+    /**
+     * Candidate span kept in the span stores: document, start and end
+     * position plus an optional payload collection and element reference.
+     */
+    private static class WithinSpan implements Comparable<WithinSpan>, Cloneable {
+        public int start = -1, end = -1, doc = -1;
+
+        // Stays null until a payload is assigned or copied
+        public Collection<byte[]> payload;
+        public short elementRef = -1;
+
+        /** Reset all positions to -1 and empty the payload, if any. */
+        public void clear () {
+            this.start = -1;
+            this.end = -1;
+            this.doc = -1;
+            clearPayload();
+        };
+
+        /** Order by doc, start and end; 0 if all three are equal. */
+        @Override
+        public int compareTo (WithinSpan o) {
+            if (this.doc != o.doc)
+                return this.doc < o.doc ? -1 : 1;
+            if (this.start != o.start)
+                return this.start < o.start ? -1 : 1;
+            if (this.end != o.end)
+                return this.end < o.end ? -1 : 1;
+            return 0;
+        };
+
+        public short getElementRef () {
+            return elementRef;
+        };
+
+        public void setElementRef (short elementRef) {
+            this.elementRef = elementRef;
+        };
+
+        /** Create a copy; a payload, if present, goes into a new collection. */
+        @Override
+        public Object clone () {
+            WithinSpan span = new WithinSpan();
+            span.start = this.start;
+            span.end = this.end;
+            span.doc = this.doc;
+            span.elementRef = this.elementRef;
+            if (this.payload != null)
+                span.payload = new LinkedList<byte[]>(this.payload);
+            return span;
+        };
+
+        /** Copy the positions of o and append its payloads, if any. */
+        public WithinSpan copyFrom (WithinSpan o) {
+            this.start = o.start;
+            this.end = o.end;
+            this.doc = o.doc;
+            if (o.payload != null) {
+                if (this.payload == null)
+                    this.payload = new LinkedList<byte[]>();
+                this.payload.addAll(o.payload);
+            };
+            return this;
+        };
+
+        /** Empty the payload collection, if there is one. */
+        public void clearPayload () {
+            if (this.payload != null)
+                this.payload.clear();
+        };
+
+        @Override
+        public String toString () {
+            return "[" + this.start + '-' + this.end + '(' + this.doc + ")]";
+        };
+    };
};