ElementAttributeQuery for Cosmas ELEM function
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
new file mode 100644
index 0000000..d43f67d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -0,0 +1,48 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.AttributeSpans;
+
+public class SpanAttributeQuery extends SimpleSpanQuery{
+
+    /** Passes the attribute term query and the payload flag on to SimpleSpanQuery. */
+    public SpanAttributeQuery(SpanTermQuery firstClause, boolean collectPayloads) {
+        super(firstClause, collectPayloads);
+    }
+
+    /** Copies this query: the wrapped clause is cloned and the boost is carried over. */
+    @Override
+    public SimpleSpanQuery clone() {
+        SpanTermQuery clauseCopy = (SpanTermQuery) this.firstClause.clone();
+        SpanAttributeQuery copy = new SpanAttributeQuery(clauseCopy, this.collectPayloads);
+        copy.setBoost(getBoost());
+        return copy;
+    }
+
+    /** Creates the AttributeSpans enumeration of this query for the given reader context. */
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new AttributeSpans(this, context, acceptDocs, termContexts);
+    }
+
+    /** Renders the query as spanAttribute(clause) followed by the boost suffix. */
+    @Override
+    public String toString(String field) {
+        String clauseText = firstClause.toString(field);
+        String boostText = ToStringUtils.boost(getBoost());
+        return "spanAttribute(" + clauseText + ")" + boostText;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
new file mode 100644
index 0000000..8b74661
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
@@ -0,0 +1,47 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.spans.ElementAttributeSpans;
+
+public class SpanElementAttributeQuery extends SimpleSpanQuery{
+
+    /** Pairs an element query (first clause) with an attribute query (second clause). */
+    public SpanElementAttributeQuery(SpanElementQuery firstClause,
+            SpanAttributeQuery secondClause, boolean collectPayloads) {
+        super(firstClause, secondClause, collectPayloads);
+    }
+
+    /** Returns a copy with both clauses cloned and the boost carried over (never null). */
+    @Override
+    public SimpleSpanQuery clone() {
+        SpanElementAttributeQuery sq = new SpanElementAttributeQuery(
+                (SpanElementQuery) firstClause.clone(),
+                (SpanAttributeQuery) secondClause.clone(),
+                collectPayloads);
+        // Return the copy itself; returning null would make any use of the clone fail.
+        sq.setBoost(getBoost());
+        return sq;
+    }
+
+    /** Creates the ElementAttributeSpans enumeration of this query for the given reader context. */
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new ElementAttributeSpans(this, context, acceptDocs, termContexts);
+    }
+
+    /** Renders the query as spanElementAttribute(elementClause, attributeClause). */
+    @Override
+    public String toString(String field) {
+        return "spanElementAttribute(" + firstClause.toString(field) + ", "
+                + secondClause.toString(field) + ")";
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 7e4ee9e..15177c6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -9,6 +9,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.spans.Spans;
@@ -19,8 +20,8 @@
import java.util.Map;
import java.util.Set;
-/**
- * @author Nils Diewald
+/** Extend element with attribute option
+ * @author Nils Diewald, Margaretha
*/
/** Matches spans wrapped by an element. */
@@ -28,6 +29,7 @@
protected Term element;
private String elementStr;
private String field;
+ private boolean attribute = false;
/** Constructor. */
public SpanElementQuery (String field, String term) {
@@ -36,6 +38,11 @@
this.elementStr = term;
this.element = new Term(field, sb.append(term).toString());
};
+
+    public SpanElementQuery(String field, String term, boolean attribute){ // variant that additionally records an attribute flag
+        this(field, term); // reuse the (field, term) constructor to set up the element term
+        this.attribute = attribute; // only stored here; no reader of the flag is visible in this patch
+    }
/** Return the element whose spans are matched. */
public Term getElement() { return element; };
@@ -119,17 +126,17 @@
termsEnum.seekExact(element.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
-
- if (postings != null)
+
+ if (postings != null){
return new ElementSpans(postings, element);
-
+ }
// element does exist, but has no positions
throw new IllegalStateException("field \"" + element.field() + "\" was indexed " +
"without position data; cannot run " +
"SpanElementQuery (element=" + element.text() + ")");
};
- public String getElementStr () {
+ public String getElementStr () {
return elementStr;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
index e25e5fe..97ecd0a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
@@ -13,6 +13,12 @@
import de.ids_mannheim.korap.query.spans.RepetitionSpans;
+/** SpanRepetitionQuery means that the given query can appear
+ * multiple times, as specified by the minimum and the maximum number
+ * of repetitions parameters.
+ *
+ * @author margaretha
+ * */
public class SpanRepetitionQuery extends SimpleSpanQuery{
private int min, max;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
new file mode 100644
index 0000000..52439a6
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -0,0 +1,165 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
+import de.ids_mannheim.korap.query.SpanAttributeQuery;
+
+/** Span enumeration of attributes which are treated as a normal term with
+ * special payload assignment referring to the element to which it belongs.
+ * The class is basically a wrapper of the TermSpan with additional
+ * functionality regarding the element reference. Element reference is
+ * annotated ascendingly starting from the left side.
+ *
+ * The enumeration is ordered firstly by the start position of the attribute
+ * and secondly by the element reference descendingly with respect to the
+ * nature of the order of the element enumeration.
+ *
+ * @author margaretha
+ * */
+public class AttributeSpans extends SimpleSpans{
+
+    // Attributes of one doc/start position, ordered by descending element reference.
+    private List<CandidateAttributeSpan> candidateList;
+    // Document and start position of the attribute group firstSpans points at.
+    private int currentDoc, currentPosition;
+    // Element reference of the current match.
+    public short elementRef;
+
+    protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
+
+    /** Creates the enumeration and moves the wrapped spans to the first attribute. */
+    public AttributeSpans(SpanAttributeQuery simpleSpanQuery,
+            AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        super(simpleSpanQuery, context, acceptDocs, termContexts);
+        candidateList = new ArrayList<>();
+        hasMoreSpans = firstSpans.next();
+        if (hasMoreSpans) {
+            currentDoc = firstSpans.doc();
+            currentPosition = firstSpans.start();
+        }
+    }
+
+    /** Moves to the next attribute span; returns false when none is left. */
+    @Override
+    public boolean next() throws IOException {
+        isStartEnumeration=false;
+        return advance();
+    }
+
+    /** Reports the next buffered candidate, refilling the buffer when it is empty. */
+    private boolean advance() throws IOException {
+        while(hasMoreSpans || !candidateList.isEmpty()){
+            if (!candidateList.isEmpty()){
+                CandidateAttributeSpan cs = candidateList.remove(0);
+                this.matchDocNumber = cs.getDoc();
+                this.matchStartPosition = cs.getStart();
+                this.matchEndPosition = cs.getEnd();
+                this.setElementRef(cs.getElementRef());
+                return true;
+            }
+            // Buffer the group at (currentDoc, currentPosition), then remember
+            // where the following group starts (only while spans remain).
+            setCandidateList();
+            if (hasMoreSpans){
+                currentDoc = firstSpans.doc();
+                currentPosition = firstSpans.start();
+            }
+        }
+        return false;
+    }
+
+    /** Buffers all attributes at (currentDoc, currentPosition), highest reference first. */
+    private void setCandidateList() throws IOException {
+        while (hasMoreSpans && firstSpans.doc() == currentDoc &&
+                firstSpans.start() == currentPosition){
+            short ref = retrieveElementRef(firstSpans);
+            logger.trace("ElementRef: {}", ref);
+            candidateList.add(new CandidateAttributeSpan(firstSpans, ref));
+            hasMoreSpans = firstSpans.next();
+        }
+        Collections.sort(candidateList);
+        Collections.reverse(candidateList);
+    }
+
+    /** Decodes the element reference, a short at the start of the first payload. */
+    private short retrieveElementRef(Spans firstSpans) throws IOException {
+        List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
+        return ByteBuffer.wrap(payload.get(0)).getShort();
+    }
+
+    public short getElementRef(){
+        return this.elementRef;
+    }
+
+    public void setElementRef(short elementRef) {
+        this.elementRef = elementRef;
+    }
+
+    /** Skips to the first attribute in a document >= target. Candidates buffered for
+     *  earlier documents are dropped so they are never reported after the skip,
+     *  and a failed skip is recorded in hasMoreSpans. */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (!candidateList.isEmpty() && candidateList.get(0).getDoc() < target){
+            candidateList.clear();
+        }
+        if (hasMoreSpans && (firstSpans.doc() < target)){
+            hasMoreSpans = firstSpans.skipTo(target);
+            if (!hasMoreSpans){
+                return false;
+            }
+            currentDoc = firstSpans.doc();
+            currentPosition = firstSpans.start();
+        }
+        matchPayload.clear();
+        isStartEnumeration=false;
+        return advance();
+    }
+
+    /** Delegates the cost estimate to the wrapped spans. */
+    @Override
+    public long cost() {
+        return firstSpans.cost();
+    }
+
+    /** A buffered attribute span together with the element reference it carries. */
+    class CandidateAttributeSpan extends CandidateSpan
+            implements Comparable<CandidateAttributeSpan>{
+
+        private short elementRef;
+
+        public CandidateAttributeSpan(Spans span, short elementRef)
+                throws IOException {
+            super(span);
+            setElementRef(elementRef);
+        }
+
+        public void setElementRef(short elementRef) {
+            this.elementRef = elementRef;
+        }
+        public short getElementRef() {
+            return elementRef;
+        }
+
+        /** Orders candidates by ascending element reference. */
+        @Override
+        public int compareTo(CandidateAttributeSpan o) {
+            return Short.compare(this.elementRef, o.elementRef);
+        }
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
new file mode 100644
index 0000000..d2360ac
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -0,0 +1,97 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
+
+/** A wrapper matching the element and attribute spans. Specifically, it searches
+ * for the elements to which a certain attribute belongs.
+ *
+ * */
+public class ElementAttributeSpans extends SimpleSpans{
+
+ ElementSpans elements;
+ AttributeSpans attributes;
+
+ protected Logger logger = LoggerFactory.getLogger(ElementAttributeSpans.class);
+
+ public ElementAttributeSpans(SpanElementAttributeQuery simpleSpanQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(simpleSpanQuery, context, acceptDocs, termContexts);
+ elements = (ElementSpans) firstSpans;
+ attributes = (AttributeSpans) secondSpans;
+ hasMoreSpans = firstSpans.next() & secondSpans.next();
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ private boolean advance() throws IOException {
+
+ while (hasMoreSpans && ensureSamePosition(elements,attributes)){
+ logger.info("element: " + elements.start() + ","+ elements.end() +" ref:"+elements.getElementRef());
+ logger.info("attribute {} ref:{}", attributes.start(), attributes.getElementRef());
+
+ if (elements.getElementRef() == attributes.getElementRef()){
+ this.matchDocNumber = elements.doc();
+ this.matchStartPosition = elements.start();
+ this.matchEndPosition = elements.end();
+ this.matchPayload = elements.getPayload();
+ hasMoreSpans = attributes.next();
+ return true;
+ }
+
+ if (elements.getElementRef() < attributes.getElementRef())
+ hasMoreSpans = attributes.next();
+ else hasMoreSpans = elements.next();
+ }
+
+ return false;
+ }
+
+ private boolean ensureSamePosition(ElementSpans elements,
+ AttributeSpans attributes) throws IOException {
+
+ while (hasMoreSpans && ensureSameDoc(elements, attributes)){
+ if (attributes.start() == elements.start())
+ return true;
+ else if (attributes.start() > elements.start())
+ hasMoreSpans = elements.next();
+ else
+ hasMoreSpans= attributes.next();
+ }
+
+ return false;
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (attributes.doc() < target)){
+ if (!attributes.skipTo(target)){
+ return false;
+ }
+ }
+ matchPayload.clear();
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return elements.cost() + attributes.cost();
+ }
+
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 98bcae5..51e538e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -45,9 +45,9 @@
private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
-
+
/**
* The constructor.
*/
@@ -68,7 +68,6 @@
this.temp = new KorapTermSpan();
};
-
// only for EmptyElementSpans (below)
public ElementSpans() {
this.term = null;
@@ -282,20 +281,77 @@
@Override
public int end() {
- if (this.current.end >= 0)
- return this.current.end;
-
- try {
- this.current.end = this.getPayloadEndPosition();
- }
- catch (Exception e) {
- this.current.end = this.current.start;
- };
- return this.current.end;
+ if (!this.current.isPayloadRead){
+ try {
+ readPayload();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return this.current.end;
};
+    /* Returns the element reference of the current span, reading the payload first if needed. */
+    public short getElementRef() throws IOException{
+        if (!this.current.isPayloadRead){ readPayload(); }
+        final short ref = this.current.elementRef;
+        return ref;
+    }
- @Override
+    private void readPayload() throws IOException {
+        // Decodes the payload of the current posting into this.current.
+        this.current.clearPayload();
+        BytesRef payload = postings.getPayload();
+
+        if (payload != null) {
+            // Layout as read here and in the helpers: bytes 0-7 character offsets,
+            // bytes 8-11 end position, bytes 12-13 element reference (TODO confirm with indexer).
+            // Copy the first 8 bytes (start character and end character)
+            this.current.payload.put(payload.bytes, payload.offset, 8);
+            // Copy everything from byte 12 on, i.e. all that follows the end position
+            this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
+
+            this.current.end = readEndPostion(payload);
+            this.current.elementRef = readElementRef(payload);
+        }
+        else {
+            // No payload: the end falls back to the start and no reference is set
+            this.current.end = this.current.start;
+            this.current.elementRef = -1;
+        };
+        // Mark as decoded so that the isPayloadRead checks skip a second read
+        this.current.isPayloadRead = true;
+    }
+
+    private short readElementRef(BytesRef payload) {
+        // The reference is the 2-byte short at payload bytes 12-13 (after offsets and end position).
+        if (payload.length < 14)
+            return -1; // payload too short to hold a reference; -1 is the "no reference" value
+        return ByteBuffer.wrap(payload.bytes, payload.offset + 12, 2).getShort();
+    }
+
+
+
+ private int readEndPostion(BytesRef payload) {
+
+ this.payloadByte = new byte[4];
+ // Copy end position integer to payloadByte
+ System.arraycopy(payload.bytes, payload.offset + 8, this.payloadByte, 0, 4);
+
+ bb.clear();
+ int t = bb.wrap(payloadByte).getInt();
+
+ if (DEBUG)
+ log.trace("Get Endposition and payload: {}-{} with end position {} in doc {}",
+ this.current.payload.getInt(0),
+ this.current.payload.getInt(4),
+ t,
+ this.current.doc);
+
+ return t;
+ }
+
+ @Override
public long cost() {
// ???
return this.postings.cost();
@@ -305,8 +361,8 @@
@Override
public Collection<byte[]> getPayload() throws IOException {
byte[] offsetCharacters = new byte[8];
- if (this.current.end <= 0)
- this.getPayloadEndPosition();
+ if (!this.current.isPayloadRead)
+ readPayload();
System.arraycopy(this.current.payload.array(), 0, offsetCharacters, 0, 8);
@@ -335,9 +391,9 @@
private void setToCurrent (int debugNumber) throws IOException {
this.current.start = this.postings.nextPosition();
-
// This will directly save stored payloads
- this.current.end = this.getPayloadEndPosition();
+ //this.current.end = this.getPayloadEndPosition();
+ readPayload();
if (DEBUG)
log.trace(
@@ -355,59 +411,6 @@
};
- private int getPayloadEndPosition () {
- try {
- BytesRef payload = postings.getPayload();
-
- this.current.clearPayload();
-
- if (payload != null) {
-
- this.payloadByte = new byte[4];
-
- // Copy some payloads like start character and end character
- this.current.payload.put(payload.bytes, payload.offset, 8);
- this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
-
- // Copy end position integer to payloadByte
- System.arraycopy(payload.bytes, payload.offset + 8, this.payloadByte, 0, 4);
- }
-
- else {
- this.payloadByte = null;
- };
-
- // Todo: REWRITE!
- if (this.payloadByte != null) {
-
- // Todo: This is weird!
-
- bb.clear();
- int t = bb.wrap(payloadByte).getInt();
-
-
- if (DEBUG)
- log.trace("Get Endposition and payload: {}-{} with end position {} in doc {}",
- this.current.payload.getInt(0),
- this.current.payload.getInt(4),
- t,
- this.current.doc);
-
- return t;
- }
- else if (DEBUG) {
- log.trace("Get Endposition and payload: None found");
- };
- }
- catch (IOException e) {
- if (DEBUG)
- log.trace("IOException {}", e);
- };
-
- return -1;
- };
-
-
@Override
public boolean isPayloadAvailable() throws IOException {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
index 098573c..3905996 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
@@ -7,7 +7,9 @@
start = -1,
end = -1,
doc = -1;
-
+
+ public short elementRef = -1;
+
public KorapSpan () {
initPayload();
};
@@ -55,7 +57,15 @@
.append('(').append(this.doc).append(')')
.append(']')
.toString();
- };
+ }
+
+    public short getElementRef() { // returns the element reference of this span (the field starts at -1)
+        return elementRef;
+    }
+
+    public void setElementRef(short elementRef) { // stores the element reference of this span
+        this.elementRef = elementRef;
+    };
// equals und hashcode implementieren
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
index a12b565..184ee4b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
@@ -12,7 +12,8 @@
public class KorapTermSpan extends KorapSpan {
public ByteBuffer payload;
-
+ public boolean isPayloadRead = false;
+
private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
@@ -23,6 +24,8 @@
span.start = this.start;
span.end = this.end;
span.doc = this.doc;
+ span.isPayloadRead = this.isPayloadRead;
+ span.elementRef = this.elementRef;
if (this.payload != null) {
this.payload.rewind();
@@ -94,11 +97,16 @@
public int endChar () {
return this.payload.getInt(4);
};
-
+
+    public short elementRef(){ // reads the short at byte 8 of the payload buffer, i.e. after the 8 bytes that endChar() shares with the start offset
+        return this.payload.getShort(8);
+    }
+
public void reset () {
this.clearPayload();
this.start = -1;
this.end = -1;
this.doc = -1;
+ this.isPayloadRead = false;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 350d00e..ebc14a1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -30,7 +30,7 @@
protected Spans firstSpans, secondSpans;
protected int matchDocNumber, matchStartPosition, matchEndPosition;
- protected List<byte[]> matchPayload;
+ protected Collection<byte[]> matchPayload;
public SimpleSpans (SimpleSpanQuery simpleSpanQuery,
AtomicReaderContext context,
@@ -43,7 +43,7 @@
matchDocNumber= -1;
matchStartPosition= -1;
matchEndPosition= -1;
- matchPayload = new LinkedList<byte[]>();
+ matchPayload = new LinkedList<byte[]>(); // why linkedlist?
// Get the enumeration of the two spans to match
firstSpans = simpleSpanQuery.getFirstClause().
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
new file mode 100644
index 0000000..cae40aa
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -0,0 +1,71 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanAttributeQuery;
+import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+
+public class TestAttributeIndex {
+    // Tests SpanElementAttributeQuery on a single hand-built document.
+    private KorapIndex ki;
+    private KorapResult kr;
+    private FieldDocument fd;
+
+    public TestAttributeIndex() throws IOException {
+        ki = new KorapIndex();
+        ki.addDoc(createFieldDoc0());
+//        ki.addDoc(createFieldDoc1());
+//        ki.addDoc(createFieldDoc2());
+        ki.commit();
+    }
+    // Builds doc-0; presumably <>: marks an element, @: an attribute, <i> the end position and <s> the element reference (TODO confirm).
+    private FieldDocument createFieldDoc0(){
+        fd = new FieldDocument();
+        fd.addString("ID", "doc-0");
+        fd.addTV("base",
+             "bcbabd",
+             "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|<>:div#0-2$<i>2<s>3|@:class=header$<s>2|@:class=header$<s>3]" +
+             "[(1-2)s:c|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" +
+             "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" +
+             "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1]" +
+             "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2]" +
+             "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>2|<>:div#5-6$<i>6<s>1|@:class=header$<s>1|@:class=header$<s>2]");
+        return fd;
+    }
+    // Searches for div elements with class=header. Reading <s>N as the element reference, the divs at 0-2, 0-3, 3-5 and 5-6
+    // qualify; the div at 2 has class=time, and the header at 4 carries reference 2, which is the <a> element, not the div.
+    @Test
+    public void testCase1() {
+        SpanAttributeQuery saq = new SpanAttributeQuery(
+                new SpanTermQuery(new Term("base","@:class=header")),
+                true);
+
+        SpanQuery sq = new SpanElementAttributeQuery(
+                new SpanElementQuery("base", "div"),
+                saq, true);
+
+        kr = ki.search(sq, (short) 10);
+
+        assertEquals(4, kr.getTotalResults());
+        assertEquals(0,kr.getMatch(0).getStartPos());
+        assertEquals(2,kr.getMatch(0).getEndPos());
+        assertEquals(0,kr.getMatch(1).getStartPos());
+        assertEquals(3,kr.getMatch(1).getEndPos());
+        assertEquals(3,kr.getMatch(2).getStartPos());
+        assertEquals(5,kr.getMatch(2).getEndPos());
+        assertEquals(5,kr.getMatch(3).getStartPos());
+        assertEquals(6,kr.getMatch(3).getEndPos());
+    }
+
+
+}