ElementAttributeQuery with multiple attributes and negation;
update SimpleSpanQuery to accommodate a list of queries as the second clause.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
index 1e24d7f..3582ca0 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
+import java.util.List;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@@ -16,6 +17,7 @@
implements Cloneable{
protected SpanQuery firstClause, secondClause;
+ protected List<SpanQuery> clauseList;
private String field;
protected boolean collectPayloads;
@@ -28,13 +30,34 @@
public SimpleSpanQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
this(firstClause,collectPayloads);
- if (!secondClause.getField().equals(field)){
+ checkField(secondClause);
+ this.setSecondClause(secondClause);
+ }
+
+ public SimpleSpanQuery(SpanQuery firstClause, List<SpanQuery>
+ secondClauses, boolean collectPayloads) {
+ this(firstClause,collectPayloads);
+ for (SpanQuery secondClause : secondClauses){
+ checkField(secondClause);
+ }
+ this.setClauseList(secondClauses);
+ }
+
+ private void checkField(SpanQuery clause) {
+ if (!clause.getField().equals(field)){
throw new IllegalArgumentException(
"Clauses must have the same field.");
- }
- this.setSecondClause(secondClause);
- }
-
+ }
+ }
+
+ public List<SpanQuery> getClauseList() {
+ return clauseList;
+ }
+
+ public void setClauseList(List<SpanQuery> clauseList) {
+ this.clauseList = clauseList;
+ }
+
@Override
public String getField() {
return field;
@@ -68,8 +91,15 @@
@Override
public void extractTerms(Set<Term> terms) {
firstClause.extractTerms(terms);
- if (secondClause != null)
+ if (secondClause != null){
secondClause.extractTerms(terms);
+ }
+ else if (clauseList != null){
+ for (SpanQuery clause : clauseList){
+ clause.extractTerms(terms);
+ }
+ }
+
};
@Override
@@ -79,10 +109,26 @@
if (secondClause != null){
clone = updateClone(reader, clone, secondClause, 2);
}
+ else if (clauseList != null){
+ clone = updateClone(reader, clone, clauseList);
+ }
return (clone != null ? clone : this );
}
private SimpleSpanQuery updateClone(IndexReader reader, SimpleSpanQuery clone,
+ List<SpanQuery> spanQueries) throws IOException{
+
+ for (int i=0; i < spanQueries.size(); i++){
+ SpanQuery query = (SpanQuery) spanQueries.get(i).rewrite(reader);
+ if (!query.equals(spanQueries.get(i))) {
+ if (clone == null) clone = clone();
+ clone.getClauseList().set(i, query);
+ }
+ }
+ return clone;
+ }
+
+ private SimpleSpanQuery updateClone(IndexReader reader, SimpleSpanQuery clone,
SpanQuery sq, int clauseNumber) throws IOException{
SpanQuery query = (SpanQuery) sq.rewrite(reader);
if (!query.equals(sq)) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
index d43f67d..16b771c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -6,7 +6,6 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
@@ -15,10 +14,17 @@
import de.ids_mannheim.korap.query.spans.AttributeSpans;
public class SpanAttributeQuery extends SimpleSpanQuery{
-
+
+ boolean isNegation;
+
public SpanAttributeQuery(SpanTermQuery firstClause, boolean collectPayloads) {
super(firstClause, collectPayloads);
}
+
+ public SpanAttributeQuery(SpanTermQuery firstClause, boolean isNegation, boolean collectPayloads) {
+ super(firstClause, collectPayloads);
+ this.isNegation = isNegation;
+ }
@Override
public SimpleSpanQuery clone() {
@@ -45,4 +51,12 @@
return sb.toString();
}
+ public boolean isNegation() {
+ return isNegation;
+ }
+
+ public void setNegation(boolean isNegation) {
+ this.isNegation = isNegation;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
index 8b74661..d050682 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementAttributeQuery.java
@@ -1,11 +1,14 @@
package de.ids_mannheim.korap.query;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
@@ -13,18 +16,42 @@
public class SpanElementAttributeQuery extends SimpleSpanQuery{
+ boolean isMultipleAttributes;
+
public SpanElementAttributeQuery(SpanElementQuery firstClause,
SpanAttributeQuery secondClause, boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
}
+
+ public SpanElementAttributeQuery(SpanElementQuery firstClause,
+ List<SpanQuery> secondClauses, boolean collectPayloads) {
+ super(firstClause, secondClauses, collectPayloads);
+ isMultipleAttributes = true;
+ }
@Override
public SimpleSpanQuery clone() {
- SpanElementAttributeQuery sq = new SpanElementAttributeQuery(
- (SpanElementQuery) firstClause.clone(),
- (SpanAttributeQuery) secondClause.clone(),
- collectPayloads);
- return null;
+ SpanElementAttributeQuery sq;
+ if (!isMultipleAttributes){
+ sq = new SpanElementAttributeQuery(
+ (SpanElementQuery) firstClause.clone(),
+ (SpanAttributeQuery) secondClause.clone(),
+ collectPayloads);
+ }
+ else {
+ List<SpanQuery> clauseList = new ArrayList<SpanQuery>();
+ SpanAttributeQuery saq;
+ for (SpanQuery q : this.clauseList ){
+ saq = (SpanAttributeQuery) q;
+ clauseList.add(saq.clone());
+ }
+
+ sq = new SpanElementAttributeQuery(
+ (SpanElementQuery) firstClause.clone(),
+ clauseList,
+ collectPayloads);
+ }
+ return sq;
}
@Override
@@ -40,7 +67,16 @@
sb.append("(");
sb.append(firstClause.toString(field));
sb.append(", ");
- sb.append(secondClause.toString(field));
+ if (isMultipleAttributes){
+ sb.append("[");
+ for (SpanQuery sq : clauseList){
+ sb.append(sq.toString(field));
+ }
+ sb.append("]");
+ }
+ else {
+ sb.append(secondClause.toString(field));
+ }
sb.append(")");
return sb.toString();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
index 43cff3f..5cc4d28 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
@@ -18,7 +18,6 @@
* No repetition of constraints of the same type is allowed. For example,
* there must only exactly one constraint for word/token-based distance.
*
- * Warning: Exclusion constraint is not suitable yet!
* @author margaretha
* */
public class SpanMultipleDistanceQuery extends SimpleSpanQuery{
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 7f57290..4f468ef 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -33,7 +33,8 @@
private List<CandidateAttributeSpan> candidateList;
private int currentDoc, currentPosition;
- public short elementRef;
+ private short elementRef;
+ private boolean isFinish;
protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
@@ -114,6 +115,14 @@
this.elementRef = elementRef;
}
+ public boolean isFinish() {
+ return isFinish;
+ }
+
+ public void setFinish(boolean isFinish) {
+ this.isFinish = isFinish;
+ }
+
@Override
public boolean skipTo(int target) throws IOException {
if (hasMoreSpans && (firstSpans.doc() < target)){
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index f6b72da..1c9eed8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -1,25 +1,35 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
-/** A wrapper matching the element and attribute spans. Specifically searching
- * the elements to which a certain attribute belongs to.
+/** Span enumeration of elements that have certain attributes and/or do <em>not</em>
+ * have certain other attributes. This class handles the <em>and</em> operation on attributes.
*
+ * Use SpanOrQuery to perform an <em>or</em> operation on attributes, i.e. to choose
+ * between two elements with some attribute constraints. Note that the attribute
+ * constraints have to be in Conjunctive Normal Form (CNF).
+ *
+ * @author margaretha
* */
public class ElementAttributeSpans extends SimpleSpans{
- ElementSpans elements;
- AttributeSpans attributes;
+ private ElementSpans elements;
+ private List<AttributeSpans> attributeList;
+ private List<AttributeSpans> notAttributeList;
protected Logger logger = LoggerFactory.getLogger(ElementAttributeSpans.class);
@@ -28,9 +38,25 @@
Map<Term, TermContext> termContexts) throws IOException {
super(simpleSpanQuery, context, acceptDocs, termContexts);
elements = (ElementSpans) firstSpans;
- attributes = (AttributeSpans) secondSpans;
elements.isElementRef = true; // dummy setting enabling reading elementRef
- hasMoreSpans = elements.next() & attributes.next();
+ hasMoreSpans = elements.next();
+
+ attributeList = new ArrayList<AttributeSpans>();
+ notAttributeList = new ArrayList<AttributeSpans>();
+
+ List<SpanQuery> sqs = simpleSpanQuery.getClauseList();
+ AttributeSpans as;
+ for (SpanQuery sq: sqs){
+ as = (AttributeSpans) sq.getSpans(context, acceptDocs, termContexts);
+ if (((SpanAttributeQuery) sq).isNegation()){
+ notAttributeList.add(as);
+ as.next();
+ }
+ else {
+ attributeList.add(as);
+ hasMoreSpans &= as.next();
+ }
+ }
}
@Override
@@ -38,42 +64,121 @@
isStartEnumeration=false;
return advance();
}
-
+
private boolean advance() throws IOException {
- while (hasMoreSpans && ensureSamePosition(elements,attributes)){
-
+ while (hasMoreSpans && computeElementPosition()){
logger.info("element: " + elements.start() + ","+ elements.end() +" ref:"+elements.getElementRef());
- if (elements.getElementRef() < 1){
+ if (checkElementRef() && checkNotElementRef()){
+ this.matchDocNumber = elements.doc();
+ this.matchStartPosition = elements.start();
+ this.matchEndPosition = elements.end();
+ this.matchPayload = elements.getPayload();
+ hasMoreSpans = attributeList.get(0).next();
+ logger.info("MATCH "+matchDocNumber);
+
+ hasMoreSpans = elements.next();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean checkElementRef() throws IOException{
+
+ for (AttributeSpans attribute: attributeList){
+ if (elements.getElementRef() != attribute.getElementRef()){
+ logger.info("attribute ref doesn't match");
+ if (elements.getElementRef() < attribute.getElementRef())
+ hasMoreSpans = attribute.next();
+ else {
+ hasMoreSpans = elements.next();
+ }
+
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ private boolean checkNotElementRef() throws IOException{
+ for (AttributeSpans notAttribute: notAttributeList){
+ if (elements.start() == notAttribute.start() &&
+ elements.getElementRef() == notAttribute.getElementRef()){
+ logger.info("not attribute ref exists");
+ hasMoreSpans = elements.next();
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ private boolean computeElementPosition() throws IOException {
+
+ while (hasMoreSpans){
+
+ if (elements.getElementRef() < 1){ // the element does not have an attribute
elements.isElementRef = true; // dummy setting enabling reading elementRef
hasMoreSpans = elements.next();
logger.info("skip");
continue;
}
- logger.info("attribute {} ref:{}", attributes.start(), attributes.getElementRef());
-
- if (elements.getElementRef() == attributes.getElementRef()){
- this.matchDocNumber = elements.doc();
- this.matchStartPosition = elements.start();
- this.matchEndPosition = elements.end();
- this.matchPayload = elements.getPayload();
- hasMoreSpans = attributes.next();
+ if (checkAttributeListPosition() &&
+ checkNotAttributeListPosition()){
+ logger.info("element is found: "+ elements.start());
return true;
- }
-
- if (elements.getElementRef() < attributes.getElementRef())
- hasMoreSpans = attributes.next();
- else {
- elements.isElementRef = true; // dummy setting enabling reading elementRef
- hasMoreSpans = elements.next();
- }
- }
+ }
+ }
return false;
}
-
+
+ private boolean checkNotAttributeListPosition() throws IOException{
+
+ for (AttributeSpans a : notAttributeList){
+ // advance each negated AttributeSpans up to the current element's doc and start position
+ logger.info("a "+a.start());
+ while (!a.isFinish() && a.doc() <= elements.doc()){
+
+ if (a.doc() == elements.doc() &&
+ a.start() >= elements.start())
+ break;
+
+ if (!a.next()) a.setFinish(true);
+ }
+ }
+
+ return true;
+ }
+
+ private boolean checkAttributeListPosition() throws IOException{
+ int currentPosition = elements.start();
+ boolean isSame = true;
+ boolean isFirst = true;
+
+ for (AttributeSpans a : attributeList){
+ if(!ensureSamePosition(elements, a)) return false;
+
+ logger.info("pos:" + elements.start());
+ if (isFirst){
+ isFirst = false;
+ currentPosition = elements.start();
+ }
+ else if (currentPosition != elements.start()){
+ currentPosition = elements.start();
+ isSame = false;
+
+ }
+ }
+ logger.info("same pos: "+isSame+ ", pos "+elements.start());
+ return isSame;
+ }
+
+
private boolean ensureSamePosition(ElementSpans elements,
AttributeSpans attributes) throws IOException {
@@ -91,19 +196,26 @@
@Override
public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (attributes.doc() < target)){
- if (!attributes.skipTo(target)){
+ if (hasMoreSpans && (elements.doc() < target)){
+ if (!elements.skipTo(target)){
return false;
}
}
- matchPayload.clear();
isStartEnumeration=false;
return advance();
}
@Override
public long cost() {
- return elements.cost() + attributes.cost();
+
+ long cost = 0;
+ for (AttributeSpans as: attributeList){
+ cost += as.cost();
+ }
+ for (AttributeSpans as: notAttributeList){
+ cost += as.cost();
+ }
+ return elements.cost() + cost;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
index 51ec7bd..997ab09 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
@@ -16,11 +16,11 @@
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
-/** Span enumeration of matches whose two sub-spans has exactly the same
+/** Span enumeration of matches whose two sub-spans have exactly the same
* first and second sub-sub-spans. This class basically filters the span
* matches of its child spans.
*
- * TODO: This doesn't accommodate distance constraint with exclusion
+ * This accommodates distance constraints with exclusion:
* Case 1: return the match from another non-exclusion constraint.
* Case 2: return only the first-span when all constraints are exclusions.
* Case 3: spans are not in the same doc
@@ -102,14 +102,17 @@
if (x.isExclusion() || y.isExclusion()){
if (xf.getStart() == yf.getStart() && xf.getEnd() == yf.getEnd()){
+ // case 2
if (x.isExclusion() && y.isExclusion()){
// set x or y doesnt matter
setMatchProperties(x,true);
}
+ // case 1
else if (x.isExclusion()){
// set y, the usual match
setMatchProperties(y,true);
}
+ // case 1
else { setMatchProperties(x,true); }
return true;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 4400f6d..265d9af 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -49,6 +49,7 @@
// Get the enumeration of the two spans to match
firstSpans = simpleSpanQuery.getFirstClause().
getSpans(context, acceptDocs, termContexts);
+
if (simpleSpanQuery.getSecondClause() != null)
secondSpans = simpleSpanQuery.getSecondClause().
getSpans(context, acceptDocs, termContexts);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index def6a49..b8feb03 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -3,6 +3,8 @@
import static org.junit.Assert.assertEquals;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
@@ -14,29 +16,26 @@
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanElementAttributeQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
public class TestAttributeIndex {
- private KorapIndex ki;
+ private KorapIndex ki = new KorapIndex();
private KorapResult kr;
private FieldDocument fd;
public TestAttributeIndex() throws IOException {
- ki = new KorapIndex();
- ki.addDoc(createFieldDoc0());
-// ki.addDoc(createFieldDoc1());
-// ki.addDoc(createFieldDoc2());
- ki.commit();
+ ki = new KorapIndex();
}
- private FieldDocument createFieldDoc0(){
+ private FieldDocument createFieldDoc0(){
fd = new FieldDocument();
fd.addString("ID", "doc-0");
fd.addTV("base",
"bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1|@:class=header$<s>2]" +
- "[(1-2)s:c|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" +
- "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" +
+ "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1|@:class=header$<s>2]" +
+ "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" +
+ "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" +
"[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1]" +
"[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2]" +
"[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1]"+
@@ -45,16 +44,57 @@
return fd;
}
+ private FieldDocument createFieldDoc1(){
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base",
+ "bcbabd",
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1|@:class=title$<s>1|@:class=book$<s>1]" +
+ "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1|@:class=title$<s>1]" +
+ "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1]" +
+ "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
+ "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=title$<s>1]" +
+ "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
+ "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=title$<s>1]");
+
+ return fd;
+ }
+ private FieldDocument createFieldDoc2(){
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base",
+ "bcbabd",
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2|@:class=book$<s>1|@:class=book$<s>2]" +
+ "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2|@:class=header$<s>1]" +
+ "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2|@:class=book$<s>1]" +
+ "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
+ "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=book$<s>1]" +
+ "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
+ "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=book$<s>2]");
+
+ return fd;
+ }
+
+
+ /** Test matching elementRef
+ * @throws IOException
+ * */
@Test
- public void testCase1() {
+ public void testCase1() throws IOException {
+ ki.addDoc(createFieldDoc0());
+ ki.commit();
+
SpanAttributeQuery saq = new SpanAttributeQuery(
new SpanTermQuery(new Term("base","@:class=header")),
true);
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(saq);
+
SpanQuery sq = new SpanElementAttributeQuery(
new SpanElementQuery("base", "div"),
- saq, true);
+ sql, true);
kr = ki.search(sq, (short) 10);
@@ -69,5 +109,129 @@
assertEquals(7,kr.getMatch(3).getEndPos());
}
+ /** Test multiple attributes and negation
+ * @throws IOException
+ * */
+ @Test
+ public void testCase2() throws IOException{
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=header")),true)
+ );
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=title")),true)
+ );
+
+ SpanQuery sq = new SpanElementAttributeQuery(
+ new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals(4, kr.getTotalResults());
+ assertEquals(0,kr.getMatch(0).getStartPos());
+ assertEquals(3,kr.getMatch(0).getEndPos());
+ assertEquals(1,kr.getMatch(1).getStartPos());
+ assertEquals(2,kr.getMatch(1).getEndPos());
+ assertEquals(4,kr.getMatch(2).getStartPos());
+ assertEquals(5,kr.getMatch(2).getEndPos());
+ assertEquals(6,kr.getMatch(3).getStartPos());
+ assertEquals(7,kr.getMatch(3).getEndPos());
+
+ // Add not Attribute
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=book")),true,true)
+ );
+
+ sq = new SpanElementAttributeQuery(
+ new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals(2, kr.getTotalResults());
+ assertEquals(1,kr.getMatch(0).getStartPos());
+ assertEquals(2,kr.getMatch(0).getEndPos());
+ assertEquals(6,kr.getMatch(1).getStartPos());
+ assertEquals(7,kr.getMatch(1).getEndPos());
+
+ // Test multiple negations
+ sql.remove(1);
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=title")),true,true)
+ );
+
+ sq = new SpanElementAttributeQuery(
+ new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals(1, kr.getTotalResults());
+ assertEquals(5,kr.getMatch(0).getStartPos());
+ assertEquals(6,kr.getMatch(0).getEndPos());
+ }
+
+ /** same attribute types referring to different element types
+ * */
+ @Test
+ public void testCase3() throws IOException{
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=header")),true)
+ );
+ sql.add(new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=book")),true,true)
+ );
+ SpanQuery sq = new SpanElementAttributeQuery(
+ new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(1,kr.getMatch(0).getStartPos());
+ assertEquals(2,kr.getMatch(0).getEndPos());
+ assertEquals(5,kr.getMatch(1).getStartPos());
+ assertEquals(6,kr.getMatch(1).getEndPos());
+ assertEquals(6,kr.getMatch(2).getStartPos());
+ assertEquals(7,kr.getMatch(2).getEndPos());
+ }
+
+ /** Test SkipTo Doc */
+ @Test
+ public void testCase4() throws IOException{
+ ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc0());
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanAttributeQuery saq = new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=book")),
+ true);
+
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(saq);
+
+ SpanElementAttributeQuery sq = new SpanElementAttributeQuery(
+ new SpanElementQuery("base", "div"),
+ sql, true);
+
+ SpanNextQuery snq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:e"))
+ ,sq);
+
+ kr = ki.search(snq, (short) 10);
+
+ assertEquals(2,kr.getMatch(0).getLocalDocID());
+ assertEquals(2,kr.getMatch(0).getStartPos());
+ assertEquals(5,kr.getMatch(0).getEndPos());
+ }
+
}