Fixed missing rewrite in SpanNext() queries
diff --git a/CHANGES b/CHANGES
index f9690c3..fc83369 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,6 +5,8 @@
SpanSequenceQueryWrapper (diewald)
- [bugfix] Sequences with [problem][problem][anchor] can now
be deserialized (diewald)
+ - [bugfix] Queries with regular expressions in spanNext() are now
+ correctly rewritten (diewald)
0.47 2014-11-05
- [feature] Support new index format with more metadata (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index 7274826..122a65b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -10,6 +10,8 @@
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
@@ -17,9 +19,11 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
+
import de.ids_mannheim.korap.query.spans.NextSpans;
-/** Matches spans which are directly next to each other.
+/**
+ * Matches spans which are directly next to each other.
* this is identical to a phrase query with exactly two clauses.
*/
public class SpanNextQuery extends SimpleSpanQuery implements Cloneable {
@@ -57,7 +61,39 @@
spanNextQuery.setBoost(getBoost());
return spanNextQuery;
};
-
+
+
+ /**
+ * Rewrites both clauses against the reader (e.g. to expand regular expressions or wildcards); returns this query itself if neither clause changed, otherwise a clone holding the rewritten clauses.
+ */
+ @Override
+ public Query rewrite (IndexReader reader) throws IOException {
+ SpanNextQuery clone = null;
+
+ // Does the first clause need a rewrite? (identity check: a different object is treated as a rewritten clause)
+ SpanQuery query = (SpanQuery) firstClause.rewrite(reader);
+ if (query != firstClause) {
+ if (clone == null)
+ clone = this.clone();
+ clone.firstClause = query;
+ };
+
+ // Does the second clause need a rewrite? (reuses the clone if the first clause already created one)
+ query = (SpanQuery) secondClause.rewrite(reader);
+ if (query != secondClause) {
+ if (clone == null)
+ clone = this.clone();
+ clone.secondClause = query;
+ };
+
+ // At least one clause was rewritten: return the clone that carries the rewritten clauses
+ if (clone != null)
+ return clone;
+
+ return this;
+ };
+
+
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 8c407ad..c2d73e2 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -755,37 +755,46 @@
ki.addDocFile(
1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
);
- ki.addDocFile(
- 2,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
- );
-
ki.commit();
+ // [tt/p="A.*"]{0,3}[tt/p="N.*"]
String json = getString(
getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile()
);
KorapSearch ks = new KorapSearch(json);
KorapResult kr = ks.run(ki);
- assertEquals(kr.getQuery(),"");
-
-
- /*
-
assertEquals(
kr.getQuery(),
- "{4: spanNext({1: spanNext({2: tokens:s:ins}, {3: tokens:s:Leben})}, tokens:s:gerufen)}"
- );
- assertEquals(
- kr.getMatch(0).getSnippetBrackets(),
- "... sozialistischen Initiative\" eine neue politische Gruppierung " +
- "[{4:{1:{2:ins} {3:Leben}} gerufen}] hatten. " +
- "Pressemeldungen zufolge haben sich in ..."
+ "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), " +
+ "spanNext(spanRepetition(SpanMultiTermQueryWrapper(tokens:/tt/p:A.*/){1,3}), " +
+ "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])"
);
- assertEquals(2, kr.getTotalResults());
+ assertEquals(58, kr.getTotalResults());
assertEquals(0, kr.getStartIndex());
- */
+
+ assertEquals(
+ kr.getMatch(0).getSnippetBrackets(),
+ "[Saragat-Partei] zerfällt Rom (ADN) die von dem"
+ );
+ assertEquals(
+ kr.getMatch(1).getSnippetBrackets(),
+ "[Saragat-Partei] zerfällt Rom (ADN) die von dem"
+ );
+ assertEquals(
+ kr.getMatch(2).getSnippetBrackets(),
+ "Saragat-Partei zerfällt [Rom] (ADN) die von dem Rechtssozialisten Saragat"
+ );
+ assertEquals(
+ kr.getMatch(3).getSnippetBrackets(),
+ "Saragat-Partei zerfällt Rom ([ADN]) die von dem Rechtssozialisten Saragat geführte"
+ );
+
+ assertEquals(
+ kr.getMatch(23).getSnippetBrackets(),
+ "dem Namen \"Einheitsbewegung der sozialistischen Initiative\" [eine neue politische Gruppierung] ins Leben gerufen hatten. Pressemeldungen zufolge"
+ );
};