Added Span expansion query with exclusion
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index 8a00787..5749746 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -233,6 +233,4 @@
assertEquals(1,kr.getMatch(0).getStartPos());
assertEquals(5,kr.getMatch(0).getEndPos());
}
-
-
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
index 038b157..a3e649b 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
@@ -179,7 +179,7 @@
constraints.add(createConstraint("p", 0, 0, false, false));
mdq = createQuery("s:b", "s:c", constraints,false);
kr = ki.search(mdq, (short) 10);
- assertEquals(3, kr.getTotalResults());
+ assertEquals(3, kr.getTotalResults());
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
index 02df26c..243627c 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
@@ -11,6 +11,7 @@
import org.junit.Test;
import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapMatch;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
@@ -43,15 +44,15 @@
fd.addString("ID", "doc-1");
fd.addTV("base",
"text",
- "[(0-1)s:b|_1#0-1|<>:s#0-2$<i>1]" +
- "[(1-2)s:e|_2#1-2|<>:s#1-2$<i>4]" +
+ "[(0-1)s:b|_1#0-1]" +
+ "[(1-2)s:e|_2#1-2]" +
"[(2-3)s:c|_3#2-3]" +
- "[(3-4)s:c|s:d|_4#3-4]" +
- "[(4-5)s:d|_5#4-5|<>:s#4-5$<i>7]" +
- "[(5-6)s:e|_6#5-6]" +
+ "[(3-4)s:c|s:d]" +
+ "[(4-5)s:d|s:c|_5#4-5]" +
+ "[(5-6)s:e|s:c|_6#5-6]" +
"[(6-7)s:e|_7#6-7]" +
- "[(7-8)s:c|_8#7-8|<>:x#7-9$<i>9]" +
- "[(8-9)s:d|_9#8-9|<>:x#8-10$<i>10]" +
+ "[(7-8)s:c|_8#7-8]" +
+ "[(8-9)s:d|_9#8-9]" +
"[(9-10)s:d|_10#9-10]");
return fd;
}
@@ -81,8 +82,7 @@
"[(2-3)s:e|_3#2-3]");
return fd;
}
-
-
+
@Test
public void testCase1() throws IOException{
ki = new KorapIndex();
@@ -91,6 +91,7 @@
SpanQuery sq, sq2;
// Quantifier only
+ // c{1,2}
sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),1,2, true);
kr = ki.search(sq, (short) 10);
// 0-1, 2-3, 2-4, 3-4, 5-6
@@ -133,10 +134,13 @@
ki.commit();
SpanQuery sq;
+ // c{2,2}
sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),2,2, true);
kr = ki.search(sq, (short) 10);
- assertEquals(4,kr.getTotalResults());
-
+ // doc1 2-4, 3-5, 4-6
+ assertEquals(6,kr.getTotalResults());
+
+ // ec{2,2}
kr = ki.search(sq, (short) 10);
sq = new SpanNextQuery(
new SpanTermQuery(new Term("base", "s:e")),
@@ -173,6 +177,26 @@
assertEquals(6, kr.getMatch(1).endPos);
assertEquals(7, kr.getMatch(2).startPos);
assertEquals(9, kr.getMatch(2).endPos);
+
+ }
+
+ @Test
+ public void testCase4() throws IOException {
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+
+ SpanQuery sq;
+ // c{1,3}
+ sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),1,3, true);
+ kr = ki.search(sq, (short) 10);
+ // 2-3, 2-4, 2-5, 3-4, 3-5, 3-6, 4-5, 4-6, 5-6, 7-8
+ assertEquals(10,kr.getTotalResults());
+
+ sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base","s:c")),2,3, true);
+ kr = ki.search(sq, (short) 10);
+ // c{2,3}: 2-4, 2-5, 3-5, 3-6, 4-6
+ assertEquals(5,kr.getTotalResults());
// System.out.print(kr.getTotalResults()+"\n");
// for (int i=0; i< kr.getTotalResults(); i++){
@@ -180,7 +204,45 @@
// kr.match(i).getLocalDocID()+" "+
// kr.match(i).startPos + " " +
// kr.match(i).endPos
-// );
+// );
// }
}
+ /** Repetition cnx/p:A{2,3} followed by s:Mann, searched in the wiki document AAA-12402. */
+ @Test
+ public void testCase5() throws IOException {
+ ki = new KorapIndex();
+ for (String i : new String[] {"AAA-12402"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+ // sq2 = sq1 (cnx/p:A with repetition bounds 2 and 3) followed by sq0 (s:Mann)
+ SpanQuery sq0, sq1, sq2;
+ sq0 = new SpanTermQuery(new Term("tokens", "s:Mann"));
+ sq1 = new SpanRepetitionQuery(new SpanTermQuery(new Term("tokens","cnx/p:A")),2,3, true);
+ sq2 = new SpanNextQuery(sq1,sq0);
+ kr = ki.search(sq2, (short) 10);
+ // two matches are expected (672-676 and 673-676), both ending at position 676
+ assertEquals(2,kr.getTotalResults());
+ assertEquals(672, kr.getMatch(0).getStartPos());
+ assertEquals(676, kr.getMatch(0).getEndPos());
+ assertEquals(673, kr.getMatch(1).getStartPos());
+ assertEquals(676, kr.getMatch(1).getEndPos());
+
+ // put s:scheinbar in front of the previous query; a single match 672-676 is expected
+ sq2 = new SpanNextQuery(
+ new SpanTermQuery(new Term("tokens", "s:scheinbar")),
+ sq2);
+ kr = ki.search(sq2, (short) 10);
+
+ assertEquals(1,kr.getTotalResults());
+ assertEquals(672, kr.getMatch(0).getStartPos());
+ assertEquals(676, kr.getMatch(0).getEndPos());
+
+ /* for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getSnippetBrackets());
+ System.out.println(km.getStartPos() +","+km.getEndPos());
+ }*/
+ }
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 4c1704e..1a9524a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -34,7 +34,7 @@
public void testCase1() throws IOException {
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","s:Kaiser") );
- SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, true, true);
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
kr = ki.search(seq, (short) 10);
assertEquals(72,kr.getTotalResults());
@@ -45,7 +45,7 @@
assertEquals(7, kr.getMatch(2).getStartPos());
assertEquals(8, kr.getMatch(2).getEndPos());
- seq = new SpanExpansionQuery(stq, 3, 4, false, true);
+ seq = new SpanExpansionQuery(stq, 3, 4, 0, true);
kr = ki.search(seq, (short) 10);
assertEquals(7, kr.getMatch(0).getStartPos());
@@ -57,11 +57,11 @@
assertEquals(15, kr.getMatch(3).getStartPos());
assertEquals(20, kr.getMatch(3).getEndPos());
-// for (KorapMatch km : kr.getMatches()){
-// System.out.println(km.getStartPos() +","+km.getEndPos()+" "
-// +km.getSnippetBrackets());
-// }
-
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }
+ */
}
/** Classnumber
@@ -73,12 +73,12 @@
// create new payload for the expansion offsets
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","s:Kaiser") );
- SpanExpansionQuery sq = new SpanExpansionQuery(stq, 0, 2, classNumber, true, true);
+ SpanExpansionQuery sq = new SpanExpansionQuery(stq, 0, 2, -1, classNumber, true);
kr = ki.search(sq, (short) 10);
// add expansion offsets to the existing payload
SpanElementQuery seq = new SpanElementQuery("tokens", "cnx/c:np");
- sq = new SpanExpansionQuery(seq, 1, 2, classNumber, false, true);
+ sq = new SpanExpansionQuery(seq, 1, 2, 0, classNumber, true);
kr = ki.search(sq, (short) 10);
/*for (KorapMatch km : kr.getMatches()){
@@ -87,4 +87,39 @@
}*/
}
+
+ /** Expansion with exclusion: expands cnx/p:N and passes s:September as notQuery.
+ * NOTE(review): the search is only executed; nothing is asserted on the result yet. */
+ @Test
+ public void testCase3() throws IOException {
+
+ SpanTermQuery stq = new SpanTermQuery(new Term("tokens","cnx/p:N") );
+ SpanTermQuery notQuery = new SpanTermQuery(new Term("tokens","s:September"));
+ // NOTE(review): 2, 3, 0 are presumably min, max and direction - confirm against SpanExpansionQuery
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 2, 3, 0, true);
+ kr = ki.search(seq, (short) 20);
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }*/
+ }
+
+ /** Expansion with exclusion: expands cnx/p:N and passes cnx/p:A as notQuery.
+ * NOTE(review): only prints the matches to standard output; there are no assertions yet. */
+ @Test
+ public void testCase4() throws IOException {
+
+ SpanTermQuery stq = new SpanTermQuery(new Term("tokens","cnx/p:N") );
+ SpanTermQuery notQuery = new SpanTermQuery(new Term("tokens","cnx/p:A"));
+ // NOTE(review): 0, 2, -1 are presumably min, max and direction - confirm against SpanExpansionQuery
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 2, -1, true);
+ kr = ki.search(seq, (short) 10);
+
+ for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }
+ }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index ae2cef4..179effa 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -54,7 +54,7 @@
kr = ki.search(ssq, (short) 10);
assertEquals(34, kr.getMatch(0).getStartPos());
- assertEquals(35, kr.getMatch(0).getEndPos());
+ assertEquals(36, kr.getMatch(0).getEndPos());
assertEquals(85, kr.getMatch(1).getStartPos());
assertEquals(87, kr.getMatch(1).getEndPos());
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedDistanceIndex.java
index b9f7065..ddff51d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedDistanceIndex.java
@@ -283,7 +283,7 @@
getClass().getResource("/a00/" + i + ".json.gz").getFile(), true
);
};
- ki.commit();
+ ki.commit();
SpanQuery sq = new SpanDistanceQuery(
new SpanTermQuery(new Term("tokens","s:in")),
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
index a5a945e..4f6226d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestWPDIndex.java
@@ -84,6 +84,12 @@
ks = new KorapSearch(sq);
kr = ks.run(ki);
assertEquals(11, kr.getTotalResults());
+ //System.out.println(kr.getTotalResults());
+ //for (KorapMatch km : kr.getMatches()){
+ //System.out.println(km.getDocID() +" "+km.getStartPos() +" "+ km.getEndPos());
+ //System.out.println(km.getSnippetBrackets());
+ //System.out.println(km.toJSON());
+ //}
}
/** Token exclusion distance spans */
@@ -107,12 +113,6 @@
ks = new KorapSearch(sq);
kr = ks.run(ki);
assertEquals(1896, kr.getTotalResults());
-
-// System.out.println(kr.getTotalResults());
-// for (KorapMatch km : kr.getMatches()){
-// System.out.println(km.getDocID() +" "+km.getStartPos() +" "+ km.getEndPos());
-// System.out.println(km.getSnippetBrackets());
-// }
}
/** Element distance spans */
@@ -169,7 +169,7 @@
assertEquals(451,kr.getMatch(1).getEndPos());
}
- /** Quantifier */
+ /** Repetition */
@Test
public void testCase5() throws IOException{
SpanQuery sq;
@@ -191,7 +191,7 @@
//0.65s
}
- /** Next and quantifier */
+ /** Next and repetition */
@Test
public void testCase6() throws IOException{
SpanQuery sq = new SpanNextQuery(
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
index da0d5d7..0d99899 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
@@ -1,3 +1,5 @@
+package de.ids_mannheim.korap.query;
+
import java.util.*;
import java.io.*;
@@ -25,7 +27,7 @@
// There is a repetition in here
// ([base=foo]|[base=bar])[base=foobar]
assertEquals(sqwi.toQuery().toString(),
- "spanOr([tokens:base:foo, spanQuantifier(spanNext(tokens:base:foo, tokens:base:bar)[1:100])])");
+ "spanOr([tokens:base:foo, spanRepetition(spanNext(tokens:base:foo, tokens:base:bar){1,100})])");
assertTrue(sqwi.isOptional());
};
@@ -269,7 +271,7 @@
SpanQueryWrapperInterface sqwi = jsonQuery(getClass().getResource("/queries/bsp-repetition.jsonld").getFile());
// der[cnx/p=A]{0,2}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(spanOr([tokens:s:der, spanNext(tokens:s:der, spanQuantifier(tokens:cnx/p:A[1:2]))]), tokens:tt/p:NN)");
+ assertEquals(sqwi.toQuery().toString(), "spanNext(spanOr([tokens:s:der, spanNext(tokens:s:der, spanRepetition(tokens:cnx/p:A{1,2}))]), tokens:tt/p:NN)");
};
public static String getString (String path) {