Lucene Backend is now a module (1)
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/Test.java b/trunk/src/test/java/de/ids_mannheim/korap/Test.java
new file mode 100644
index 0000000..0e8d68b
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/Test.java
@@ -0,0 +1,97 @@
+package de.ids_mannheim.korap;
+
+import java.util.*;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import de.ids_mannheim.korap.analysis.MultiTermToken;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexWriter;
+
+/**
+ * Helper class for testing the KorapIndex framework (Normal).
+ *
+ * @author Nils Diewald
+ */
+public class Test {
+    /**
+     * Builds a document from the map and adds it to the writer. The "text"
+     * field stores "textStr" and gets the token stream parsed from "text".
+     */
+    public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
+        Document doc = new Document();
+        String[] strInt = { "pubDate" };
+        String[] strStr = { "id", "corpus", "pubPlace" };
+        String[] strTxt = { "title", "subtitle", "textClass" };
+
+        // Text fields
+        for (String s : strTxt) {
+            doc.add(new TextField(s, m.get(s), Field.Store.YES));
+        }
+
+        // String fields
+        for (String s : strStr) {
+            doc.add(new StringField(s, m.get(s), Field.Store.YES));
+        }
+
+        // Integer fields
+        for (String s : strInt) {
+            doc.add(new IntField(s, Integer.parseInt(m.get(s)), Field.Store.YES));
+        }
+
+        // Stored text field with term vectors (offsets, positions, payloads)
+        FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
+        textFieldWithTermVectors.setStoreTermVectors(true);
+        textFieldWithTermVectors.setStoreTermVectorOffsets(true);
+        textFieldWithTermVectors.setStoreTermVectorPositions(true);
+        textFieldWithTermVectors.setStoreTermVectorPayloads(true);
+        Field textFieldAnalyzed = new Field("text", m.get("textStr"), textFieldWithTermVectors);
+
+        // Attach the token stream parsed from the annotated "text" value
+        textFieldAnalyzed.setTokenStream(getTermVector(m.get("text")));
+        doc.add(textFieldAnalyzed);
+        w.addDocument(doc);
+    }
+
+    /**
+     * Parses a space separated list of "surface|pos|lemma[|morph;...[|element]]"
+     * segments into a token stream with one multi term token per segment.
+     *
+     * @throws IllegalArgumentException if a segment has fewer than three parts
+     */
+    public static MultiTermTokenStream getTermVector (String stream) {
+        MultiTermTokenStream ts = new MultiTermTokenStream();
+        for (String seg : stream.split(" ")) {
+            // Repeated blanks yield empty segments that carry no token
+            if (seg.isEmpty())
+                continue;
+            String[] tokseg = seg.split("\\|");
+            if (tokseg.length < 3)
+                throw new IllegalArgumentException("Malformed segment: " + seg);
+
+            MultiTermToken mtt = new MultiTermToken('s', tokseg[0]);
+            mtt.add("T");
+            mtt.add('i', tokseg[0].toLowerCase(Locale.ROOT));
+            mtt.add('p', tokseg[1]);
+            mtt.add('l', tokseg[2]);
+
+            // Morphology must also be read when an element part follows it
+            if (tokseg.length > 3 && !tokseg[3].isEmpty()) {
+                for (String morph : tokseg[3].split(";")) {
+                    mtt.add('m', morph);
+                }
+            }
+            if (tokseg.length > 4) {
+                mtt.add('e', tokseg[4]);
+            }
+            ts.addMultiTermToken(mtt);
+        }
+        return ts;
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/TestSimple.java b/trunk/src/test/java/de/ids_mannheim/korap/TestSimple.java
new file mode 100644
index 0000000..552d5a3
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/TestSimple.java
@@ -0,0 +1,129 @@
+package de.ids_mannheim.korap;
+
+import java.util.*;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import de.ids_mannheim.korap.analysis.MultiTermToken;
+import static de.ids_mannheim.korap.util.KorapByte.*;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.SpanQuery;
+
+import org.apache.lucene.util.Bits;
+
+/**
+ * Helper class for testing the KorapIndex framework (Simple).
+ *
+ * @author Nils Diewald
+ */
+public class TestSimple {
+
+    /**
+     * Adds a document with a single stored "text" field to the writer. The
+     * field stores "textStr" and gets the token stream parsed from "text".
+     */
+    public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
+        Document doc = new Document();
+
+        FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
+        textFieldWithTermVectors.setStoreTermVectors(true);
+        // No offsets are stored: setStoreTermVectorOffsets(true) is not called.
+        textFieldWithTermVectors.setStoreTermVectorPositions(true);
+        textFieldWithTermVectors.setStoreTermVectorPayloads(true);
+
+        Field textFieldAnalyzed = new Field(
+            "text",
+            m.get("textStr"),
+            textFieldWithTermVectors
+        );
+
+        MultiTermTokenStream ts = getTermVector(m.get("text"));
+        textFieldAnalyzed.setTokenStream(ts);
+        doc.add(textFieldAnalyzed);
+
+        // Add document to writer
+        w.addDocument(doc);
+    }
+
+    /**
+     * Parses a space separated list of segments, each a "|" separated list
+     * of terms, into a token stream with one multi term token per segment.
+     * Empty terms are ignored and segments without any term are skipped.
+     */
+    public static MultiTermTokenStream getTermVector (String stream) {
+        MultiTermTokenStream ts = new MultiTermTokenStream();
+
+        for (String seg : stream.split(" ")) {
+            MultiTermToken mtt = null;
+
+            for (String term : seg.split("\\|")) {
+                if (term.length() == 0)
+                    continue;
+
+                // The first non-empty term creates the token
+                if (mtt == null)
+                    mtt = new MultiTermToken(term);
+                else
+                    mtt.add(term);
+            }
+
+            // A segment without terms (e.g. from repeated blanks) used to
+            // run past the end of the array; it simply yields no token
+            if (mtt != null)
+                ts.addMultiTermToken(mtt);
+        }
+
+        return ts;
+    }
+
+    /**
+     * Collects one description per span matched by the query in any leaf
+     * of the reader, formatted as "Doc: D with S-E || payloads", where D
+     * is the leaf's doc base plus the span's document number.
+     */
+    public static List<String> getSpanInfo (IndexReader reader, SpanQuery query) throws IOException {
+        Map<Term, TermContext> termContexts = new HashMap<>();
+        List<String> spanArray = new ArrayList<>();
+
+        for (AtomicReaderContext atomic : reader.leaves()) {
+            Bits bitset = atomic.reader().getLiveDocs();
+            Spans spans = query.getSpans(atomic, bitset, termContexts);
+
+            while (spans.next()) {
+                // Local to this iteration, so no synchronized buffer is needed
+                StringBuilder payloadString = new StringBuilder();
+                int docid = atomic.docBase + spans.doc();
+
+                if (spans.isPayloadAvailable()) {
+                    // Retrieve the payloads of the current matching span
+                    for (byte[] payload : spans.getPayload()) {
+                        payloadString.append(byte2int(payload)).append(",");
+                        payloadString.append(byte2int(payload, 2));
+                        payloadString.append(" (").append(payload.length).append(")");
+                        payloadString.append(" | ");
+                    }
+                }
+
+                spanArray.add(
+                    "Doc: " + docid + " with " + spans.start() + "-" + spans.end() +
+                    " || " + payloadString
+                );
+            }
+        }
+        return spanArray;
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
new file mode 100644
index 0000000..9e33133
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
@@ -0,0 +1,102 @@
+import java.util.*;
+import de.ids_mannheim.korap.analysis.MultiTerm;
+import java.io.IOException;
+import org.apache.lucene.util.BytesRef;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+
+@RunWith(JUnit4.class)
+public class TestMultiTerm {
+    // A plain term has no payload and zero start and end offsets
+    @Test
+    public void multiTermSimple () {
+        MultiTerm mt = new MultiTerm("test");
+        assertEquals("test", mt.term);
+        assertNull(mt.payload);
+        assertEquals(0, mt.start);
+        assertEquals(0, mt.end);
+    }
+    // "$" separates the term from its payload
+    @Test
+    public void multiTermPayload () {
+        MultiTerm mt = new MultiTerm("test$5");
+        assertEquals("test", mt.term);
+        assertEquals(new BytesRef("5"), mt.payload);
+        assertEquals(0, mt.start);
+        assertEquals(0, mt.end);
+    }
+    // "#start-end" carries the start and end offsets
+    @Test
+    public void multiTermOffset () {
+        MultiTerm mt = new MultiTerm("versuch#2-34");
+        assertEquals("versuch", mt.term);
+        assertNull(mt.payload);
+        assertEquals(2, mt.start);
+        assertEquals(34, mt.end);
+    }
+    // Offsets and payload may be combined in one term string
+    @Test
+    public void multiTermOffsetPayload () {
+        MultiTerm mt = new MultiTerm("example#6-42$hihi");
+        assertEquals("example", mt.term);
+        assertEquals(new BytesRef("hihi"), mt.payload);
+        assertEquals(6, mt.start);
+        assertEquals(42, mt.end);
+    }
+    // toString() reflects a later change of the term field
+    @Test
+    public void multiTermString () {
+        MultiTerm mt = new MultiTerm("example#6-42$hihi");
+        assertEquals("example#6-42$hihi", mt.toString());
+        mt.term = "spassmacher";
+        assertEquals("spassmacher#6-42$hihi", mt.toString());
+    }
+    // "<i>" and "<l>" payloads are printed as four and eight hex bytes
+    @Test
+    public void multiTermStringPayloadType () {
+        MultiTerm mt = new MultiTerm("example$<i>4000");
+        assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+
+        mt = new MultiTerm("example$<l>757574643438");
+        assertEquals("example$<?>[0,0,0,b0,62,f7,ae,ee]", mt.toString());
+    }
+    // The same representation results from using the setter methods
+    @Test
+    public void multiTermStringPayloadType2 () {
+        MultiTerm mt = new MultiTerm();
+        mt.term("beispiel");
+        mt.start(40);
+        mt.end(50);
+        mt.payload((int) 4000);
+        assertEquals("beispiel#40-50$<?>[0,0,f,a0]", mt.toString());
+    }
+    // A "<b>" payload of 120 is printed as the single character "x"
+    @Test
+    public void multiTermStringPayloadType3 () {
+        MultiTerm mt = new MultiTerm("example$<b>120");
+        assertEquals("example$x", mt.toString());
+    }
+    // Typed payload parts are concatenated in the given order
+    @Test
+    public void multiTermStringPayloadType4 () {
+        MultiTerm mt = new MultiTerm("example$<i>420<b>120");
+        assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
+    }
+    // Int and long parts followed by a trailing byte part
+    @Test
+    public void multiTermStringPayloadType5 () {
+        MultiTerm mt = new MultiTerm("example$<i>4000");
+        assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+
+        mt = new MultiTerm("example$<i>4000<b>120");
+        assertEquals("example$<?>[0,0,f,a0,78]", mt.toString());
+
+        mt = new MultiTerm("example$<l>4000<b>120");
+        assertEquals("example$<?>[0,0,0,0,0,0,f,a0,78]", mt.toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
new file mode 100644
index 0000000..ab071c2
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
@@ -0,0 +1,38 @@
+import java.util.*;
+import de.ids_mannheim.korap.analysis.MultiTermToken;
+import java.io.IOException;
+import org.apache.lucene.util.BytesRef;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+
+@RunWith(JUnit4.class)
+public class TestMultiTermToken {
+    @Test
+    public void multiTermTokenSimple () {
+        MultiTermToken token = new MultiTermToken("t:test", "a:abbruch");
+        assertEquals("[t:test|a:abbruch]", token.toString());
+        token.add("b:banane");
+        assertEquals("[t:test|a:abbruch|b:banane]", token.toString());
+        token.add("c:chaos#21-26"); // offsets of an added term are not printed
+        assertEquals("[t:test|a:abbruch|b:banane|c:chaos]", token.toString());
+        token.add("d:dadaismus#21-26$vergleich"); // its payload is printed
+        assertEquals("[t:test|a:abbruch|b:banane|c:chaos|d:dadaismus$vergleich]", token.toString());
+    }
+    // Offsets of the first term are printed up front and exposed as start/end
+    @Test
+    public void multiTermTokenOffsets () {
+        MultiTermToken token = new MultiTermToken("t:test#23-27");
+        assertEquals("[(23-27)t:test]", token.toString());
+        token.add("b:baum#34-45");
+        assertEquals("[(23-27)t:test|b:baum]", token.toString());
+        token.add("c:cannonball#34-45$tatsache");
+        assertEquals("[(23-27)t:test|b:baum|c:cannonball$tatsache]", token.toString());
+        assertEquals(23, token.start);
+        assertEquals(27, token.end);
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
new file mode 100644
index 0000000..b80ded6
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
Binary files differ
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/cache/TestCache.java b/trunk/src/test/java/de/ids_mannheim/korap/cache/TestCache.java
new file mode 100644
index 0000000..1d15bd0
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/cache/TestCache.java
@@ -0,0 +1,41 @@
+import java.util.Collections;
+
+import net.sf.jsr107cache.Cache;
+import net.sf.jsr107cache.CacheException;
+import net.sf.jsr107cache.CacheFactory;
+import net.sf.jsr107cache.CacheManager;
+
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestCache {
+    // Stores three entries in a cache created by the CacheManager's factory and reads them back.
+    @Test
+    public void cache1 () {
+        Cache cache;
+        try {
+            CacheFactory cacheFactory = CacheManager.getInstance().getCacheFactory();
+            cache = cacheFactory.createCache(Collections.emptyMap());
+        }
+        catch (CacheException e) {
+            // Without a usable cache nothing can be verified: report the
+            // test as skipped instead of letting it pass silently.
+            org.junit.Assume.assumeNoException(e);
+            return; // not reached; keeps "cache" definitely assigned below
+        }
+
+        cache.put("beispiel1", "Das ist ein Test");
+        cache.put("beispiel2", "Das ist ein Versuch");
+        cache.put("beispiel3", "Das ist ein Beispiel");
+
+        // Expected value first, so that failure messages read correctly
+        assertEquals("Das ist ein Test", cache.get("beispiel1"));
+        assertEquals("Das ist ein Versuch", cache.get("beispiel2"));
+        assertEquals("Das ist ein Beispiel", cache.get("beispiel3"));
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java b/trunk/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
new file mode 100644
index 0000000..8ccefa8
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
@@ -0,0 +1,79 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapFilter;
+
+@RunWith(JUnit4.class)
+public class TestKorapFilter {
+
+    @Test
+    public void filterExample () throws IOException {
+        // genre() filters on textClass; and()/or() combine further classes
+        KorapFilter filter = new KorapFilter();
+
+        assertEquals("textClass:tree", filter.genre("tree").toString());
+        assertEquals("+textClass:tree +textClass:sport", filter.genre("tree").and("sport").toString());
+        assertEquals("(+textClass:tree +textClass:sport) textClass:news", filter.genre("tree").and("sport").or("news").toString());
+        assertEquals("textClass:tree textClass:sport textClass:news", filter.genre("tree", "sport", "news").toString());
+    }
+
+    @Test
+    public void rangeExample () throws IOException {
+        // Date bounds are rendered as eight digit numbers on the pubDate field
+        KorapFilter filter = new KorapFilter();
+
+        assertEquals("pubDate:[20030604 TO 20030899]", filter.between("2003-06-04", "2003-08-99").toString());
+        assertEquals("pubDate:[0 TO 20030604]", filter.till("2003-06-04").toString());
+        assertEquals("pubDate:[20030604 TO 99999999]", filter.since("2003-06-04").toString());
+        assertEquals("pubDate:20030604", filter.date("2003-06-04").toString());
+    }
+
+    @Test
+    public void rangeLimited () throws IOException {
+        // Partial dates: missing digits are filled with 00 (lower) or 99 (upper bound)
+        KorapFilter filter = new KorapFilter();
+        assertEquals("pubDate:[20050000 TO 20099999]", filter.between("2005", "2009").toString());
+        assertEquals("pubDate:[20051000 TO 20090899]", filter.between("200510", "200908").toString());
+        assertEquals("pubDate:[20051000 TO 20090899]", filter.between("2005-10", "2009-08").toString());
+        assertEquals("pubDate:[20051006 TO 20090803]", filter.between("2005-1006", "2009-0803").toString());
+        assertEquals("pubDate:[20051006 TO 20090803]", filter.between("2005-10-06", "2009-08-03").toString());
+        // till(): the lower bound is always 0
+        assertEquals("pubDate:[0 TO 20059999]", filter.till("2005").toString());
+        assertEquals("pubDate:[0 TO 20051099]", filter.till("200510").toString());
+        assertEquals("pubDate:[0 TO 20051099]", filter.till("2005-10").toString());
+        assertEquals("pubDate:[0 TO 20051006]", filter.till("2005-1006").toString());
+        assertEquals("pubDate:[0 TO 20051006]", filter.till("2005-10-06").toString());
+        // since(): the upper bound is always 99999999
+        assertEquals("pubDate:[20050000 TO 99999999]", filter.since("2005").toString());
+        assertEquals("pubDate:[20051000 TO 99999999]", filter.since("200510").toString());
+        assertEquals("pubDate:[20051000 TO 99999999]", filter.since("2005-10").toString());
+        assertEquals("pubDate:[20051006 TO 99999999]", filter.since("2005-1006").toString());
+        assertEquals("pubDate:[20051006 TO 99999999]", filter.since("2005-10-06").toString());
+        // date(): a partial date becomes a range, a full date a single value
+        assertEquals("pubDate:[20050000 TO 20059999]", filter.date("2005").toString());
+        assertEquals("pubDate:[20051000 TO 20051099]", filter.date("200510").toString());
+        assertEquals("pubDate:[20051000 TO 20051099]", filter.date("2005-10").toString());
+        assertEquals("pubDate:20051006", filter.date("2005-1006").toString());
+        assertEquals("pubDate:20051006", filter.date("2005-10-06").toString());
+    }
+
+    @Test
+    public void rangeFailure () throws IOException {
+        // Non-numeric dates yield null instead of a query
+        KorapFilter filter = new KorapFilter();
+        assertNull(filter.between("aaaa-bb-cc", "aaaabbcc"));
+        assertNull(filter.till("aaaa-bb-cc"));
+        assertNull(filter.since("aaaa-bb-cc"));
+        assertNull(filter.date("aaaa-bb-cc"));
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/trunk/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
new file mode 100644
index 0000000..bfc6c33
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -0,0 +1,103 @@
+import java.util.*;
+import java.io.IOException;
+
+// import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+
+import org.apache.lucene.search.IndexSearcher;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+import org.apache.lucene.util.Version;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+
+import static de.ids_mannheim.korap.Test.*;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestHighlight { // extends LuceneTestCase {
+
+    // Create index in RAM
+    private Directory index = new RAMDirectory();
+
+    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
+
+    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
+
+    // Indexes two annotated sentences as one document and checks that a searcher sees it.
+    @Test
+    public void checkHighlights () throws IOException {
+        IndexWriter w = new IndexWriter(index, config);
+        // Close the writer even if indexing or an assertion fails
+        try {
+            Document doc = new Document();
+            FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
+            textFieldWithTermVectors.setStoreTermVectors(true);
+            textFieldWithTermVectors.setStoreTermVectorOffsets(true);
+            textFieldWithTermVectors.setStoreTermVectorPositions(true);
+
+            Field textFieldAnalyzed = new Field(
+                "text",
+                "Er wagte nicht, sich zu ruehren. Er war starr vor Angst.",
+                textFieldWithTermVectors
+            );
+
+            MultiTermTokenStream ts = getTermVector(
+                "Er#0-2|PPER|er|c:nom;n:sg;g:masc;p:3|s:<$0-32 " +
+                "wagte#3-8|VVFIN|wagen|p:3;n:sg;t:past;m:ind| " +
+                "nicht#9-14|PTKNEG|nicht|| " +
+                ",#14-15|$,|,|| " +
+                "sich#16-20|PRF|sich|c:acc;p:3;n:sg| " +
+                "zu#21-23|PTKZU|zu|| " +
+                "ruehren#24-31|VVFIN|ruehren|| " +
+                ".#31-32|$.|.||s:>$0-32 " +
+                "Er#33-35|PPER|er|c:nom;p:3;n:sg;g:masc|s:<$33-56 " +
+                "war#36-39|VAFIN|sein|p:3;n:sg;t:past;m:ind| " +
+                "starr#40-45|ADJD|starr|comp:pos| " +
+                "vor#46-49|APPR|vor|| " +
+                "Angst#50-55|NN|angst|c:dat;n:sg;g:fem| " +
+                ".#55-56|$.|.||s:>$33-56"
+            );
+            textFieldAnalyzed.setTokenStream(ts);
+            doc.add(textFieldAnalyzed);
+
+            // Add document to writer
+            w.addDocument(doc);
+            assertEquals(1, w.numDocs());
+        } finally {
+            w.close();
+        }
+
+        // The reader was never closed before; release it in any case
+        DirectoryReader reader = DirectoryReader.open(index);
+        try {
+            // Check searcher
+            IndexSearcher searcher = new IndexSearcher(reader);
+            assertEquals(1, searcher.getIndexReader().numDocs());
+        } finally {
+            reader.close();
+        }
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java
new file mode 100644
index 0000000..ff3abaf
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java
@@ -0,0 +1,283 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.index.Term;
+
+// mvn -Dtest=TestClassIndex#indexExample1 test
+
+@RunWith(JUnit4.class)
+public class TestClassIndex {
+ // Checks match positions and snippets for term, next and class span queries.
+ @Test
+ public void indexExample1 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+
+ ki.commit();
+
+ SpanQuery sq;
+ KorapResult kr;
+ // "b" directly followed by "a": one match is expected, at 7-9
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanTermQuery(new Term("base", "s:a"))
+ );
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("StartPos (0)", 7, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 9, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "... bcabca[ba]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetHTML (0)", "<span class=\"korap-more-left\"></span>bcabca<span class=\"korap-match\">ba</span>c", kr.match(0).snippetHTML());
+ // Plain term query for "b": three matches are expected
+ sq = new SpanTermQuery(new Term("base", "s:b"));
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 3, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 2, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", kr.match(0).snippetBrackets());
+
+
+ assertEquals("SnippetHTML (0)", "a<span class=\"korap-match\">b</span>cabcab<span class=\"korap-more-right\"></span>", kr.match(0).snippetHTML());
+
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 5, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abca[b]cabac", kr.match(1).snippetBrackets());
+ assertEquals("StartPos (2)", 7, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 8, kr.match(2).endPos);
+ assertEquals("SnippetBrackets (2)", "... bcabca[b]ac", kr.match(2).snippetBrackets());
+ // The same term wrapped in a class query without a number: rendered as {b} / korap-class-0
+ sq = new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")));
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 3, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 2, kr.match(0).endPos);
+ assertEquals("snippetBrackets (0)", "a[{b}]cabcab ...", kr.match(0).snippetBrackets());
+ assertEquals("snippetHTML (0)", "a<span class=\"korap-match\"><span class=\"korap-highlight korap-class-0\">b</span></span>cabcab<span class=\"korap-more-right\"></span>", kr.match(0).snippetHTML());
+
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 5, kr.match(1).endPos);
+ assertEquals("snippetBrackets (1)", "abca[{b}]cabac", kr.match(1).snippetBrackets());
+ assertEquals("StartPos (2)", 7, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 8, kr.match(2).endPos);
+ assertEquals("snippetBrackets (2)", "... bcabca[{b}]ac", kr.match(2).snippetBrackets());
+
+ // "a" followed by "b", where "b" is marked as class 1
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 1)
+ );
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 3, kr.totalResults());
+ assertEquals("StartPos (0)", 0, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 2, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "[a{1:b}]cabcab ...", kr.match(0).snippetBrackets());
+
+ assertEquals("SnippetHTML (0)", "<span class=\"korap-match\">a<span class=\"korap-highlight korap-class-1\">b</span></span>cabcab<span class=\"korap-more-right\"></span>", kr.match(0).snippetHTML());
+
+ assertEquals("StartPos (1)", 3, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 5, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abc[a{1:b}]cabac", kr.match(1).snippetBrackets());
+ assertEquals("StartPos (2)", 6, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 8, kr.match(2).endPos);
+ assertEquals("SnippetBrackets (2)", "abcabc[a{1:b}]ac", kr.match(2).snippetBrackets());
+
+ // Both operands marked: "a" as class 2, "b" as class 3
+ // abcabcabac
+ sq = new SpanNextQuery(
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
+ );
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("StartPos (0)", 0, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 2, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "[{2:a}{3:b}]cabcab ...", kr.match(0).snippetBrackets());
+ assertEquals("StartPos (1)", 3, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 5, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abc[{2:a}{3:b}]cabac", kr.match(1).snippetBrackets());
+
+ assertEquals("StartPos (2)", 6, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 8, kr.match(2).endPos);
+ assertEquals("SnippetBrackets (2)", "abcabc[{2:a}{3:b}]ac", kr.match(2).snippetBrackets());
+ // Nested classes: class 2 spans "ba", an inner class without a number spans the last "a"
+ // abcabcabac
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ new SpanClassQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")))
+ ), (byte) 2
+ ));
+
+ kr = ki.search(sq, (short) 10);
+
+ // System.err.println(kr.toJSON());
+
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "abcabc[a{2:b{a}}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetHTML (0)", "abcabc<span class=\"korap-match\">a<span class=\"korap-highlight korap-class-2\">b<span class=\"korap-highlight korap-class-0\">a</span></span></span>c", kr.match(0).snippetHTML());
+
+ // Offset tokens
+ kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "... bc[a{2:b{a}}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetHTML (0)", "<span class=\"korap-more-left\"></span>bc<span class=\"korap-match\">a<span class=\"korap-highlight korap-class-2\">b<span class=\"korap-highlight korap-class-0\">a</span></span></span>c", kr.match(0).snippetHTML());
+
+ // Offset Characters
+ kr = ki.search(sq, 0, (short) 10, false, (short) 2, false, (short) 2);
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "... bc[a{2:b{a}}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetHTML (0)", "<span class=\"korap-more-left\"></span>bc<span class=\"korap-match\">a<span class=\"korap-highlight korap-class-2\">b<span class=\"korap-highlight korap-class-0\">a</span></span></span>c", kr.match(0).snippetHTML());
+
+
+ // System.err.println(kr.toJSON());
+ // "b" as class 1 followed by "c" as class 2: two matches are expected
+ sq = new SpanNextQuery(
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 1),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")), (byte) 2)
+ );
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 2, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 3, kr.match(0).endPos);
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 6, kr.match(1).endPos);
+
+ assertEquals("Document count", 1, ki.numberOf("documents"));
+ assertEquals("Token count", 10, ki.numberOf("t"));
+
+ // "a" followed by a class around "b" "c"; search is called with (short) 2
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ new SpanClassQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanTermQuery(new Term("base", "s:c"))
+ )
+ )
+ );
+
+ kr = ki.search(sq, (short) 2);
+
+ assertEquals("totalResults", 2, kr.totalResults());
+ assertEquals("StartPos (0)", 0, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 3, kr.match(0).endPos);
+ assertEquals("StartPos (1)", 3, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 6, kr.match(1).endPos);
+
+ assertEquals(1, ki.numberOf("documents"));
+ assertEquals(10, ki.numberOf("t"));
+ };
+
+ // Indexes the same text with an additional <>:x annotation; all queries are still commented out.
+ @Test
+ public void indexExample2 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+
+ ki.commit();
+
+ SpanQuery sq;
+ KorapResult kr;
+
+ /*
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:c")),
+ new SpanElementQuery("base", "x")
+ );
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals("ab[cabca]bac", kr.match(0).getSnippetBrackets());
+ System.err.println();
+ */
+
+ /*
+ sq = new SpanNextQuery(
+ new SpanElementQuery("base", "x"),
+ new SpanTermQuery(new Term("base", "s:b"))
+ );
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals("abc[abcab}ac]", kr.match(0).getSnippetBrackets());
+ System.err.println();
+
+ */
+
+ /*
+ sq = new SpanWithinQuery(
+ new SpanElementQuery("base", "x"),
+ new SpanClassQuery(
+ new SpanTermQuery(new Term("base", "s:a"))
+ )
+ );
+
+ // new SpanTermQuery(new Term("base", "s:a")),
+ // new SpanClassQuery(
+ // )
+ // );
+
+ */
+
+ }
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java
new file mode 100644
index 0000000..07bafb8
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java
@@ -0,0 +1,325 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.index.Term;
+
+
+@RunWith(JUnit4.class)
+public class TestElementIndex {
+
+ // TODO: Store primary data separately, as a non-indexed field.
+
+ @Test
+ public void indexExample1 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ // Index two documents that each carry three nested <a> elements (token spans 0-12, 1-9, 2-6)
+ // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "x y z h i j h i j h i j ",
+ "[(0-3)s:x|<>:a#0-3$<i>12]" +
+ "[(3-6)s:y|<>:a#3-6$<i>9]" +
+ "[(6-9)s:z|<>:a#6-9$<i>6]" +
+ "[(9-12)s:h]" +
+ "[(12-15)s:i]" +
+ "[(15-18)s:j]" +
+ "[(18-21)s:h]" +
+ "[(21-24)s:i]" +
+ "[(24-27)s:j]" +
+ "[(27-30)s:h]" +
+ "[(30-33)s:i]" +
+ "[(33-36)s:j]");
+ ki.addDoc(fd);
+
+ // <a>x<a>y<a>zcde</a>cde</a>cde</a>cde</a>
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "x y z c d e c d e c d e ",
+ "[(0-3)s:x|<>:a#0-3$<i>12]" +
+ "[(3-6)s:y|<>:a#3-6$<i>9]" +
+ "[(6-9)s:z|<>:a#6-9$<i>6]" +
+ "[(9-12)s:c]" +
+ "[(12-15)s:d]" +
+ "[(15-18)s:e]" +
+ "[(18-21)s:c]" +
+ "[(21-24)s:d]" +
+ "[(24-27)s:e]" +
+ "[(27-30)s:c]" +
+ "[(30-33)s:d]" +
+ "[(33-36)s:e]");
+ ki.addDoc(fd);
+
+ // Save documents
+ ki.commit();
+
+ assertEquals(2, ki.numberOf("documents"));
+
+ SpanQuery sq = new SpanElementQuery("base", "a");
+
+ KorapResult kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 6, kr.totalResults());
+ // Matches 0-2: the three <a> spans, outermost first
+ assertEquals("StartPos (0)", 0, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 12, kr.match(0).endPos);
+ assertEquals("StartPos (1)", 1, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 9, kr.match(1).endPos);
+ assertEquals("StartPos (2)", 2, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 6, kr.match(2).endPos);
+ // Matches 3-5 repeat the same spans (both documents have identical <a> annotations); labels name the real match index
+ assertEquals("StartPos (3)", 0, kr.match(3).startPos);
+ assertEquals("EndPos (3)", 12, kr.match(3).endPos);
+ assertEquals("StartPos (4)", 1, kr.match(4).startPos);
+ assertEquals("EndPos (4)", 9, kr.match(4).endPos);
+ assertEquals("StartPos (5)", 2, kr.match(5).startPos);
+ assertEquals("EndPos (5)", 6, kr.match(5).endPos);
+
+ // System.err.println(kr.toJSON());
+ };
+
+ @Test
+ public void indexExample2 () throws IOException {
+ final KorapIndex index = new KorapIndex();
+ // All three <a> elements open on the first token but end at positions 6, 3 and 9
+ // <a><a><a>h</a>hhij</a>hij</a>hij</a>
+ final FieldDocument doc = new FieldDocument();
+ doc.addTV("base",
+ "h h i j h i j h i j ",
+ "[(0-3)s:h|<>:a#0-27$<i>6|<>:a#0-18$<i>3|<>:a#0-36$<i>9]" +
+ "[(3-6)s:h]" +
+ "[(12-15)s:i]" +
+ "[(15-18)s:j]" +
+ "[(18-21)s:h]" +
+ "[(21-24)s:i]" +
+ "[(24-27)s:j]" +
+ "[(27-30)s:h]" +
+ "[(30-33)s:i]" +
+ "[(33-36)s:j]");
+ index.addDoc(doc);
+
+ // Save documents
+ index.commit();
+
+ assertEquals(1, index.numberOf("documents"));
+
+ final SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+ final KorapResult result = index.search(elementQuery, (short) 10);
+ // The hits are expected in order of increasing end position: 3, 6, 9
+ assertEquals("totalResults", 3, result.totalResults());
+ assertEquals("StartPos (0)", 0, result.match(0).startPos);
+ assertEquals("EndPos (0)", 3, result.match(0).endPos);
+ assertEquals("StartPos (1)", 0, result.match(1).startPos);
+ assertEquals("EndPos (1)", 6, result.match(1).endPos);
+ assertEquals("StartPos (2)", 0, result.match(2).startPos);
+ assertEquals("EndPos (2)", 9, result.match(2).endPos);
+ };
+
+ @Test
+ public void indexExample3 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ // Six documents; only the 1st, 2nd, 4th and 5th contain <a> elements (three each)
+ // Single token "xyz" carrying three <a> elements and one <b>, all with end position 0
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "xyz",
+ "[(0-3)s:xyz|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:b#0-3$<i>0]");
+ ki.addDoc(fd);
+
+ // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a>
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "x y z c d e c d e c d e ",
+ "[(0-3)s:x|<>:a#0-36$<i>12|<>:b#0-36$<i>12]" +
+ "[(3-6)s:y|<>:a#3-27$<i>9]" +
+ "[(6-9)s:z|<>:a#6-18$<i>6]" +
+ "[(9-12)s:c]" +
+ "[(12-15)s:d]" +
+ "[(15-18)s:e]" +
+ "[(18-21)s:c]" +
+ "[(21-24)s:d]" +
+ "[(24-27)s:e]" +
+ "[(27-30)s:c]" +
+ "[(30-33)s:d]" +
+ "[(33-36)s:e]");
+ ki.addDoc(fd);
+
+ // xyz (no elements)
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "x y z ",
+ "[(0-3)s:x]" +
+ "[(3-6)s:y]" +
+ "[(6-9)s:z]");
+ ki.addDoc(fd);
+
+ // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a>
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "x y z k l m k l m k l m ",
+ "[(0-3)s:x|<>:a#0-3$<i>12]" +
+ "[(3-6)s:y|<>:a#3-6$<i>9|<>:b#3-6$<i>9]" +
+ "[(6-9)s:z|<>:a#6-9$<i>6]" +
+ "[(9-12)s:k]" +
+ "[(12-15)s:l]" +
+ "[(15-18)s:m]" +
+ "[(18-21)s:k]" +
+ "[(21-24)s:l]" +
+ "[(24-27)s:m]" +
+ "[(27-30)s:k]" +
+ "[(30-33)s:l]" +
+ "[(33-36)s:m]");
+ ki.addDoc(fd);
+
+ // <a><a><a>h</a>hhij</a>hij</a>hij</a>
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "h h i j h i j h i j ",
+ "[(0-3)s:h|<>:a#0-27$<i>6|<>:a#0-18$<i>3|<>:a#0-36$<i>9]" +
+ "[(3-6)s:h]" +
+ "[(12-15)s:i]" +
+ "[(15-18)s:j]" +
+ "[(18-21)s:h]" +
+ "[(21-24)s:i]" +
+ "[(24-27)s:j]" +
+ "[(27-30)s:h]" +
+ "[(30-33)s:i]" +
+ "[(33-36)s:j]");
+ ki.addDoc(fd);
+
+ // abc (no elements)
+ fd = new FieldDocument();
+ fd.addTV("base",
+ "a b c ",
+ "[(0-3)s:a]" +
+ "[(3-6)s:b]" +
+ "[(6-9)s:c]");
+ ki.addDoc(fd);
+
+
+ // Save documents
+ ki.commit();
+
+ assertEquals(6, ki.numberOf("documents"));
+
+ SpanQuery sq = new SpanElementQuery("base", "a");
+
+ KorapResult kr = ki.search(sq, (short) 15);
+
+ // System.err.println(kr.toJSON());
+
+ assertEquals("totalResults", 12, kr.totalResults());
+ // Matches 0-2: the three <a> elements of the "xyz" document (end position 0)
+ assertEquals("StartPos (0)", 0, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 0, kr.match(0).endPos);
+ assertEquals("StartPos (1)", 0, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 0, kr.match(1).endPos);
+ assertEquals("StartPos (2)", 0, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 0, kr.match(2).endPos);
+ // Matches 3-5: nested spans 0-12, 1-9, 2-6 (annotated in the "c d e" document)
+ assertEquals("StartPos (3)", 0, kr.match(3).startPos);
+ assertEquals("EndPos (3)", 12, kr.match(3).endPos);
+ assertEquals("StartPos (4)", 1, kr.match(4).startPos);
+ assertEquals("EndPos (4)", 9, kr.match(4).endPos);
+ assertEquals("StartPos (5)", 2, kr.match(5).startPos);
+ assertEquals("EndPos (5)", 6, kr.match(5).endPos);
+ // Matches 6-8: the same nested spans again (annotated in the "k l m" document)
+ assertEquals("StartPos (6)", 0, kr.match(6).startPos);
+ assertEquals("EndPos (6)", 12, kr.match(6).endPos);
+ assertEquals("StartPos (7)", 1, kr.match(7).startPos);
+ assertEquals("EndPos (7)", 9, kr.match(7).endPos);
+ assertEquals("StartPos (8)", 2, kr.match(8).startPos);
+ assertEquals("EndPos (8)", 6, kr.match(8).endPos);
+ // Matches 9-11: the "h h i j" document, whose <a> elements all start at 0 and end at 3, 6, 9
+ assertEquals("StartPos (9)", 0, kr.match(9).startPos);
+ assertEquals("EndPos (9)", 3, kr.match(9).endPos);
+ assertEquals("StartPos (10)", 0, kr.match(10).startPos);
+ assertEquals("EndPos (10)", 6, kr.match(10).endPos);
+ assertEquals("StartPos (11)", 0, kr.match(11).startPos);
+ assertEquals("EndPos (11)", 9, kr.match(11).endPos);
+ };
+
+
+ @Test
+ public void indexExample4 () throws IOException {
+ final KorapIndex index = new KorapIndex();
+ // Two <a> elements: one opening at token 3 (chars 9-15), one at token 6 (chars 18-24)
+ final FieldDocument doc = new FieldDocument();
+ doc.addTV("base",
+ "111111ccc222222fff333333iiijjj",
+ "[(0-3)s:a|_0#0-3]" +
+ "[(3-6)s:b|_1#3-6]" +
+ "[(6-9)s:c|_2#6-9]" +
+ "[(9-12)s:d|_3#9-12|<>:a#9-15$<i>4]" +
+ "[(12-15)s:e|_4#12-15]" +
+ "[(15-18)s:f|_5#15-18]" +
+ "[(18-21)s:g|_6#18-21|<>:a#18-24$<i>8]" +
+ "[(21-24)s:h|_7#21-24]" +
+ "[(24-27)s:i|_8#24-27]" +
+ "[(27-30)s:j|_9#27-30]");
+ index.addDoc(doc);
+
+ // Save documents
+ index.commit();
+
+ assertEquals(1, index.numberOf("documents"));
+
+ final SpanQuery elementQuery = new SpanElementQuery("base", "a");
+ // NOTE(review): the trailing arguments presumably request 3 characters of context per side - confirm against KorapIndex.search
+ final KorapResult result = index.search(elementQuery, 0, (short) 15, false, (short) 3, false, (short) 3);
+
+ assertEquals("... ccc[222222]fff ...", result.match(0).getSnippetBrackets());
+ assertEquals("... fff[333333]iii ...", result.match(1).getSnippetBrackets());
+ };
+
+
+ @Test
+ public void indexExample5 () throws IOException {
+ final KorapIndex index = new KorapIndex();
+ // Three <a> elements; the first one opens on the very first token (chars 0-6)
+ final FieldDocument doc = new FieldDocument();
+ doc.addTV("base",
+ "111111ccc222222fff333333iiijjj",
+ "[(0-3)s:a|_0#0-3|<>:a#0-6$<i>1]" +
+ "[(3-6)s:b|_1#3-6]" +
+ "[(6-9)s:c|_2#6-9]" +
+ "[(9-12)s:d|_3#9-12|<>:a#9-15$<i>4]" +
+ "[(12-15)s:e|_4#12-15]" +
+ "[(15-18)s:f|_5#15-18]" +
+ "[(18-21)s:g|_6#18-21|<>:a#18-24$<i>8]" +
+ "[(21-24)s:h|_7#21-24]" +
+ "[(24-27)s:i|_8#24-27]" +
+ "[(27-30)s:j|_9#27-30]");
+ index.addDoc(doc);
+
+ // Save documents
+ index.commit();
+
+ assertEquals(1, index.numberOf("documents"));
+
+ final SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+ final KorapResult result = index.search(elementQuery, 0, (short) 15, false, (short) 3, false, (short) 3);
+ // A match at the very start of the text is rendered without a leading "... " marker
+ assertEquals("[111111]ccc ...", result.match(0).getSnippetBrackets());
+ assertEquals("... ccc[222222]fff ...", result.match(1).getSnippetBrackets());
+ assertEquals("... fff[333333]iii ...", result.match(2).getSnippetBrackets());
+ };
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
new file mode 100644
index 0000000..f228b93
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -0,0 +1,208 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.KorapMatch;
+import de.ids_mannheim.korap.KorapDocument;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanMatchModifyQuery;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.index.Term;
+
+// mvn -Dtest=TestFieldDocument#indexExample1 test
+
+@RunWith(JUnit4.class)
+public class TestFieldDocument {
+
+ @Test
+ public void indexExample1 () throws IOException {
+ FieldDocument fd = new FieldDocument();
+ // Populate string, text, integer and term-vector fields through the FieldDocument helpers
+ fd.addString("corpusID", "WPD");
+ fd.addString("ID", "WPD-AAA-00001");
+ fd.addText("textClass", "music entertainment");
+ fd.addText("author", "Peter Frankenfeld");
+ fd.addInt("pubDate", 20130617);
+ fd.addText("title", "Wikipedia");
+ fd.addText("subTitle", "Die freie Enzyklopädie");
+ fd.addString("pubPlace", "Bochum");
+ fd.addInt("lastModified", 20130717);
+ fd.addTV("tokens",
+ "abc",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]");
+ // Each field must be retrievable by name; JUnit order is (expected, actual) so failures report correctly
+ assertEquals("title", fd.doc.getField("title").name());
+ assertEquals("Wikipedia", fd.doc.getField("title").stringValue());
+
+ assertEquals("corpusID", fd.doc.getField("corpusID").name());
+ assertEquals("WPD", fd.doc.getField("corpusID").stringValue());
+
+ assertEquals("ID", fd.doc.getField("ID").name());
+ assertEquals("WPD-AAA-00001", fd.doc.getField("ID").stringValue());
+
+ assertEquals("subTitle", fd.doc.getField("subTitle").name());
+ assertEquals("Die freie Enzyklopädie", fd.doc.getField("subTitle").stringValue());
+
+ assertEquals("pubPlace", fd.doc.getField("pubPlace").name());
+ assertEquals("Bochum", fd.doc.getField("pubPlace").stringValue());
+
+ assertEquals("lastModified", fd.doc.getField("lastModified").name());
+ assertEquals("20130717", fd.doc.getField("lastModified").stringValue());
+
+ assertEquals("tokens", fd.doc.getField("tokens").name());
+ assertEquals("abc", fd.doc.getField("tokens").stringValue());
+
+ assertEquals("author", fd.doc.getField("author").name());
+ assertEquals("Peter Frankenfeld", fd.doc.getField("author").stringValue());
+
+ assertEquals("textClass", fd.doc.getField("textClass").name());
+ assertEquals("music entertainment", fd.doc.getField("textClass").stringValue());
+ };
+
+ @Test
+ public void indexExample2 () throws IOException {
+ // JSON form of a document: primary data, one token field, and metadata
+ String json =
+"{" +
+" \"fields\" : [" +
+" { "+
+" \"primaryData\" : \"abc\"" +
+" }," +
+" {" +
+" \"name\" : \"tokens\"," +
+" \"data\" : [" +
+" [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," +
+" [ \"s:b\", \"i:b\", \"_1#1-2\" ]," +
+" [ \"s:c\", \"i:c\", \"_2#2-3\" ]" +
+" ]" +
+" }" +
+" ]," +
+" \"corpusID\" : \"WPD\"," +
+" \"ID\" : \"WPD-AAA-00001\"," +
+" \"textClass\" : \"music entertainment\"," +
+" \"author\" : \"Peter Frankenfeld\"," +
+" \"pubDate\" : 20130617," +
+" \"title\" : \"Wikipedia\"," +
+" \"subTitle\" : \"Die freie Enzyklopädie\"," +
+" \"pubPlace\" : \"Bochum\"" +
+"}";
+
+ KorapIndex ki = new KorapIndex();
+ FieldDocument fd = ki.addDoc(json);
+
+ ki.commit();
+ // The document returned by addDoc must expose the parsed metadata (expected value first)
+ assertEquals("abc", fd.getPrimaryData());
+ assertEquals("WPD", fd.getCorpusID());
+ assertEquals("WPD-AAA-00001", fd.getID());
+ assertEquals("music entertainment", fd.getTextClass());
+ assertEquals("Peter Frankenfeld", fd.getAuthor());
+ assertEquals("Wikipedia", fd.getTitle());
+ assertEquals("Die freie Enzyklopädie", fd.getSubTitle());
+ assertEquals("Bochum", fd.getPubPlace());
+ assertEquals("2013-06-17", fd.getPubDate().toDisplay());
+
+ KorapQuery kq = new KorapQuery("tokens");
+ KorapResult kr = ki.search((SpanQuery) kq.seq(kq._(3, kq.seg("s:b"))).toQuery());
+ KorapMatch km = kr.getMatch(0);
+ // The same metadata must be readable from a match found in the index
+ assertEquals("abc", km.getPrimaryData());
+ assertEquals("WPD", km.getCorpusID());
+ assertEquals("WPD-AAA-00001", km.getID());
+ assertEquals("music entertainment", km.getTextClass());
+ assertEquals("Peter Frankenfeld", km.getAuthor());
+ assertEquals("Wikipedia", km.getTitle());
+ assertEquals("Die freie Enzyklopädie", km.getSubTitle());
+ assertEquals("Bochum", km.getPubPlace());
+ assertEquals("2013-06-17", km.getPubDate().toDisplay());
+ // The snippet marks the matched "b" with the class number 3 passed to kq._ above
+ assertEquals("a[{3:b}]c", km.getSnippetBrackets());
+ };
+
+ @Test
+ public void indexExample3 () throws IOException {
+
+ // Construct index
+ final KorapIndex index = new KorapIndex();
+
+ // Load the gzipped JSON fixtures from the test resources under /wiki/
+ for (String docId : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ index.addDocFile(
+ getClass().getResource("/wiki/" + docId + ".json.gz").getFile(), true
+ );
+ };
+ index.commit();
+
+ // Start creating query
+ final KorapQuery builder = new KorapQuery("tokens");
+
+ // within(<xip/const:NPA>, {1: {2: [cnx/p=A & mate/m=number:sg]}[opennlp/p=NN & tt/p=NN]})
+ SpanQuery query =
+ builder.within(
+ builder.tag("xip/const:NPA"),
+ builder._(1,
+ builder.seq(
+ builder._(2, builder.seg("cnx/p:A").with("mate/m:number:sg"))
+ ).append(
+ builder.seg("opennlp/p:NN").with("tt/p:NN")
+ )
+ )
+ ).toQuery();
+
+
+ KorapResult result;
+ /*
+ result = index.search(query, 0, (short) 20, true, (short) 6, true, (short) 6);
+ System.err.println(result.toJSON());
+ */
+
+ result = index.search(query, 0, (short) 5, true, (short) 2, false, (short) 5);
+ assertEquals(36, result.totalResults());
+ assertEquals(5, result.itemsPerPage());
+ assertEquals("... Texten eine [{1:{2:durchschnittliche} Häufigkeit}] von ...", result.match(0).getSnippetBrackets());
+ assertEquals("... damit der [{1:{2:sechsthäufigste} Buchstabe}] in d ...", result.match(1).getSnippetBrackets());
+ assertEquals("... A der [{1:{2:einzige} Buchstabe}] im D ...", result.match(2).getSnippetBrackets());
+ assertEquals("... für den [offenen vorderen {1:{2:ungerundeten} Vokal}] a: A ...", result.match(3).getSnippetBrackets());
+ assertEquals("... in eine [{1:{2:flache} Stellung}] nied ...", result.match(4).getSnippetBrackets());
+
+ // Plain lemma query: three hits spread over two documents
+ query = builder.seg("tt/l:Norwegen").toQuery();
+ result = index.search(query, 0, (short) 5, true, (short) 2, false, (short) 5);
+
+ assertEquals(3, result.totalResults());
+ assertEquals("... Lofoten in [Norwegen], unt ...", result.match(0).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", result.match(0).getID());
+ assertEquals("... es in [Norwegen] noch ...", result.match(1).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", result.match(1).getID());
+ assertEquals("... Orte in [Norwegen]: Å i ...", result.match(2).getSnippetBrackets());
+ assertEquals("WPD_AAA.00005", result.match(2).getID());
+
+ // Negated constraint: lemma "Vokal" excluding tokens tagged mate/m:number:sg
+ query = builder.seg("tt/l:Vokal").without("mate/m:number:sg").toQuery();
+ result = index.search(query, 0, (short) 5, true, (short) 2, false, (short) 5);
+ assertEquals(1, result.totalResults());
+ assertEquals("... reich an [Vokalen] war, ...", result.match(0).getSnippetBrackets());
+ };
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
new file mode 100644
index 0000000..b74ebc1
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
@@ -0,0 +1,799 @@
+import java.util.*;
+import java.io.*;
+
+import de.ids_mannheim.korap.analysis.MultiTermToken;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+
+import static de.ids_mannheim.korap.Test.*;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TermContext;
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.AtomicReaderContext;
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.NearSpansOrdered;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.RegexpQuery;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.SimpleFSDirectory; // temporary
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestIndex { // extends LuceneTestCase {
+ // Create index in RAM
+ // private Directory index = new RAMDirectory();
+
+ private Directory index = new RAMDirectory();
+
+ @Test
+ public void multiTermToken () {
+ MultiTermToken test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
+ assertEquals("hunde", test.terms.get(0).term);
+ assertEquals("pos:n", test.terms.get(1).term);
+ assertEquals("m:gen:pl", test.terms.get(2).term);
+ assertEquals(1, test.terms.get(0).posIncr, 0); // delta 0: the former delta of 1 could not tell 0 from 1
+ assertEquals(0, test.terms.get(1).posIncr, 0);
+ assertEquals(0, test.terms.get(2).posIncr, 0);
+ // A second token built from the same terms must yield the same terms and increments
+ test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
+ assertEquals("hunde", test.terms.get(0).term);
+ assertEquals("pos:n", test.terms.get(1).term);
+ assertEquals("m:gen:pl", test.terms.get(2).term);
+ assertEquals(1, test.terms.get(0).posIncr, 0);
+ assertEquals(0, test.terms.get(1).posIncr, 0);
+ assertEquals(0, test.terms.get(2).posIncr, 0);
+ };
+
+ private List<Map<String, String>> initIndexer () throws IOException {
+ List<Map<String, String>> list = new ArrayList<>();
+ // One map per fixture document: metadata keys, the raw text ("textStr") and the annotated tokens ("text")
+ Map<String, String> d1 = new HashMap<>();
+ d1.put("id", "w1");
+ d1.put("corpus", "wiki");
+ d1.put("author", "Nils Diewald");
+ d1.put("title", "Wikipedia");
+ d1.put("subtitle", "A test");
+ d1.put("pubDate", "20130701");
+ d1.put("pubPlace", "Mannheim");
+ d1.put("textClass", "news sports");
+ d1.put("textStr", "Er nahm den Hunden die Angst.");
+ d1.put("text", "Er#0-2|PPER|er|c:nom;p:3;n:sg;g:masc|<>:s#0-29$<i>7 " +
+ "nahm#3-7|VVFIN|nehmen|p:3;n:sg;t:past;m:ind| " +
+ "den#8-11|ART|der|c:acc;n:sg;g:masc| " +
+ "Hunden#12-18|NN|hund|c:acc;n:sg;g:masc| " +
+ "die#19-22|ART|der|c:nom;n:sg;g:fem| " +
+ "Angst#23-28|NN|angst|c:nom;n:sg;g:fem| " +
+ ".#28-29|$.|.||");
+ list.add(d1);
+
+ Map<String, String> d2 = new HashMap<>();
+
+ d2.put("id", "w2");
+ d2.put("corpus", "wiki");
+ d2.put("author", "Peter Thomas");
+ d2.put("title", "Waldartikel");
+ d2.put("subtitle", "Another test");
+ d2.put("pubDate", "20130723");
+ d2.put("pubPlace", "Bielefeld");
+ d2.put("textClass", "news");
+ d2.put("textStr", "Sie liefen durch den Wald.");
+ d2.put("text", "Sie#0-3|PPER|sie|c:nom;p:3;n:pl;g:all|<>:s#0-26$<i>6 " +
+ "liefen#4-10|VVFIN|laufen|p:3;n:pl;t:past;m:ind| " +
+ "durch#11-16|APPR|durch|| " +
+ "den#17-20|ART|der|c:acc;n:sg;g:masc| " +
+ "Wald#21-25|NN|wald|c:acc;n:sg;g:masc| " +
+ ".#25-26|$.|.||");
+ list.add(d2);
+
+ Map<String, String> d3 = new HashMap<>();
+ d3.put("id", "w3");
+ d3.put("corpus", "zeitung");
+ d3.put("author", "Michael Meier");
+ d3.put("title", "Angst");
+ d3.put("subtitle", "Starr vor Angst");
+ d3.put("pubDate", "20130713");
+ d3.put("pubPlace", "Bielefeld");
+ d3.put("textClass", "sports");
+ d3.put("textStr", "Er wagte nicht, sich zu ruehren. Er war starr vor Angst.");
+ d3.put("text", "Er#0-2|PPER|er|c:nom;n:sg;g:masc;p:3|<>:s#0-32$<i>8 " +
+ "wagte#3-8|VVFIN|wagen|p:3;n:sg;t:past;m:ind| " +
+ "nicht#9-14|PTKNEG|nicht|| " +
+ ",#14-15|$,|,|| " +
+ "sich#16-20|PRF|sich|c:acc;p:3;n:sg| " +
+ "zu#21-23|PTKZU|zu|| " +
+ "ruehren#24-31|VVFIN|ruehren|| " +
+ ".#31-32|$.|.|| " +
+ "Er#33-35|PPER|er|c:nom;p:3;n:sg;g:masc|<>:s#33-56$<i>14 " +
+ "war#36-39|VAFIN|sein|p:3;n:sg;t:past;m:ind| " +
+ "starr#40-45|ADJD|starr|comp:pos| " +
+ "vor#46-49|APPR|vor|| " +
+ "Angst#50-55|NN|angst|c:dat;n:sg;g:fem| " +
+ ".#55-56|$.|.||");
+ list.add(d3);
+
+ return list;
+ };
+
+ @Test
+ public void indexLucene () throws IOException {
+
+ // Base analyzer for searching and indexing
+ StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
+
+ // Based on
+ // http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/
+ // analysis/Analyzer.html?is-external=true
+
+ // Create configuration with base analyzer
+ IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
+
+ // Add a document 1 with the correct fields
+ IndexWriter w = new IndexWriter(index, config);
+
+ Collection docs = initIndexer();
+
+ @SuppressWarnings("unchecked")
+ Iterator<Map<String,String>> i = (Iterator<Map<String,String>>) docs.iterator();
+
+ for (; i.hasNext(); ) {
+ addDoc(w, i.next());
+ };
+
+ assertEquals(3, w.numDocs());
+
+ w.close();
+
+ // Check directory
+ DirectoryReader reader = DirectoryReader.open( index );
+ assertEquals(docs.size(), reader.maxDoc());
+ assertEquals(docs.size(), reader.numDocs());
+
+ // Check searcher
+ IndexSearcher searcher = new IndexSearcher( reader );
+
+ // textClass
+ // All texts of text class "news"
+ assertEquals(2,
+ searcher.search(
+ new TermQuery(
+ new Term("textClass", "news")
+ ), 10
+ ).totalHits
+ );
+
+ // textClass
+ // All texts of text class "sports"
+ assertEquals(2,
+ searcher.search(
+ new TermQuery(
+ new Term("textClass", "sports")
+ ), 10
+ ).totalHits
+ );
+
+ // TextIndex
+ // All docs containing "l:nehmen"
+ assertEquals(1,
+ searcher.search(
+ new TermQuery(
+ new Term("text", "l:nehmen")
+ ), 10
+ ).totalHits
+ );
+
+ // TextIndex
+ // All docs containing "s:den"
+ assertEquals(2,
+ searcher.search(
+ new TermQuery(
+ new Term("text", "s:den")
+ ), 10
+ ).totalHits
+ );
+
+ /*
+ assertEquals(3,
+ searcher.search(
+ new TermQuery(
+ new Term("text", "T")
+ ), 10
+ ).totalHits
+ );
+ */
+
+ // BooleanQuery
+ // All docs containing "s:den" and "l:sie"
+ TermQuery s_den = new TermQuery(new Term("text", "s:den"));
+ TermQuery l_sie = new TermQuery(new Term("text", "l:sie"));
+ BooleanQuery bool = new BooleanQuery();
+ bool.add(s_den, BooleanClause.Occur.MUST);
+ bool.add(l_sie, BooleanClause.Occur.MUST);
+
+ assertEquals(1, searcher.search(bool, 10).totalHits);
+
+ // BooleanQuery
+ // All docs containing "s:den" or "l:sie"
+ bool = new BooleanQuery();
+ bool.add(s_den, BooleanClause.Occur.SHOULD);
+ bool.add(l_sie, BooleanClause.Occur.SHOULD);
+ assertEquals(2, searcher.search(bool, 10).totalHits);
+
+
+ // RegexpQuery
+ // All docs containing ".{4}en" (liefen und Hunden)
+ RegexpQuery srquery = new RegexpQuery(
+ new Term("text", "s:.{4}en")
+ );
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ // RegexpQuery
+ // All docs containing "E." (Er) (2x)
+ srquery = new RegexpQuery(
+ new Term("text", "s:E.")
+ );
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ SpanRegexQueryWrapper ssrquery = new SpanRegexQueryWrapper("text", "s:E.");
+ assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
+
+
+ // RegexpQuery
+ // All docs containing "E." (er) (0x)
+ srquery = new RegexpQuery(
+ new Term("text", "s:e.")
+ );
+ assertEquals(0, searcher.search(srquery, 10).totalHits);
+
+ ssrquery = new SpanRegexQueryWrapper("text", "s:e.");
+ assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
+
+ // Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
+ // for Carstens question on wildcards
+
+ // RegexpQuery
+ // All docs containing "E."/i ([Ee]r) (2x)
+ srquery = new RegexpQuery(
+ new Term("text", "i:e.")
+ );
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ ssrquery = new SpanRegexQueryWrapper("text", "s:e.", true);
+ assertEquals("SpanMultiTermQueryWrapper(text:/i:e./)", ssrquery.toQuery().toString());
+ assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
+
+ // All docs containing "ng"/x (Angst) (2x)
+ srquery = new RegexpQuery(
+ new Term("text", "s:.*ng.*")
+ );
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ // [base=angst]
+ SpanTermQuery stq = new SpanTermQuery(new Term("text", "l:angst"));
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ // vor Angst
+ // [orth=vor][orth=Angst]
+ SpanNearQuery snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "s:vor")),
+ new SpanTermQuery(new Term("text", "s:Angst"))
+ },
+ 1,
+ true
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+ // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:VVFIN")),
+ new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery()
+ },
+ 5, // slop
+ true // inOrder
+ // Possible: CollectPayloads
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+
+ // Spannearquery [p:VVFIN][m:acc:sg:masc]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:VVFIN")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "m:c:acc")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "m:n:sg")),
+ new SpanTermQuery(new Term("text", "m:g:masc"))
+ },
+ -1,
+ false
+ )
+ },
+ -1, // slop
+ false // inOrder
+ // Possible: CollectPayloads
+ )
+ // new SpanTermQuery(new Term("text", "m:-acc:--sg:masc"))
+ },
+ 0, // slop
+ true // inOrder
+ // Possible: CollectPayloads
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+
+ // Spannearquery [p:VVFIN|m:3:sg:past:ind]
+ // Exact match!
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:VVFIN")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "m:p:3")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "m:n:sg")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "m:t:past")),
+ new SpanTermQuery(new Term("text", "m:m:ind")),
+ },
+ -1,
+ false
+ )
+ },
+ -1,
+ false
+ )
+ },
+ -1,
+ false
+ )
+ },
+ // new SpanTermQuery(new Term("text", "m:---3:--sg:past:-ind"))
+ -1, // slop
+ false // inOrder
+ // Possible: CollectPayloads
+ );
+ assertEquals(2, searcher.search(snquery, 10).totalHits);
+
+ // To make sure, this is not equal:
+ // Spannearquery [p:VVFIN & m:3:sg:past:ind]
+ // Exact match!
+ // Maybe it IS equal
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:VVFIN")),
+ new SpanTermQuery(new Term("text", "m:p:3")),
+ new SpanTermQuery(new Term("text", "m:n:sg")),
+ new SpanTermQuery(new Term("text", "m:t:past")),
+ new SpanTermQuery(new Term("text", "m:m:ind")),
+ },
+ -1, // slop
+ false // inOrder
+ // Possible: CollectPayloads
+ );
+ assertNotEquals(2, searcher.search(snquery, 10).totalHits);
+ // assertEquals(2, searcher.search(snquery, 10).totalHits);
+
+ // Spannearquery [p:VVFIN & m:3:sg & past:ind]
+ SpanSegmentQueryWrapper sniquery = new SpanSegmentQueryWrapper(
+ "text",
+ "p:VVFIN",
+ "m:p:3",
+ "m:n:sg",
+ "m:t:past",
+ "m:m:ind"
+ );
+ assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
+
+
+ // Todo:
+
+ /*
+ sniquery = new SpanSegmentQuery(
+ "text",
+ "p:VVFIN",
+ "m:p:3",
+ "m:n:sg",
+ "m:t:past",
+ "m:m:ind"
+ );
+ */
+
+ // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:VVFIN")),
+ new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery()
+ },
+ 5, // slop
+ true // inOrder
+ // Possible: CollectPayloads
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+ sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past", "m:m:ind", "m:n:sg");
+ assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
+
+ // [p = VVFIN & m:p = 3 & m:t = past & m:n != pl] or
+ // [p = VVFIN & m:p = 3 & m:t = past & !m:n = pl]
+ // TODO: Problem: What should happen in case the category does not exist?
+ // pssible solution: & ( m:n != pl & exists(m:n))
+ sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past");
+ SpanQuery snqquery = new SpanNotQuery(sniquery.toQuery(), new SpanTermQuery(new Term("text", "m:n:pl")));
+ assertEquals(2, searcher.search(snqquery, 10).totalHits);
+
+ // [p = NN & (m:c: = dat | m:c = acc)]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery(new Term("text", "p:NN")),
+ new SpanOrQuery(
+ new SpanTermQuery( new Term("text", "m:c:nom" )),
+ new SpanTermQuery( new Term("text", "m:c:acc" ))
+ )
+ },
+ -1,
+ false
+ );
+
+ assertEquals(2, searcher.search(snqquery, 10).totalHits);
+
+ // [p = NN & !(m:c: = nom | m:c = acc)]
+ snqquery = new SpanNotQuery(
+ new SpanTermQuery(new Term("text", "p:NN")),
+ new SpanOrQuery(
+ new SpanTermQuery( new Term("text", "m:c:nom" )),
+ new SpanTermQuery( new Term("text", "m:c:acc" ))
+ )
+ );
+ assertEquals(1, searcher.search(snqquery, 10).totalHits);
+
+ // [p = NN & !(m:c = nom)]
+ snqquery = new SpanNotQuery(
+ new SpanTermQuery( new Term("text", "p:NN")),
+ new SpanTermQuery( new Term("text", "m:c:nom" ))
+ );
+ assertEquals(3, searcher.search(snqquery, 10).totalHits);
+
+ // [p=NN & !(m:c = acc)]
+ snqquery = new SpanNotQuery(
+ new SpanTermQuery( new Term("text", "p:NN")),
+ new SpanTermQuery( new Term("text", "m:c:acc" ))
+ );
+ assertEquals(2, searcher.search(snqquery, 10).totalHits);
+
+ // [p=PPER][][p=ART]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery( new Term("text", "p:PPER")),
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery( new Term("text", "T")),
+ new SpanTermQuery( new Term("text", "p:ART"))
+ },
+ 0,
+ true),
+ },
+ 0,
+ true
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+
+ // Todo:
+ // [orth=się][]{2,4}[base=bać]
+ // [orth=się][orth!="[.!?,:]"]{,5}[base=bać]|[base=bać][base="on|ja|ty|my|wy"]?[orth=się]
+ // [pos=subst & orth="a.*"]{2}
+ // [tag=subst:sg:nom:n]
+ // [case==acc & case==gen] ??
+ // [case~acc & case~gen]
+ // [case~~acc]
+ // [base=bać][orth!=się]+[orth=się] within s
+
+ // [][][p:VAFIN] within s
+ // [][p:VAFIN] within s
+
+
+ // [][][p:VAFIN]
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery( new Term("text", "T") ),
+ new SpanTermQuery( new Term("text", "T") )
+ },
+ 0,
+ true
+ ),
+ new SpanTermQuery( new Term("text", "p:VAFIN") )
+ },
+ 0,
+ true
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+/*
+http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene
+*/
+
+ StringBuilder payloadString = new StringBuilder();
+ Map<Term, TermContext> termContexts = new HashMap<>();
+ for (AtomicReaderContext atomic : reader.leaves()) {
+ Bits bitset = atomic.reader().getLiveDocs();
+ // Spans spans = NearSpansOrdered();
+ Spans spans = snquery.getSpans(atomic, bitset, termContexts);
+
+ while (spans.next()) {
+ int docid = atomic.docBase + spans.doc();
+ if (spans.isPayloadAvailable()) {
+ for (byte[] payload : spans.getPayload()) {
+ /* retrieve payload for current matching span */
+ payloadString.append(new String(payload));
+ payloadString.append(" | ");
+ };
+ };
+ };
+ };
+ // assertEquals(33, payloadString.length());
+ assertEquals(0, payloadString.length());
+
+
+
+ // [][][p:VAFIN]
+ // without collecting payloads
+ snquery = new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanTermQuery( new Term("text", "T") ),
+ new SpanTermQuery( new Term("text", "T") )
+ },
+ 0,
+ true,
+ false
+ ),
+ new SpanTermQuery( new Term("text", "p:VAFIN") )
+ },
+ 0,
+ true,
+ false
+ );
+ assertEquals(1, searcher.search(snquery, 10).totalHits);
+
+ payloadString = new StringBuilder();
+ termContexts = new HashMap<>();
+ for (AtomicReaderContext atomic : reader.leaves()) {
+ Bits bitset = atomic.reader().getLiveDocs();
+ // Spans spans = NearSpansOrdered();
+ Spans spans = snquery.getSpans(atomic, bitset, termContexts);
+
+ while (spans.next()) {
+ int docid = atomic.docBase + spans.doc();
+ for (byte[] payload : spans.getPayload()) {
+ /* retrieve payload for current matching span */
+ payloadString.append(new String(payload));
+ payloadString.append(" | ");
+ };
+ };
+ };
+ assertEquals(0, payloadString.length());
+
+
+ // [][][p:VAFIN] in s
+ //([e:s:<][]*[T] | [T & e:s:<]) [T] ([p:VAFIN & e:s:>] | [T][]*[e:s:>]
+
+ /*
+ SpanSegmentWithinQuery ssequery = new SpanSegmentWithinQuery(
+ "text","s", new SpanSegmentSequenceQuery("text", "T", "T", "p:VAFIN")
+ );
+ assertEquals(0, searcher.search(ssequery.toQuery(), 10).totalHits);
+
+ payloadString = new StringBuilder();
+ termContexts = new HashMap<>();
+ for (AtomicReaderContext atomic : reader.leaves()) {
+ Bits bitset = atomic.reader().getLiveDocs();
+ // Spans spans = NearSpansOrdered();
+ Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
+
+ while (spans.next()) {
+ int docid = atomic.docBase + spans.doc();
+ for (byte[] payload : spans.getPayload()) {
+ /// retrieve payload for current matching span
+ payloadString.append(new String(payload));
+ payloadString.append(" | ");
+ };
+ };
+ };
+ assertEquals(0, payloadString.length(), 1);
+
+ ssequery = new SpanSegmentWithinQuery(
+ "text","s", new SpanSegmentSequenceQuery("text", "T", "p:VAFIN")
+ );
+
+ assertEquals("for " + ssequery.toQuery(),
+ 1, searcher.search(ssequery.toQuery(), 10).totalHits);
+
+ payloadString = new StringBuilder();
+ termContexts = new HashMap<>();
+ for (AtomicReaderContext atomic : reader.leaves()) {
+ Bits bitset = atomic.reader().getLiveDocs();
+ // Spans spans = NearSpansOrdered();
+ Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
+
+ while (spans.next()) {
+ int docid = atomic.docBase + spans.doc();
+ for (byte[] payload : spans.getPayload()) {
+ // retrieve payload for current matching span
+ payloadString.append(new String(payload));
+ payloadString.append(" | ");
+ };
+ fail("Doc: " + docid + " with " + spans.start() + "-" + spans.end() + " || " + payloadString.toString());
+ };
+ };
+ assertEquals(20, payloadString.length());
+
+ */
+
+ // --------------------______>
+
+
+
+ // Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), ssequery.toQuery());
+ /*
+ TopDocs topDocs = is.search(snq, 1);
+ Set<String> payloadSet = new HashSet<String>();
+ for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+ while (spans.next()) {
+ Collection<byte[]> payloads = spans.getPayload();
+
+ for (final byte [] payload : payloads) {
+ payloadSet.add(new String(payload, "UTF-8"));
+ }
+ }
+ }
+ */
+
+
+ /*
+Alternativ:
+ IndexReader reader = writer.getReader();
+ writer.close();
+ IndexSearcher searcher = newSearcher(reader);
+
+ PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
+
+ Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
+ if(VERBOSE)
+ System.out.println("Num payloads:" + payloads.size());
+ for (final byte [] bytes : payloads) {
+ if(VERBOSE)
+ System.out.println(new String(bytes, "UTF-8"));
+ }
+*/
+
+
+
+ /* new: */
+
+ // PayloadHelper helper = new PayloadHelper();
+
+ // Map<Term, TermContext> termContexts = new HashMap<>();
+//Spans spans;
+//spans = snquery.getSpans(searcher.getIndexReader());
+// searcher = helper.setUp(similarity, 1000);
+ /*
+ IndexReader reader = search.getReader(querycontainer.getFoundry());
+ Spans luceneSpans;
+ Bits bitset = atomic.reader().getLiveDocs();
+ for (byte[] payload : luceneSpans.getPayload())
+
+ /* Iterate over all matching documents */
+ /*
+ while (luceneSpans.next() && total < config.getMaxhits()) {
+ Span matchSpan;
+ StringBuilder payloadString = new StringBuilder();
+ int docid = atomic.docBase + luceneSpans.doc();
+ String docname = search.retrieveDocname(docid,
+ querycontainer.getFoundry());
+ total++;
+
+ for (byte[] payload : luceneSpans.getPayload())
+ */
+ /* retrieve payload for current matching span */
+ // payloadString.append(new String(payload));
+
+ /* create span containing result */
+ /*
+ matchSpan = new Span(docname);
+ matchSpan.setIndexdocid(docid);
+ matchSpan.setLayer(querycontainer.getLayer());
+ matchSpan.storePayloads(payloadString.toString());
+ matchSpans.add(matchSpan);
+*/
+ /*
+ * topdocs = searcher.search(new ConstantScoreQuery(corpusQ add
+ * position to list of positions to be considered for later
+ * searches
+ */
+ /*
+ validValues.put(docname,
+ matchSpan.getPayload(config.getPrefix()));
+ }
+*/
+
+
+ // Todo: API made by add() typisiert für queries, strings
+
+ // SpanPayloadCheckQuery for sentences!
+
+ /* Support regular expression in SpanSegmentQuery */
+ // new Regexp();
+ // new Term();
+
+ /*
+ Vielleicht: spanSegmentQuery(new Term(), new Wildcard(), new Regex());
+ */
+
+ // And Not ->
+ // SpanTermDiffQuery
+
+ /*
+ SpanNearQuery poquery = new SpanNearQuery(
+
+ );
+ */
+
+ reader.close();
+
+
+ };
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
new file mode 100644
index 0000000..a4ba6a2
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
@@ -0,0 +1,75 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+
+/** Indexes three documents in two commits and checks the document and sentence counters. */
+@RunWith(JUnit4.class)
+public class TestKorapIndex {
+    /** Adds documents via both addTV variants (annotation string, token stream) and checks numberOf(). */
+    @Test
+    public void indexExample() throws IOException {
+        KorapIndex index = new KorapIndex();
+
+        // First document: the "-:sentences" meta token carries the value 2
+        FieldDocument peter = new FieldDocument();
+        peter.addString("name", "Peter");
+        peter.addInt("zahl1", 56);
+        peter.addInt("zahl2", "58");
+        peter.addText("teaser", "Das ist der Name der Rose");
+        peter.addTV("base", "ich bau", "[(0-3)s:ich|l:ich|p:PPER|-:sentences#-$<i>2][(4-7)s:bau|l:bauen|p:VVFIN]");
+        index.addDoc(peter);
+
+        // Second document: token stream assembled by hand, "sentences" meta value 5
+        FieldDocument hans = new FieldDocument();
+        hans.addString("name", "Hans");
+        hans.addInt("zahl1", 14);
+        hans.addText("teaser", "Das Sein");
+
+        MultiTermTokenStream tokens = hans.newMultiTermTokenStream();
+        tokens.addMultiTermToken("s:wir#0-3", "l:wir", "p:PPER");
+        tokens.addMultiTermToken("s:sind#4-8", "l:sein", "p:VVFIN");
+        tokens.addMeta("sentences", 5);
+        hans.addTV("base", "wir sind", tokens);
+        index.addDoc(hans);
+
+        // Commit before the counters are read
+        index.commit();
+
+        // 2 + 5 sentences from the two documents so far
+        assertEquals(2, index.numberOf("documents"));
+        assertEquals(7, index.numberOf("sentences"));
+
+        // Third document: "sentences" meta value 3, added in a second commit
+        FieldDocument frank = new FieldDocument();
+        frank.addString("name", "Frank");
+        frank.addInt("zahl1", 59);
+        frank.addInt("zahl2", 65);
+        frank.addText("teaser", "Noch ein Versuch");
+        // NOTE(review): primary text "ich bau" looks copy-pasted; the tokens spell "der baum" - TODO confirm
+        frank.addTV("base", "ich bau", "[(0-3)s:der|l:der|p:DET|-:sentences#-$<i>3][(4-8)s:baum|l:baum|p:NN]");
+        index.addDoc(frank);
+
+        // Commit the third document
+        index.commit();
+
+        assertEquals(3, index.numberOf("documents"));
+        assertEquals(10, index.numberOf("sentences"));
+
+        // TODO: also exercise searching, e.g.
+        // KorapQuery kq = new KorapQuery("text"); index.search();
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
new file mode 100644
index 0000000..c081f7c
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -0,0 +1,186 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanMatchModifyQuery;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.index.Term;
+
+// mvn -Dtest=TestMatchIndex#indexExample1 test
+
+// match is shrink and split
+
+@RunWith(JUnit4.class)
+public class TestMatchIndex {
+
+ @Test
+ public void indexExample1 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+
+ ki.commit();
+
+ SpanQuery sq;
+ KorapResult kr;
+
+ sq = new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanClassQuery(
+ new SpanTermQuery(new Term("base", "s:a"))
+ )
+ );
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("StartPos (0)", 7, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 9, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
+
+
+ sq = new SpanMatchModifyQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanClassQuery(
+ new SpanTermQuery(new Term("base", "s:a"))
+ )
+ )
+ );
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("StartPos (0)", 8, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 9, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "... cabcab[a]c", kr.match(0).snippetBrackets());
+
+ sq = new SpanMatchModifyQuery(
+ new SpanNextQuery(
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
+ ), (byte) 3
+ );
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 3, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 2, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", kr.match(0).snippetBrackets());
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 5, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abca[b]cabac", kr.match(1).snippetBrackets());
+ assertEquals("StartPos (2)", 7, kr.match(2).startPos);
+ assertEquals("EndPos (2)", 8, kr.match(2).endPos);
+ assertEquals("SnippetBrackets (2)", "... bcabca[b]ac", kr.match(2).snippetBrackets());
+
+
+
+ // abcabcabac
+ sq = new SpanMatchModifyQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ new SpanClassQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")))
+ ), (byte) 2
+ )), (byte) 2);
+
+ kr = ki.search(sq, (short) 10);
+
+ // System.err.println(kr.toJSON());
+
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
+ assertEquals("SnippetHTML (0)", "<span class=\"korap-more-left\"></span>bcabca<span class=\"korap-match\">b<span class=\"korap-highlight korap-class-0\">a</span></span>c", kr.match(0).snippetHTML());
+
+ // Offset tokens
+ kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "... ca[b{a}]c", kr.match(0).snippetBrackets());
+ // Offset Characters
+ kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
+ assertEquals("totalResults", 1, kr.totalResults());
+ assertEquals("SnippetBrackets (0)", "... a[b{a}] ...", kr.match(0).snippetBrackets());
+
+ // System.err.println(kr.toJSON());
+
+ sq = new SpanMatchModifyQuery(
+ new SpanNextQuery(
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 1),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")), (byte) 2)
+ ), (byte) 3
+ );
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals("totalResults", 2, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 3, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "a[{1:b}{2:c}]abcaba ...", kr.match(0).snippetBrackets());
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 6, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abca[{1:b}{2:c}]abac", kr.match(1).snippetBrackets());
+
+ assertEquals("Document count", 1, ki.numberOf("documents"));
+ assertEquals("Token count", 10, ki.numberOf("t"));
+
+
+ sq = new SpanMatchModifyQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ new SpanClassQuery(
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:b")),
+ new SpanTermQuery(new Term("base", "s:c"))
+ )
+ )
+ )
+ );
+
+ kr = ki.search(sq, (short) 2);
+
+ assertEquals("totalResults", 2, kr.totalResults());
+ assertEquals("StartPos (0)", 1, kr.match(0).startPos);
+ assertEquals("EndPos (0)", 3, kr.match(0).endPos);
+ assertEquals("SnippetBrackets (0)", "a[bc]abcaba ...", kr.match(0).snippetBrackets());
+ assertEquals("StartPos (1)", 4, kr.match(1).startPos);
+ assertEquals("EndPos (1)", 6, kr.match(1).endPos);
+ assertEquals("SnippetBrackets (1)", "abca[bc]abac", kr.match(1).snippetBrackets());
+
+ assertEquals(1, ki.numberOf("documents"));
+ assertEquals(10, ki.numberOf("t"));
+ };
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
new file mode 100644
index 0000000..a3860ea
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
@@ -0,0 +1,253 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+
+import org.apache.lucene.index.Term;
+
+// mvn -Dtest=TestNextIndex#indexExample1 test
+
+/** Tests SpanNextQuery: one span directly followed by another, for term spans and element spans. */
+@RunWith(JUnit4.class)
+public class TestNextIndex {
+    // TODO: store the primary data separately as a non-indexed field.
+
+    /** Term-next-term queries (also nested) on "abcabcabac": hit counts and match positions. */
+    @Test
+    public void indexExample1() throws IOException {
+        KorapIndex index = new KorapIndex();
+
+        // Primary text: abcabcabac
+        FieldDocument doc = new FieldDocument();
+        doc.addTV("base",
+            "abcabcabac",
+            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+            "[(1-2)s:b|i:b|_1#1-2]" +
+            "[(2-3)s:c|i:c|_2#2-3]" +
+            "[(3-4)s:a|i:a|_3#3-4]" +
+            "[(4-5)s:b|i:b|_4#4-5]" +
+            "[(5-6)s:c|i:c|_5#5-6]" +
+            "[(6-7)s:a|i:a|_6#6-7]" +
+            "[(7-8)s:b|i:b|_7#7-8]" +
+            "[(8-9)s:a|i:a|_8#8-9]" +
+            "[(9-10)s:c|i:c|_9#9-10]");
+        index.addDoc(doc);
+
+        index.commit();
+
+        // "a" directly followed by "b"
+        SpanQuery query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:a")),
+            new SpanTermQuery(new Term("base", "s:b"))
+        );
+
+        KorapResult result = index.search(query, (short) 10);
+
+        assertEquals("totalResults", 3, result.totalResults());
+        assertEquals("StartPos (0)", 0, result.match(0).startPos);
+        assertEquals("EndPos (0)", 2, result.match(0).endPos);
+        assertEquals("StartPos (1)", 3, result.match(1).startPos);
+        assertEquals("EndPos (1)", 5, result.match(1).endPos);
+        assertEquals("StartPos (2)", 6, result.match(2).startPos);
+        assertEquals("EndPos (2)", 8, result.match(2).endPos);
+
+        // "b" directly followed by "c"
+        query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:b")),
+            new SpanTermQuery(new Term("base", "s:c"))
+        );
+
+        result = index.search(query, (short) 10);
+
+        assertEquals("totalResults", 2, result.totalResults());
+        assertEquals("StartPos (0)", 1, result.match(0).startPos);
+        assertEquals("EndPos (0)", 3, result.match(0).endPos);
+        assertEquals("StartPos (1)", 4, result.match(1).startPos);
+        assertEquals("EndPos (1)", 6, result.match(1).endPos);
+
+        // One document whose "-:t" meta value is 10
+        assertEquals(1, index.numberOf("documents"));
+        assertEquals(10, index.numberOf("t"));
+
+        // "a" followed by the nested sequence "b c"; searched with 2 instead of 10 (presumably the hit limit)
+        query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:a")),
+            new SpanNextQuery(
+                new SpanTermQuery(new Term("base", "s:b")),
+                new SpanTermQuery(new Term("base", "s:c"))
+            )
+        );
+
+        result = index.search(query, (short) 2);
+
+        assertEquals("totalResults", 2, result.totalResults());
+        assertEquals("StartPos (0)", 0, result.match(0).startPos);
+        assertEquals("EndPos (0)", 3, result.match(0).endPos);
+        assertEquals("StartPos (1)", 3, result.match(1).startPos);
+        assertEquals("EndPos (1)", 6, result.match(1).endPos);
+
+        assertEquals(1, index.numberOf("documents"));
+        assertEquals(10, index.numberOf("t"));
+    }
+
+    /** A term directly followed by an element span: "c" before element "x". */
+    @Test
+    public void indexExample2() throws IOException {
+        KorapIndex index = new KorapIndex();
+
+        // Primary text: abcabcabac
+        // Element "x" starts at token 3 and covers "abca"
+        FieldDocument doc = new FieldDocument();
+        doc.addTV("base",
+            "abcabcabac",
+            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+            "[(1-2)s:b|i:b|_1#1-2]" +
+            "[(2-3)s:c|i:c|_2#2-3]" +
+            "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" +
+            "[(4-5)s:b|i:b|_4#4-5]" +
+            "[(5-6)s:c|i:c|_5#5-6]" +
+            "[(6-7)s:a|i:a|_6#6-7]" +
+            "[(7-8)s:b|i:b|_7#7-8]" +
+            "[(8-9)s:a|i:a|_8#8-9]" +
+            "[(9-10)s:c|i:c|_9#9-10]");
+        index.addDoc(doc);
+
+        index.commit();
+
+        // "c" directly followed by an "x" element
+        SpanQuery query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:c")),
+            new SpanElementQuery("base", "x")
+        );
+
+        KorapResult result = index.search(query, (short) 10);
+
+        assertEquals("ab[cabca]bac", result.match(0).getSnippetBrackets());
+    }
+
+    /** An element span directly followed by a term: element "x" before "b". */
+    @Test
+    public void indexExample3() throws IOException {
+        KorapIndex index = new KorapIndex();
+
+        // Primary text: abcabcabac
+        // Element "x" starts at token 3 and covers "abca"
+        FieldDocument doc = new FieldDocument();
+        doc.addTV("base",
+            "abcabcabac",
+            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+            "[(1-2)s:b|i:b|_1#1-2]" +
+            "[(2-3)s:c|i:c|_2#2-3]" +
+            "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" +
+            "[(4-5)s:b|i:b|_4#4-5]" +
+            "[(5-6)s:c|i:c|_5#5-6]" +
+            "[(6-7)s:a|i:a|_6#6-7]" +
+            "[(7-8)s:b|i:b|_7#7-8]" +
+            "[(8-9)s:a|i:a|_8#8-9]" +
+            "[(9-10)s:c|i:c|_9#9-10]");
+        index.addDoc(doc);
+
+        index.commit();
+
+        // An "x" element directly followed by "b"
+        SpanQuery query = new SpanNextQuery(
+            new SpanElementQuery("base", "x"),
+            new SpanTermQuery(new Term("base", "s:b"))
+        );
+
+        KorapResult result = index.search(query, (short) 10);
+        assertEquals("abc[abcab]ac", result.match(0).getSnippetBrackets());
+    }
+
+    /** Element/term succession across two documents, in both orders. */
+    @Test
+    public void indexExample4() throws IOException {
+        KorapIndex index = new KorapIndex();
+
+        // First document: abcabcabac
+        FieldDocument first = new FieldDocument();
+        first.addString("ID", "doc-1");
+        first.addTV("base",
+            "abcabcabac",
+            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+            "[(1-2)s:b|i:b|_1#1-2]" +
+            "[(2-3)s:c|i:c|_2#2-3]" +
+            "[(3-4)s:a|i:a|_3#3-4|<>:x#3-7$<i>7]" +
+            "[(4-5)s:b|i:b|_4#4-5]" +
+            "[(5-6)s:c|i:c|_5#5-6]" +
+            // NOTE(review): the next token closes with "]" before "<>:x#6-8$<i>8]"; a "|" may be intended - TODO confirm
+            "[(6-7)s:a|i:a|_6#6-7]<>:x#6-8$<i>8]" +
+            "[(7-8)s:b|i:b|_7#7-8]" +
+            "[(8-9)s:a|i:a|_8#8-9]" +
+            "[(9-10)s:c|i:c|_9#9-10]");
+        index.addDoc(first);
+
+        // Second document: xbzxbzxbxz
+        FieldDocument second = new FieldDocument();
+        second.addString("ID", "doc-2");
+        second.addTV("base",
+            "xbzxbzxbxz",
+            "[(0-1)s:x|i:x|_0#0-1|-:t$<i>10]" +
+            "[(1-2)s:b|i:b|_1#1-2]" +
+            "[(2-3)s:z|i:z|_2#2-3]" +
+            "[(3-4)s:x|i:x|_3#3-4|<>:x#3-7$<i>7]" +
+            "[(4-5)s:b|i:b|_4#4-5]" +
+            "[(5-6)s:z|i:z|_5#5-6]" +
+            "[(6-7)s:x|i:x|_6#6-7]" +
+            "[(7-8)s:b|i:b|_7#7-8]" +
+            "[(8-9)s:x|i:x|_8#8-9]" +
+            "[(9-10)s:z|i:z|_9#9-10]");
+        index.addDoc(second);
+
+        index.commit();
+
+        // An "x" element directly followed by "b": expected once in each document
+        SpanQuery query = new SpanNextQuery(
+            new SpanElementQuery("base", "x"),
+            new SpanTermQuery(new Term("base", "s:b"))
+        );
+
+        KorapResult result = index.search(query, (short) 10);
+        assertEquals(2, result.totalResults());
+        assertEquals("abc[abcab]ac", result.match(0).getSnippetBrackets());
+        assertEquals("xbz[xbzxb]xz", result.match(1).getSnippetBrackets());
+
+        query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:c")),
+            new SpanElementQuery("base", "x")
+        );
+
+        result = index.search(query, (short) 10);
+        assertEquals(1, result.totalResults());
+        assertEquals("ab[cabca]bac", result.match(0).getSnippetBrackets());
+
+        query = new SpanNextQuery(
+            new SpanTermQuery(new Term("base", "s:z")),
+            new SpanElementQuery("base", "x")
+        );
+
+        result = index.search(query, (short) 10);
+        assertEquals(1, result.totalResults());
+        assertEquals("xb[zxbzx]bxz", result.match(0).getSnippetBrackets());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestPosToOffset.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestPosToOffset.java
new file mode 100644
index 0000000..d656275
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestPosToOffset.java
@@ -0,0 +1,85 @@
+import java.util.*;
+import java.io.*;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import de.ids_mannheim.korap.index.PositionsToOffset;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.Bits;
+
+import java.nio.ByteBuffer;
+
+
+/** Tests PositionsToOffset: looking up the character offsets that belong to token positions. */
+@RunWith(JUnit4.class)
+public class TestPosToOffset {
+    /** Indexes two three-token documents and checks start/end offsets for the registered positions. */
+    @Test
+    public void indexExample1() throws IOException {
+        KorapIndex ki = new KorapIndex();
+
+        FieldDocument fd = new FieldDocument();
+        fd.addTV("base",
+            "a b c",
+            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" +
+            "[(2-3)s:b|i:b|_1#2-3]" +
+            "[(4-5)s:c|i:c|_2#4-5]");
+        ki.addDoc(fd);
+
+        fd = new FieldDocument();
+        fd.addTV("base",
+            "x y z",
+            "[(0-1)s:x|i:x|_0#0-2|-:t$<i>3]" +
+            "[(3-4)s:y|i:y|_1#3-4]" +
+            "[(6-7)s:z|i:z|_2#6-7]"); // 3
+        ki.addDoc(fd);
+
+        ki.commit();
+
+        String field = "base";
+        for (AtomicReaderContext atomic : ki.reader().leaves()) {
+            PositionsToOffset pto = new PositionsToOffset(atomic, field);
+            // Register the (document, token position) pairs that are looked up below
+            pto.add(0, 1);
+            pto.add(0, 2);
+            pto.add(1, 2);
+            pto.add(1, 1);
+            pto.add(1, 20);
+            // JUnit order is (message, expected, actual), so a failure reports the values correctly
+            assertEquals("Start 0,1", 2, pto.start(0, 1));
+            assertEquals("End 0,1", 3, pto.end(0, 1));
+
+            assertEquals("Start 0,2", 4, pto.start(0, 2));
+            assertEquals("End 0,2", 5, pto.end(0, 2));
+
+            assertEquals("Start 1,2", 6, pto.start(1, 2));
+            assertEquals("End 1,2", 7, pto.end(1, 2));
+
+            assertEquals("Start 1,1", 3, pto.start(1, 1));
+            assertEquals("End 1,1", 4, pto.end(1, 1));
+            // Position 20 is not among the three tokens of document 1: start 0 and end -1 are expected
+            assertEquals("Start 1,20", 0, pto.start(1, 20));
+            assertEquals("End 1,20", -1, pto.end(1, 20));
+        }
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
new file mode 100644
index 0000000..b88a10b
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -0,0 +1,38 @@
+ /*
+
+within(x,y)
+
+SpanRelationQuery->
+rel("SUBJ", query1, query2)
+
+1. return all words that are subjects of (that are linked by the “SUBJ” relation to) the string “beginnt”
+xip/syntax-dep_rel:beginnt >[func=”SUBJ”] xip/syntax-dep_rel:.*
+-> rel("SUBJ", highlight(query1), new TermQuery("s:beginnt"))
+
+
+SUBJ is modelled with an offset that spans the whole range
+
+https://de.wikipedia.org/wiki/Dependenzgrammatik
+
+"im" governs "Wasser" in the German example:
+dass die Kinder im Wasser gespielt haben
+3. im#16-18$
+3. >:COORD#16-25$3,4
+4. Wasser#19-25$
+4. <:COORD#16-25$3,4
+
+# okay: return all main verbs that have no “SUBJ” relation specified
+
+
+# Not okay: 5. return all verbs with (at least?) 3 outgoing relations [think of ditransitives such as give]
+
+xip/morph_pos:VERB & xip/token:.* & xip/token:.* & xip/token:.* & xip/token:.* & #1 _=_#2 & #2 >[func=$x] #3 & #2 >[func=$x]#4 & #2 >[func=$x] #5
+
+# Okay: return all verbs that have singular SUBJects and dative OBJects
+
+xip/morph_pos:VERB & mpt/morph_msd:Sg & mpt/morph_msd:Dat & #1 >[func=”SUBJ”] #2 & #1 >[func=”OBJ”] #3
+
+-> [p:VVFIN](>SUBJ[nr:sg] & >OBJ[c:dat])
+
+
+ */
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java b/trunk/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
new file mode 100644
index 0000000..2baa433
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
@@ -0,0 +1,949 @@
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.index.Term;
+
+import java.nio.ByteBuffer;
+
+// mvn -Dtest=TestWithinIndex#indexExample1 test
+
+@RunWith(JUnit4.class)
+public class TestWithinIndex {
+
+ // Todo: primary data as a non-indexed field separated.
+
+ @Test
+ public void indexExample1a () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // Three nested elements: <a>x<a>y<a>zhij</a>hij</a>hij</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z h i j h i j h i j ",
+         "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
+         "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
+         "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
+         "[(9-12)s:h]" + // 4
+         "[(12-15)s:i]" + // 5
+         "[(15-18)s:j]" + // 6
+         "[(18-21)s:h]" + // 7
+         "[(21-24)s:i]" + // 8
+         "[(24-27)s:j]" + // 9
+         "[(27-30)s:h]" + // 10
+         "[(30-33)s:i]" + // 11
+         "[(33-36)s:j]"); // 12
+     index.addDoc(doc);
+
+     index.commit();
+
+     // Find every <a> element that contains an "s:h" token; the
+     // expectations list one match per element/token combination.
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult result = index.search(query, (short) 10);
+
+     // Debugging aid: System.err.println(result.toJSON());
+
+     assertEquals("totalResults", 6, result.totalResults());
+
+     assertEquals("StartPos (0)", 0, result.match(0).startPos);
+     assertEquals("EndPos (0)", 12, result.match(0).endPos);
+     assertEquals("StartPos (1)", 0, result.match(1).startPos);
+     assertEquals("EndPos (1)", 12, result.match(1).endPos);
+     assertEquals("StartPos (2)", 0, result.match(2).startPos);
+     assertEquals("EndPos (2)", 12, result.match(2).endPos);
+     assertEquals("StartPos (3)", 1, result.match(3).startPos);
+     assertEquals("EndPos (3)", 9, result.match(3).endPos);
+     assertEquals("StartPos (4)", 1, result.match(4).startPos);
+     assertEquals("EndPos (4)", 9, result.match(4).endPos);
+     assertEquals("StartPos (5)", 2, result.match(5).startPos);
+     assertEquals("EndPos (5)", 6, result.match(5).endPos);
+
+     assertEquals(1, index.numberOf("documents"));
+ }
+
+ @Test
+ public void indexExample1b () throws IOException {
+     // Cases 9, 12, 13
+     KorapIndex index = new KorapIndex();
+
+     // First document: <a>x<a>y<a>zhij</a>hij</a>hij</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z h i j h i j h i j ",
+         "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
+         "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
+         "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
+         "[(9-12)s:h]" + // 4
+         "[(12-15)s:i]" + // 5
+         "[(15-18)s:j]" + // 6
+         "[(18-21)s:h]" + // 7
+         "[(21-24)s:i]" + // 8
+         "[(24-27)s:j]" + // 9
+         "[(27-30)s:h]" + // 10
+         "[(30-33)s:i]" + // 11
+         "[(33-36)s:j]"); // 12
+     index.addDoc(doc);
+
+     // Second document: same content as the first one
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z h i j h i j h i j ",
+         "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
+         "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
+         "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
+         "[(9-12)s:h]" + // 4
+         "[(12-15)s:i]" + // 5
+         "[(15-18)s:j]" + // 6
+         "[(18-21)s:h]" + // 7
+         "[(21-24)s:i]" + // 8
+         "[(24-27)s:j]" + // 9
+         "[(27-30)s:h]" + // 10
+         "[(30-33)s:i]" + // 11
+         "[(33-36)s:j]"); // 12
+     index.addDoc(doc);
+
+
+
+     // Persist both documents
+     index.commit();
+
+     // Every <a> element containing an "s:h" token; the same six
+     // matches are expected in each of the two documents.
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult result = index.search(query, (short) 15);
+
+     // Debugging aid: System.err.println(result.toJSON());
+
+     assertEquals("totalResults", 12, result.totalResults());
+
+     assertEquals("StartPos (0)", 0, result.match(0).startPos);
+     assertEquals("EndPos (0)", 12, result.match(0).endPos);
+     assertEquals("Doc (0)", 0, result.match(0).internalDocID);
+     assertEquals("StartPos (1)", 0, result.match(1).startPos);
+     assertEquals("EndPos (1)", 12, result.match(1).endPos);
+     assertEquals("Doc (1)", 0, result.match(1).internalDocID);
+     assertEquals("StartPos (2)", 0, result.match(2).startPos);
+     assertEquals("EndPos (2)", 12, result.match(2).endPos);
+     assertEquals("Doc (2)", 0, result.match(2).internalDocID);
+     assertEquals("StartPos (3)", 1, result.match(3).startPos);
+     assertEquals("EndPos (3)", 9, result.match(3).endPos);
+     assertEquals("Doc (3)", 0, result.match(3).internalDocID);
+     assertEquals("StartPos (4)", 1, result.match(4).startPos);
+     assertEquals("EndPos (4)", 9, result.match(4).endPos);
+     assertEquals("Doc (4)", 0, result.match(4).internalDocID);
+     assertEquals("StartPos (5)", 2, result.match(5).startPos);
+     assertEquals("EndPos (5)", 6, result.match(5).endPos);
+     assertEquals("Doc (5)", 0, result.match(5).internalDocID);
+
+     assertEquals("StartPos (6)", 0, result.match(6).startPos);
+     assertEquals("EndPos (6)", 12, result.match(6).endPos);
+     assertEquals("Doc (6)", 1, result.match(6).internalDocID);
+     assertEquals("StartPos (7)", 0, result.match(7).startPos);
+     assertEquals("EndPos (7)", 12, result.match(7).endPos);
+     assertEquals("Doc (7)", 1, result.match(7).internalDocID);
+     assertEquals("StartPos (8)", 0, result.match(8).startPos);
+     assertEquals("EndPos (8)", 12, result.match(8).endPos);
+     assertEquals("Doc (8)", 1, result.match(8).internalDocID);
+     assertEquals("StartPos (9)", 1, result.match(9).startPos);
+     assertEquals("EndPos (9)", 9, result.match(9).endPos);
+     assertEquals("Doc (9)", 1, result.match(9).internalDocID);
+     assertEquals("StartPos (10)", 1, result.match(10).startPos);
+     assertEquals("EndPos (10)", 9, result.match(10).endPos);
+     assertEquals("Doc (10)", 1, result.match(10).internalDocID);
+     assertEquals("StartPos (11)", 2, result.match(11).startPos);
+     assertEquals("EndPos (11)", 6, result.match(11).endPos);
+     assertEquals("Doc (11)", 1, result.match(11).internalDocID);
+
+     assertEquals(2, index.numberOf("documents"));
+ }
+
+
+ @Test
+ public void indexExample1c () throws IOException {
+     // Cases 9, 12, 13
+     KorapIndex index = new KorapIndex();
+
+     // First document: <a>x<a>y<a>zhij</a>hij</a>hij</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z h i j h i j h i j ",
+         "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
+         "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
+         "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
+         "[(9-12)s:h]" + // 4
+         "[(12-15)s:i]" + // 5
+         "[(15-18)s:j]" + // 6
+         "[(18-21)s:h]" + // 7
+         "[(21-24)s:i]" + // 8
+         "[(24-27)s:j]" + // 9
+         "[(27-30)s:h]" + // 10
+         "[(30-33)s:i]" + // 11
+         "[(33-36)s:j]"); // 12
+     index.addDoc(doc);
+
+     // Second document, without any "s:h" token: <a>x<a>y<a>zabc</a>abc</a>abc</a>
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z a b c a b c a b c ",
+         "[(0-3)s:x|<>:a#0-36$<i>12]" + // 1
+         "[(3-6)s:y|<>:a#3-27$<i>9]" + // 2
+         "[(6-9)s:z|<>:a#6-18$<i>6]" + // 3
+         "[(9-12)s:a]" + // 4
+         "[(12-15)s:b]" + // 5
+         "[(15-18)s:c]" + // 6
+         "[(18-21)s:a]" + // 7
+         "[(21-24)s:b]" + // 8
+         "[(24-27)s:c]" + // 9
+         "[(27-30)s:a]" + // 10
+         "[(30-33)s:b]" + // 11
+         "[(33-36)s:c]"); // 12
+     index.addDoc(doc);
+
+
+     // Persist both documents
+     index.commit();
+
+     // Every <a> element containing an "s:h" token; all expected
+     // matches are asserted to come from internal document 0.
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult result = index.search(query, (short) 15);
+
+     // Debugging aid: System.err.println(result.toJSON());
+
+     assertEquals("totalResults", 6, result.totalResults());
+
+     assertEquals("StartPos (0)", 0, result.match(0).startPos);
+     assertEquals("EndPos (0)", 12, result.match(0).endPos);
+     assertEquals("Doc (0)", 0, result.match(0).internalDocID);
+     assertEquals("StartPos (1)", 0, result.match(1).startPos);
+     assertEquals("EndPos (1)", 12, result.match(1).endPos);
+     assertEquals("Doc (1)", 0, result.match(1).internalDocID);
+     assertEquals("StartPos (2)", 0, result.match(2).startPos);
+     assertEquals("EndPos (2)", 12, result.match(2).endPos);
+     assertEquals("Doc (2)", 0, result.match(2).internalDocID);
+     assertEquals("StartPos (3)", 1, result.match(3).startPos);
+     assertEquals("EndPos (3)", 9, result.match(3).endPos);
+     assertEquals("Doc (3)", 0, result.match(3).internalDocID);
+     assertEquals("StartPos (4)", 1, result.match(4).startPos);
+     assertEquals("EndPos (4)", 9, result.match(4).endPos);
+     assertEquals("Doc (4)", 0, result.match(4).internalDocID);
+     assertEquals("StartPos (5)", 2, result.match(5).startPos);
+     assertEquals("EndPos (5)", 6, result.match(5).endPos);
+     assertEquals("Doc (5)", 0, result.match(5).internalDocID);
+
+     assertEquals(2, index.numberOf("documents"));
+ }
+
+
+ @Test
+ public void indexExample2a () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // Three <a> elements that all open at the first token: <a><a><a>h</a>hij</a>hij</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "h i j h i j h i j ",
+         "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+         "[(3-6)s:h]" + // 2
+         "[(12-15)s:i]" + // 3
+         "[(15-18)s:j]" + // 4
+         "[(18-21)s:h]" + // 5
+         "[(21-24)s:i]" + // 6
+         "[(24-27)s:j]" + // 7
+         "[(27-30)s:h]" + // 8
+         "[(30-33)s:i]" + // 9
+         "[(33-36)s:j]"); // 10
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+     KorapResult elementResult = index.search(elementQuery, (short) 10);
+
+     assertEquals("totalResults", 3, elementResult.totalResults());
+     assertEquals("StartPos (0)", 0, elementResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, elementResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, elementResult.match(1).startPos);
+     assertEquals("EndPos (1)", 7, elementResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, elementResult.match(2).startPos);
+     assertEquals("EndPos (2)", 10, elementResult.match(2).endPos);
+
+     SpanQuery withinQuery = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult withinResult = index.search(withinQuery, (short) 10);
+     // Debugging aid: System.err.println(withinResult.toJSON());
+
+     assertEquals("totalResults", 9, withinResult.totalResults());
+     assertEquals("StartPos (0)", 0, withinResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, withinResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, withinResult.match(1).startPos);
+     assertEquals("EndPos (1)", 4, withinResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, withinResult.match(2).startPos);
+     assertEquals("EndPos (2)", 7, withinResult.match(2).endPos);
+     assertEquals("StartPos (3)", 0, withinResult.match(3).startPos);
+     assertEquals("EndPos (3)", 7, withinResult.match(3).endPos);
+     assertEquals("StartPos (4)", 0, withinResult.match(4).startPos);
+     assertEquals("EndPos (4)", 7, withinResult.match(4).endPos);
+     assertEquals("StartPos (5)", 0, withinResult.match(5).startPos);
+     assertEquals("EndPos (5)", 10, withinResult.match(5).endPos);
+     assertEquals("StartPos (6)", 0, withinResult.match(6).startPos);
+     assertEquals("EndPos (6)", 10, withinResult.match(6).endPos);
+     assertEquals("StartPos (7)", 0, withinResult.match(7).startPos);
+     assertEquals("EndPos (7)", 10, withinResult.match(7).endPos);
+     assertEquals("StartPos (8)", 0, withinResult.match(8).startPos);
+     assertEquals("EndPos (8)", 10, withinResult.match(8).endPos);
+ }
+
+ @Test
+ public void indexExample2b () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // 6,9,12
+     // As indexExample2a plus a trailing "h" after the last element: <a><a><a>h</a>hij</a>hij</a>h
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "h i j h i j h i j h ",
+         "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+         "[(3-6)s:h]" + // 2
+         "[(12-15)s:i]" + // 3
+         "[(15-18)s:j]" + // 4
+         "[(18-21)s:h]" + // 5
+         "[(21-24)s:i]" + // 6
+         "[(24-27)s:j]" + // 7
+         "[(27-30)s:h]" + // 8
+         "[(30-33)s:i]" + // 9
+         "[(33-36)s:j]" + // 10
+         "[(37-40)s:h]");
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+     KorapResult elementResult = index.search(elementQuery, (short) 10);
+
+     assertEquals("totalResults", 3, elementResult.totalResults());
+     assertEquals("StartPos (0)", 0, elementResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, elementResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, elementResult.match(1).startPos);
+     assertEquals("EndPos (1)", 7, elementResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, elementResult.match(2).startPos);
+     assertEquals("EndPos (2)", 10, elementResult.match(2).endPos);
+
+     SpanQuery withinQuery = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult withinResult = index.search(withinQuery, (short) 10);
+     // Debugging aid: System.err.println(withinResult.toJSON());
+
+     assertEquals("totalResults", 9, withinResult.totalResults());
+     assertEquals("StartPos (0)", 0, withinResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, withinResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, withinResult.match(1).startPos);
+     assertEquals("EndPos (1)", 4, withinResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, withinResult.match(2).startPos);
+     assertEquals("EndPos (2)", 7, withinResult.match(2).endPos);
+     assertEquals("StartPos (3)", 0, withinResult.match(3).startPos);
+     assertEquals("EndPos (3)", 7, withinResult.match(3).endPos);
+     assertEquals("StartPos (4)", 0, withinResult.match(4).startPos);
+     assertEquals("EndPos (4)", 7, withinResult.match(4).endPos);
+     assertEquals("StartPos (5)", 0, withinResult.match(5).startPos);
+     assertEquals("EndPos (5)", 10, withinResult.match(5).endPos);
+     assertEquals("StartPos (6)", 0, withinResult.match(6).startPos);
+     assertEquals("EndPos (6)", 10, withinResult.match(6).endPos);
+     assertEquals("StartPos (7)", 0, withinResult.match(7).startPos);
+     assertEquals("EndPos (7)", 10, withinResult.match(7).endPos);
+     assertEquals("StartPos (8)", 0, withinResult.match(8).startPos);
+     assertEquals("EndPos (8)", 10, withinResult.match(8).endPos);
+ }
+
+
+ @Test
+ public void indexExample2c () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // 2, 6, 9, 12
+     // Fourth <a> element around a final "i": <a><a><a>h</a>hij</a>hij</a>h<a>i</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "h i j h i j h i j h i ",
+         "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+         "[(3-6)s:h]" + // 2
+         "[(12-15)s:i]" + // 3
+         "[(15-18)s:j]" + // 4
+         "[(18-21)s:h]" + // 5
+         "[(21-24)s:i]" + // 6
+         "[(24-27)s:j]" + // 7
+         "[(27-30)s:h]" + // 8
+         "[(30-33)s:i]" + // 9
+         "[(33-36)s:j]" + // 10
+         "[(37-40)s:h]" + // 11
+         "[(40-43)s:i|<>:a#40-43$<i>12]"); // 12
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+     KorapResult elementResult = index.search(elementQuery, (short) 10);
+
+     assertEquals("totalResults", 4, elementResult.totalResults());
+     assertEquals("StartPos (0)", 0, elementResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, elementResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, elementResult.match(1).startPos);
+     assertEquals("EndPos (1)", 7, elementResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, elementResult.match(2).startPos);
+     assertEquals("EndPos (2)", 10, elementResult.match(2).endPos);
+     assertEquals("StartPos (3)", 11, elementResult.match(3).startPos);
+     assertEquals("EndPos (3)", 12, elementResult.match(3).endPos);
+
+     SpanQuery withinQuery = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult withinResult = index.search(withinQuery, (short) 10);
+     // Debugging aid: System.err.println(withinResult.toJSON());
+
+     assertEquals("totalResults", 9, withinResult.totalResults());
+     assertEquals("StartPos (0)", 0, withinResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, withinResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, withinResult.match(1).startPos);
+     assertEquals("EndPos (1)", 4, withinResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, withinResult.match(2).startPos);
+     assertEquals("EndPos (2)", 7, withinResult.match(2).endPos);
+     assertEquals("StartPos (3)", 0, withinResult.match(3).startPos);
+     assertEquals("EndPos (3)", 7, withinResult.match(3).endPos);
+     assertEquals("StartPos (4)", 0, withinResult.match(4).startPos);
+     assertEquals("EndPos (4)", 7, withinResult.match(4).endPos);
+     assertEquals("StartPos (5)", 0, withinResult.match(5).startPos);
+     assertEquals("EndPos (5)", 10, withinResult.match(5).endPos);
+     assertEquals("StartPos (6)", 0, withinResult.match(6).startPos);
+     assertEquals("EndPos (6)", 10, withinResult.match(6).endPos);
+     assertEquals("StartPos (7)", 0, withinResult.match(7).startPos);
+     assertEquals("EndPos (7)", 10, withinResult.match(7).endPos);
+     assertEquals("StartPos (8)", 0, withinResult.match(8).startPos);
+     assertEquals("EndPos (8)", 10, withinResult.match(8).endPos);
+ }
+
+
+ @Test
+ public void indexExample2d () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // 2, 6, 9, 12, 7
+     // Fourth <a> element around a final "h": <a><a><a>h</a>hij</a>hij</a>h<a>h</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "h i j h i j h i j h i ",
+         "[(0-3)s:h|<>:a#0-27$<i>7|<>:a#0-18$<i>4|<>:a#0-36$<i>10]" + // 1
+         "[(3-6)s:h]" + // 2
+         "[(12-15)s:i]" + // 3
+         "[(15-18)s:j]" + // 4
+         "[(18-21)s:h]" + // 5
+         "[(21-24)s:i]" + // 6
+         "[(24-27)s:j]" + // 7
+         "[(27-30)s:h]" + // 8
+         "[(30-33)s:i]" + // 9
+         "[(33-36)s:j]" + // 10
+         "[(37-40)s:h]" + // 11
+         "[(40-43)s:h|<>:a#40-43$<i>12]"); // 12
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery elementQuery = new SpanElementQuery("base", "a");
+
+     KorapResult elementResult = index.search(elementQuery, (short) 10);
+
+     assertEquals("totalResults", 4, elementResult.totalResults());
+     assertEquals("StartPos (0)", 0, elementResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, elementResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, elementResult.match(1).startPos);
+     assertEquals("EndPos (1)", 7, elementResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, elementResult.match(2).startPos);
+     assertEquals("EndPos (2)", 10, elementResult.match(2).endPos);
+     assertEquals("StartPos (3)", 11, elementResult.match(3).startPos);
+     assertEquals("EndPos (3)", 12, elementResult.match(3).endPos);
+
+     SpanQuery withinQuery = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:h"))
+     );
+
+     KorapResult withinResult = index.search(withinQuery, (short) 15);
+     // Debugging aid: System.err.println(withinResult.toJSON());
+
+     assertEquals("totalResults", 10, withinResult.totalResults());
+     assertEquals("StartPos (0)", 0, withinResult.match(0).startPos);
+     assertEquals("EndPos (0)", 4, withinResult.match(0).endPos);
+     assertEquals("StartPos (1)", 0, withinResult.match(1).startPos);
+     assertEquals("EndPos (1)", 4, withinResult.match(1).endPos);
+     assertEquals("StartPos (2)", 0, withinResult.match(2).startPos);
+     assertEquals("EndPos (2)", 7, withinResult.match(2).endPos);
+     assertEquals("StartPos (3)", 0, withinResult.match(3).startPos);
+     assertEquals("EndPos (3)", 7, withinResult.match(3).endPos);
+     assertEquals("StartPos (4)", 0, withinResult.match(4).startPos);
+     assertEquals("EndPos (4)", 7, withinResult.match(4).endPos);
+     assertEquals("StartPos (5)", 0, withinResult.match(5).startPos);
+     assertEquals("EndPos (5)", 10, withinResult.match(5).endPos);
+     assertEquals("StartPos (6)", 0, withinResult.match(6).startPos);
+     assertEquals("EndPos (6)", 10, withinResult.match(6).endPos);
+     assertEquals("StartPos (7)", 0, withinResult.match(7).startPos);
+     assertEquals("EndPos (7)", 10, withinResult.match(7).endPos);
+     assertEquals("StartPos (8)", 0, withinResult.match(8).startPos);
+     assertEquals("EndPos (8)", 10, withinResult.match(8).endPos);
+     assertEquals("StartPos (9)", 11, withinResult.match(9).startPos);
+     assertEquals("EndPos (9)", 12, withinResult.match(9).endPos);
+ }
+
+
+ @Test
+ public void indexExample3 () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // Document 1: a single token carrying three <a> elements and one <b>, all ending at position 0
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "xyz",
+         "[(0-3)s:xyz|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:a#0-3$<i>0|<>:b#0-3$<i>0]");
+     index.addDoc(doc);
+
+     // Document 2: three nested <a> elements and one <b> over the tokens x y z c d e ...
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z c d e c d e c d e ",
+         "[(0-3)s:x|<>:a#0-36$<i>12|<>:b#0-36$<i>12]" +
+         "[(3-6)s:y|<>:a#3-27$<i>9]" +
+         "[(6-9)s:z|<>:a#6-18$<i>6]" +
+         "[(9-12)s:c]" +
+         "[(12-15)s:d]" +
+         "[(15-18)s:e]" +
+         "[(18-21)s:c]" +
+         "[(21-24)s:d]" +
+         "[(24-27)s:e]" +
+         "[(27-30)s:c]" +
+         "[(30-33)s:d]" +
+         "[(33-36)s:e]");
+     index.addDoc(doc);
+
+     // Document 3: tokens x y z without any element
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z ",
+         "[(0-3)s:x]" +
+         "[(3-6)s:y]" +
+         "[(6-9)s:z]");
+     index.addDoc(doc);
+
+     // Document 4: <a> elements on the first three tokens (plus one <b>) over x y z k l m ...
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "x y z k l m k l m k l m ",
+         "[(0-3)s:x|<>:a#0-3$<i>12]" +
+         "[(3-6)s:y|<>:a#3-6$<i>9|<>:b#3-6$<i>9]" +
+         "[(6-9)s:z|<>:a#6-9$<i>6]" +
+         "[(9-12)s:k]" +
+         "[(12-15)s:l]" +
+         "[(15-18)s:m]" +
+         "[(18-21)s:k]" +
+         "[(21-24)s:l]" +
+         "[(24-27)s:m]" +
+         "[(27-30)s:k]" +
+         "[(30-33)s:l]" +
+         "[(33-36)s:m]");
+     index.addDoc(doc);
+
+     // Document 5: three <a> elements that all open at the first token (h i j ...)
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "h i j h i j h i j ",
+         "[(0-3)s:h|<>:a#0-27$<i>6|<>:a#0-18$<i>3|<>:a#0-36$<i>9]" +
+         "[(3-6)s:h]" +
+         "[(12-15)s:i]" +
+         "[(15-18)s:j]" +
+         "[(18-21)s:h]" +
+         "[(21-24)s:i]" +
+         "[(24-27)s:j]" +
+         "[(27-30)s:h]" +
+         "[(30-33)s:i]" +
+         "[(33-36)s:j]");
+     index.addDoc(doc);
+
+     // Document 6: tokens a b c without any element
+     doc = new FieldDocument();
+     doc.addTV("base",
+         "a b c ",
+         "[(0-3)s:a]" +
+         "[(3-6)s:b]" +
+         "[(6-9)s:c]");
+     index.addDoc(doc);
+
+
+     // Persist all documents
+     index.commit();
+
+     assertEquals(6, index.numberOf("documents"));
+
+     SpanQuery query = new SpanElementQuery("base", "a");
+
+     KorapResult result = index.search(query, (short) 15);
+
+     // Debugging aid: System.err.println(index.search(query, (short) 10).toJSON());
+     assertEquals("totalResults", 12, result.totalResults());
+
+     assertEquals("StartPos (0)", 0, result.match(0).startPos);
+     assertEquals("EndPos (0)", 0, result.match(0).endPos);
+     assertEquals("StartPos (1)", 0, result.match(1).startPos);
+     assertEquals("EndPos (1)", 0, result.match(1).endPos);
+     assertEquals("StartPos (2)", 0, result.match(2).startPos);
+     assertEquals("EndPos (2)", 0, result.match(2).endPos);
+
+     assertEquals("StartPos (3)", 0, result.match(3).startPos);
+     assertEquals("EndPos (3)", 12, result.match(3).endPos);
+     assertEquals("StartPos (4)", 1, result.match(4).startPos);
+     assertEquals("EndPos (4)", 9, result.match(4).endPos);
+     assertEquals("StartPos (5)", 2, result.match(5).startPos);
+     assertEquals("EndPos (5)", 6, result.match(5).endPos);
+
+     assertEquals("StartPos (6)", 0, result.match(6).startPos);
+     assertEquals("EndPos (6)", 12, result.match(6).endPos);
+     assertEquals("StartPos (7)", 1, result.match(7).startPos);
+     assertEquals("EndPos (7)", 9, result.match(7).endPos);
+     assertEquals("StartPos (8)", 2, result.match(8).startPos);
+     assertEquals("EndPos (8)", 6, result.match(8).endPos);
+
+     assertEquals("StartPos (9)", 0, result.match(9).startPos);
+     assertEquals("EndPos (9)", 3, result.match(9).endPos);
+     assertEquals("StartPos (10)", 0, result.match(10).startPos);
+     assertEquals("EndPos (10)", 6, result.match(10).endPos);
+     assertEquals("StartPos (11)", 0, result.match(11).startPos);
+     assertEquals("EndPos (11)", 9, result.match(11).endPos);
+ }
+
+ @Test
+ public void indexExample3Offsets () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // Er schrie: <sentence>"Das war ich!"</sentence> und ging.
+     // The sentence element spans character offsets 11-25, quotation marks included.
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "Er schrie: \"Das war ich!\" und ging.",
+         "[(0-2)s:Er|_0#0-3]" +
+         "[(3-9)s:schrie|_1#3-9]" +
+         "[(12-15)s:Das|_2#12-15|<>:sentence#11-25$<i>5]" +
+         "[(16-19)s:war|_3#16-19]" +
+         "[(20-23)s:ich|_4#20-23]" +
+         "[(26-29)s:und|_5#26-29]" +
+         "[(30-34)s:ging|_6#30-34]");
+     index.addDoc(doc);
+
+     // Save documents
+     index.commit();
+
+     SpanQuery query = new SpanClassQuery(new SpanElementQuery("base", "sentence"), (byte)3);
+     // Context arguments of 1: the expected snippet shows one token on either side of the match.
+     KorapResult result = index.search(query, 0, (short) 15, true, (short) 1, true, (short) 1);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("... schrie: [\"{3:Das war ich}!\"] und ...",result.match(0).snippetBrackets());
+     // NOTE(review): the quotation marks inside the match were bare '"' characters that ended the
+     // Java string literal early; "&quot;" is presumed to be what snippetHTML() emits - confirm.
+     assertEquals("<span class=\"korap-more-left\"></span>schrie: <span class=\"korap-match\">&quot;<span class=\"korap-highlight korap-class-3\">Das war ich</span>!&quot;</span> und<span class=\"korap-more-right\"></span>",result.match(0).snippetHTML());
+
+     result = index.search(query, 0, (short) 15, true, (short) 0, true, (short) 0);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("... [\"{3:Das war ich}!\"] ...",result.match(0).snippetBrackets());
+
+
+     result = index.search(query, 0, (short) 15, true, (short) 6, true, (short) 6);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging.",result.match(0).snippetBrackets());
+
+     result = index.search(query, 0, (short) 15, true, (short) 2, true, (short) 2);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("Er schrie: [\"{3:Das war ich}!\"] und ging ...",result.match(0).snippetBrackets());
+
+     // Class 1 marks the whole sentence match, class 2 the term found inside it.
+     query = new SpanClassQuery(
+         new SpanWithinQuery(
+             new SpanElementQuery("base", "sentence"),
+             new SpanClassQuery(
+                 new SpanTermQuery(new Term("base", "s:Das")), (byte) 2
+             )
+         ), (byte) 1);
+
+     result = index.search(query, (short) 15);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("Er schrie: [\"{1:{2:Das} war ich}!\"] und ging.",result.match(0).snippetBrackets());
+
+     query = new SpanClassQuery(
+         new SpanWithinQuery(
+             new SpanElementQuery("base", "sentence"),
+             new SpanClassQuery(
+                 new SpanTermQuery(new Term("base", "s:war")), (byte) 2
+             )
+         ), (byte) 1);
+
+     result = index.search(query, (short) 15);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("Er schrie: [\"{1:Das {2:war} ich}!\"] und ging.",result.match(0).snippetBrackets());
+
+     query = new SpanClassQuery(
+         new SpanWithinQuery(
+             new SpanElementQuery("base", "sentence"),
+             new SpanClassQuery(
+                 new SpanTermQuery(new Term("base", "s:ich")), (byte) 2
+             )
+         ), (byte) 1);
+
+     result = index.search(query, (short) 15);
+     assertEquals("totalResults", 1, result.totalResults());
+     assertEquals("Er schrie: [\"{1:Das war {2:ich}}!\"] und ging.",result.match(0).snippetBrackets());
+     // "und" (offsets 26-29) lies behind the sentence element: no match expected.
+     query = new SpanClassQuery(
+         new SpanWithinQuery(
+             new SpanElementQuery("base", "sentence"),
+             new SpanClassQuery(
+                 new SpanTermQuery(new Term("base", "s:und")), (byte) 2
+             )
+         ), (byte) 1);
+
+     result = index.search(query, (short) 15);
+     assertEquals("totalResults", 0, result.totalResults());
+     // "schrie" (offsets 3-9) lies in front of the sentence element: no match expected.
+     query = new SpanClassQuery(
+         new SpanWithinQuery(
+             new SpanElementQuery("base", "sentence"),
+             new SpanClassQuery(
+                 new SpanTermQuery(new Term("base", "s:schrie")), (byte) 2
+             )
+         ), (byte) 1);
+
+     result = index.search(query, (short) 15);
+     assertEquals("totalResults", 0, result.totalResults());
+
+ }
+
+
+ // !! Offset is 1 token too long
+
+ @Test
+ public void indexExample4 () throws IOException {
+     KorapIndex index = new KorapIndex();
+
+     // Case 1, 6, 7, 13
+     // Two tokens, then an <a> holding two smaller <a> elements, then a trailing x: xy<a><a>x</a>b<a>c</a></a>x
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "x y x b c x ",
+         "[(0-3)s:x]" +
+         "[(3-6)s:y]" +
+         "[(6-9)s:x|<>:a#6-15$<i>5|<>:a#6-9$<i>3]" +
+         "[(9-12)s:b]" +
+         "[(12-15)s:c|<>:a#12-15$<i>5]" +
+         "[(15-18)s:x]");
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanTermQuery(new Term("base", "s:x"))
+     );
+
+     KorapResult result = index.search(query, (short) 10);
+
+     assertEquals("totalResults", 2, result.totalResults());
+     assertEquals("StartPos (0)", 2, result.match(0).startPos);
+     assertEquals("EndPos (0)", 3, result.match(0).endPos);
+     assertEquals("StartPos (1)", 2, result.match(1).startPos);
+     assertEquals("EndPos (1)", 5, result.match(1).endPos);
+ }
+
+
+ @Test
+ public void indexExample5 () throws IOException {
+     // 1,2,3,6,9,10,12
+     KorapIndex index = new KorapIndex();
+
+     // Sketch of the nested <a> elements: hij<a>hi<a>h<a>ij</a></a>hi</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "hijhihijhi",
+         "[(0-1)s:h|i:h|_0#0-1|-:a$<i>3|-:t$<i>10]" +
+         "[(1-2)s:i|i:i|_1#1-2]" +
+         "[(2-3)s:j|i:j|_2#2-3]" +
+         "[(3-4)s:h|i:h|_3#3-4|<>:a#3-10$<i>10]" +
+         "[(4-5)s:i|i:i|_4#4-5]" +
+         "[(5-6)s:h|i:h|_5#5-6|<>:a#5-8$<i>8]" +
+         "[(6-7)s:i|i:i|_6#6-7|<>:a#6-8$<i>8]" +
+         "[(7-8)s:j|i:j|_7#7-8]" +
+         "[(8-9)s:h|i:h|_8#8-9]" +
+         "[(9-10)s:i|i:i|_9#9-10]");
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanNextQuery(
+             new SpanTermQuery(new Term("base", "s:h")),
+             new SpanTermQuery(new Term("base", "s:i"))
+         )
+     );
+
+     KorapResult result = index.search(query, (short) 10);
+
+     assertEquals("totalResults", 4, result.totalResults());
+
+     assertEquals("StartPos (0)", 3, result.match(0).startPos);
+     assertEquals("EndPos (0)", 10, result.match(0).endPos);
+     assertEquals("StartPos (1)", 3, result.match(1).startPos);
+     assertEquals("EndPos (1)", 10, result.match(1).endPos);
+     assertEquals("StartPos (2)", 3, result.match(2).startPos);
+     assertEquals("EndPos (2)", 10, result.match(2).endPos);
+     assertEquals("StartPos (3)", 5, result.match(3).startPos);
+     assertEquals("EndPos (3)", 8, result.match(3).endPos);
+ }
+
+ @Test
+ public void indexExample6 () throws IOException {
+     KorapIndex index = new KorapIndex();
+     // 2,5,8,12,13
+     // Sketch of the <a> elements: h<a><a>i</a>j</a><a>h</a>i j<a>h i</a>j
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "hijhi jh ij",
+         "[(0-1)s:h|i:h|_0#0-1|-:a$<i>4|-:t$<i>9]" +
+         "[(1-2)s:i|i:i|_1#1-2|<>:a#1-2$<i>2|<>:a#1-3$<i>3]" +
+         "[(2-3)s:j|i:j|_2#2-3]" +
+         "[(3-4)s:h|i:h|_3#3-4|<>:a#3-4$<i>4]" +
+         "[(4-5)s:i|i:i|_4#4-5]" +
+         "[(6-7)s:j|i:j|_5#6-7]" +
+         "[(7-8)s:h|i:h|_6#7-8|<>:a#7-10$<i>8]" +
+         "[(9-10)s:i|i:i|_7#9-10]" +
+         "[(10-11)s:j|i:j|_8#10-11]");
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanNextQuery(
+             new SpanTermQuery(new Term("base", "s:h")),
+             new SpanNextQuery(
+                 new SpanTermQuery(new Term("base", "s:i")),
+                 new SpanTermQuery(new Term("base", "s:j"))
+             )
+         )
+     );
+
+     KorapResult result = index.search(query, (short) 10);
+     // No <a> element spans a complete h-i-j sequence, so nothing may match.
+     assertEquals("totalResults", 0, result.totalResults());
+ }
+
+
+ @Test
+ public void indexExample7 () throws IOException {
+     KorapIndex index = new KorapIndex();
+     // 4,5,11,13
+     // Sketch of the <a> elements: x<a>x h</a>i j h<a>i j</a>
+     FieldDocument doc = new FieldDocument();
+     doc.addTV("base",
+         "xx hi j hi j",
+         "[(0-1)s:x|i:x|_0#0-1|-:a$<i>2|-:t$<i>8]" +
+         "[(1-2)s:x|i:x|_1#1-2|<>:a#1-4$<i>3]" +
+         "[(3-4)s:h|i:h|_2#3-4]" +
+         "[(4-5)s:i|i:i|_3#4-5]" +
+         "[(6-7)s:j|i:j|_4#6-7]" +
+         "[(8-9)s:h|i:h|_5#8-9]" +
+         "[(9-10)s:i|i:i|_6#9-10|<>:a#9-12$<i>8]" +
+         "[(11-12)s:j|i:j|_7#11-12]");
+     index.addDoc(doc);
+
+     // Persist the document
+     index.commit();
+
+     assertEquals(1, index.numberOf("documents"));
+
+     SpanQuery query = new SpanWithinQuery(
+         new SpanElementQuery("base", "a"),
+         new SpanNextQuery(
+             new SpanTermQuery(new Term("base", "s:h")),
+             new SpanNextQuery(
+                 new SpanTermQuery(new Term("base", "s:i")),
+                 new SpanTermQuery(new Term("base", "s:j"))
+             )
+         )
+     );
+
+     KorapResult result = index.search(query, (short) 10);
+     // Both <a> elements only overlap an h-i-j sequence partially, so nothing may match.
+     assertEquals("totalResults", 0, result.totalResults());
+ }
+};
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestKorapQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestKorapQuery.java
new file mode 100644
index 0000000..d1dc5b3
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestKorapQuery.java
@@ -0,0 +1,160 @@
+import java.util.*;
+import org.apache.lucene.search.spans.SpanQuery;
+import de.ids_mannheim.korap.KorapQuery;
+
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks the string rendering of span queries assembled through {@link KorapQuery}. */
+@RunWith(JUnit4.class)
+public class TestKorapQuery {
+
+    // JUnit 4 instantiates the test class once per test method, so each test sees a fresh builder.
+    private final KorapQuery korap = new KorapQuery("field");
+
+    /** Segments of one or two terms, each extended with a further term. */
+    @Test
+    public void korapQuerySegment () {
+        assertEquals("spanNear([field1:a, field1:b], -1, false)",
+                     new KorapQuery("field1").seg("a").with("b").toQuery().toString());
+        assertEquals("spanNear([spanNear([field2:a, field2:b], -1, false), field2:c], -1, false)",
+                     new KorapQuery("field2").seg("a", "b").with("c").toQuery().toString());
+    }
+
+    /** Segments pairing a plain term with a regular expression, in either order. */
+    @Test
+    public void korapQueryRegexSegment () {
+        final KorapQuery first = new KorapQuery("field1");
+        assertEquals("spanNear([field1:a, SpanMultiTermQueryWrapper(field1:/b.*c/)], -1, false)",
+                     first.seg("a").with(first.re("b.*c")).toQuery().toString());
+
+        final KorapQuery second = new KorapQuery("field2");
+        assertEquals("spanNear([SpanMultiTermQueryWrapper(field2:/a.*/), field2:b], -1, false)",
+                     second.seg(second.re("a.*")).with("b").toQuery().toString());
+    }
+
+    /** Segments whose second operand is an alternation, built by chaining or by varargs. */
+    @Test
+    public void korapQueryRegexSegment2 () {
+        final KorapQuery chained = new KorapQuery("field");
+        assertEquals("spanNear([field:a, spanOr([field:b, field:c])], -1, false)",
+                     chained.seg("a").with(chained.or("b").or("c")).toQuery().toString());
+
+        final KorapQuery varargs = new KorapQuery("field");
+        assertEquals("spanNear([field:a, spanOr([field:b, field:c])], -1, false)",
+                     varargs.seg("a").with(varargs.or("b", "c")).toQuery().toString());
+
+        // [ a & (b | /c.*d/) ]
+        final KorapQuery mixed = new KorapQuery("field");
+        assertEquals("spanNear([field:a, spanOr([field:b, SpanMultiTermQueryWrapper(field:/c.*d/)])], -1, false)",
+                     mixed.seg("a").with(mixed.or("b").or(mixed.re("c.*d"))).toQuery().toString());
+    }
+
+    /** A sequence started from a segment, then extended by appending a term and a regex. */
+    @Test
+    public void korapQuerySequenceSegment () {
+        assertEquals("spanNext(spanNext(spanNear([field:a, spanOr([field:b, field:c])], -1, false), field:d), SpanMultiTermQueryWrapper(field:/e.?f/))",
+                     korap.seq(korap.seg("a").with(korap.or("b", "c"))).append("d").append(korap.re("e.?f")).toQuery().toString());
+    }
+
+    /** A single tag. */
+    @Test
+    public void KorapTagQuery () {
+        assertEquals("<field:np />", korap.tag("np").toQuery().toString());
+    }
+
+    /** An alternation of two tags. */
+    @Test
+    public void KorapTagQuery2 () {
+        assertEquals("spanOr([<field:np />, <field:vp />])",
+                     korap.or(korap.tag("np"), korap.tag("vp")).toQuery().toString());
+    }
+
+    /** A sequence of two tags. */
+    @Test
+    public void KorapTagQuery3 () {
+        assertEquals("spanNext(<field:np />, <field:vp />)",
+                     korap.seq(korap.tag("np"), korap.tag("vp")).toQuery().toString());
+    }
+
+    /** A sequence of two tags with a term appended. */
+    @Test
+    public void KorapTagQuery4 () {
+        assertEquals("spanNext(spanNext(<field:np />, <field:vp />), field:test)",
+                     korap.seq(korap.tag("np"), korap.tag("vp")).append("test").toQuery().toString());
+    }
+
+    /** A within-query over two tags. */
+    @Test
+    public void KorapTagQuery5 () {
+        assertEquals("spanWithin(<field:s />, <field:np />)",
+                     korap.within(korap.tag("s"), korap.tag("np")).toQuery().toString());
+    }
+
+    /** A within-query placed between a segment and a regex inside one sequence. */
+    @Test
+    public void KorapTagQuery6 () {
+        assertEquals("spanNext(spanNext(field:tree, spanWithin(<field:s />, <field:np />)), SpanMultiTermQueryWrapper(field:/hey.*/))",
+                     korap.seq(korap.seg("tree"), korap.within(korap.tag("s"), korap.tag("np")), korap.re("hey.*")).toQuery().toString());
+    }
+
+    /** A sequence whose within-query is wrapped by {@code _(1, ...)}. */
+    @Test
+    public void KorapClassQuery () {
+        assertEquals("spanNext(spanNext(field:tree, {1: spanWithin(<field:s />, <field:np />)}), SpanMultiTermQueryWrapper(field:/hey.*/))",
+                     korap.seq(korap.seg("tree"), korap._(1, korap.within(korap.tag("s"), korap.tag("np"))), korap.re("hey.*")).toQuery().toString());
+    }
+
+    /** {@code _} called without a number renders with number 0. */
+    @Test
+    public void KorapClassQuery2 () {
+        assertEquals("{0: field:base:test}", korap._(korap.seg("base:test")).toQuery().toString());
+    }
+
+    /** A numberless {@code _} around the second tag of a within-query inside a sequence. */
+    @Test
+    public void KorapClassQuery3 () {
+        assertEquals("spanNext(spanNext(field:tree, spanWithin(<field:s />, {0: <field:np />})), SpanMultiTermQueryWrapper(field:/hey.*/))",
+                     korap.seq(korap.seg("tree"), korap.within(korap.tag("s"), korap._(korap.tag("np"))), korap.re("hey.*")).toQuery().toString());
+    }
+
+    /** {@code shrink} called without a number renders with number 0. */
+    @Test
+    public void KorapShrinkQuery () {
+        assertEquals("shrink(0: <field:np />)", korap.shrink(korap.tag("np")).toQuery().toString());
+    }
+
+    /** {@code shrink} called with the explicit number 1. */
+    @Test
+    public void KorapShrinkQuery1 () {
+        assertEquals("shrink(1: <field:np />)", korap.shrink(1, korap.tag("np")).toQuery().toString());
+    }
+
+    /** {@code shrink(1, ...)} around {@code _(1, ...)}. */
+    @Test
+    public void KorapShrinkQuery2 () {
+        assertEquals("shrink(1: {1: <field:np />})",
+                     korap.shrink(1, korap._(1, korap.tag("np"))).toQuery().toString());
+    }
+
+    /** {@code shrink} over nested {@code _} calls, a sequence and a {@code without} exclusion. */
+    @Test
+    public void KorapShrinkQuery3 () {
+        assertEquals("shrink(1: {1: spanNext(<field:np />, {0: spanNot(field:test, field:no)})})",
+                     korap.shrink(1, korap._(1, korap.seq(korap.tag("np"), korap._(korap.seg("test").without("no"))))).toQuery().toString());
+    }
+
+    /** A {@code shrink} expression used as the middle element of a sequence. */
+    @Test
+    public void KorapShrinkQuery4 () {
+        assertEquals("spanNext(spanNext(field:try1, shrink(1: {1: field:try2})), field:try3)",
+                     korap.seq(korap.seg("try1"), korap.shrink(1, korap._(1, korap.seg("try2"))), korap.seg("try3")).toQuery().toString());
+    }
+
+    // korap.seg("a").append(korap.ANY).append("b:c");
+    // korap.repeat(korap.seg("a", "b"), 5)
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanAlterQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanAlterQuery.java
new file mode 100644
index 0000000..28ef2b4
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanAlterQuery.java
@@ -0,0 +1,54 @@
+import java.util.*;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks how {@link SpanAlterQueryWrapper} renders alternations of terms, segments and regexes. */
+@RunWith(JUnit4.class)
+public class TestSpanAlterQuery {
+    /** A single alternative renders as the bare term. */
+    @Test
+    public void spanAlterQuery () {
+        final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");
+        alternation.or("b");
+        assertEquals("field:b", alternation.toQuery().toString());
+    }
+
+    /** Two chained alternatives render as a spanOr. */
+    @Test
+    public void spanAlterQuery2 () {
+        final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");
+        alternation.or("b").or("c");
+        assertEquals("spanOr([field:b, field:c])", alternation.toQuery().toString());
+    }
+
+    /** Three chained alternatives end up in one spanOr. */
+    @Test
+    public void spanAlterQuery3 () {
+        final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");
+        alternation.or("b").or("c").or("d");
+        assertEquals("spanOr([field:b, field:c, field:d])", alternation.toQuery().toString());
+    }
+
+    /** A three-term segment wrapper placed between two plain terms. */
+    @Test
+    public void spanAlterQuery4 () {
+        final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");
+        alternation.or("d").or(new SpanSegmentQueryWrapper("field", "a", "b", "c")).or("e");
+        assertEquals("spanOr([field:d, spanNear([spanNear([field:a, field:b], -1, false), field:c], -1, false), field:e])", alternation.toQuery().toString());
+    }
+
+    /** A regex wrapper placed between two plain terms. */
+    @Test
+    public void spanAlterQuery5 () {
+        final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");
+        alternation.or("f").or(new SpanRegexQueryWrapper("field", "a[bc]d.?e")).or("g");
+        assertEquals("spanOr([field:f, SpanMultiTermQueryWrapper(field:/a[bc]d.?e/), field:g])", alternation.toQuery().toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanElementQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanElementQuery.java
new file mode 100644
index 0000000..e72fa36
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanElementQuery.java
@@ -0,0 +1,26 @@
+import java.util.*;
+import org.apache.lucene.index.Term;
+
+import de.ids_mannheim.korap.query.SpanElementQuery;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks the angle-bracket string form produced by {@link SpanElementQuery#toString()}. */
+@RunWith(JUnit4.class)
+public class TestSpanElementQuery {
+    /** Element "b" on field "field". */
+    @Test
+    public void spanElementQuery () {
+        assertEquals("<field:b />", new SpanElementQuery("field", "b").toString());
+    }
+
+    /** Element "xyz" on field "field". */
+    @Test
+    public void spanElement2Query () {
+        assertEquals("<field:xyz />", new SpanElementQuery("field", "xyz").toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentAlterQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentAlterQuery.java
new file mode 100644
index 0000000..f780072
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentAlterQuery.java
@@ -0,0 +1,54 @@
+import java.util.*;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks {@link SpanAlterQueryWrapper} alternations. NOTE(review): bodies match TestSpanAlterQuery - confirm intent. */
+@RunWith(JUnit4.class)
+public class TestSpanSegmentAlterQuery {
+    @Test
+    public void spanAlterQuery () {
+        // One alternative: the bare term is expected.
+        final SpanAlterQueryWrapper alter = new SpanAlterQueryWrapper("field");
+        alter.or("b");
+        assertEquals("field:b", alter.toQuery().toString());
+    }
+
+    @Test
+    public void spanAlterQuery2 () {
+        // Two alternatives: a spanOr is expected.
+        final SpanAlterQueryWrapper alter = new SpanAlterQueryWrapper("field");
+        alter.or("b").or("c");
+        assertEquals("spanOr([field:b, field:c])", alter.toQuery().toString());
+    }
+
+    @Test
+    public void spanAlterQuery3 () {
+        // Three alternatives: one spanOr listing all three is expected.
+        final SpanAlterQueryWrapper alter = new SpanAlterQueryWrapper("field");
+        alter.or("b").or("c").or("d");
+        assertEquals("spanOr([field:b, field:c, field:d])", alter.toQuery().toString());
+    }
+
+    @Test
+    public void spanAlterQuery4 () {
+        // A three-term segment wrapper sits between two plain terms.
+        final SpanAlterQueryWrapper alter = new SpanAlterQueryWrapper("field");
+        alter.or("d").or(new SpanSegmentQueryWrapper("field", "a", "b", "c")).or("e");
+        assertEquals("spanOr([field:d, spanNear([spanNear([field:a, field:b], -1, false), field:c], -1, false), field:e])", alter.toQuery().toString());
+    }
+
+    @Test
+    public void spanAlterQuery5 () {
+        // A regex wrapper sits between two plain terms.
+        final SpanAlterQueryWrapper alter = new SpanAlterQueryWrapper("field");
+        alter.or("f").or(new SpanRegexQueryWrapper("field", "a[bc]d.?e")).or("g");
+        assertEquals("spanOr([field:f, SpanMultiTermQueryWrapper(field:/a[bc]d.?e/), field:g])", alter.toQuery().toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentQuery.java
new file mode 100644
index 0000000..c4fd3d1
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentQuery.java
@@ -0,0 +1,99 @@
+import java.util.*;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks how {@link SpanSegmentQueryWrapper} renders inclusions, exclusions and copies. */
+@RunWith(JUnit4.class)
+public class TestSpanSegmentQuery {
+
+    /** Segments constructed from one, two and three terms. */
+    @Test
+    public void spanSegmentQuery () {
+        assertEquals("field:a",
+                     new SpanSegmentQueryWrapper("field", "a").toQuery().toString());
+        assertEquals("spanNear([field:a, field:b], -1, false)",
+                     new SpanSegmentQueryWrapper("field", "a", "b").toQuery().toString());
+        assertEquals("spanNear([spanNear([field:a, field:b], -1, false), field:c], -1, false)",
+                     new SpanSegmentQueryWrapper("field", "a", "b", "c").toQuery().toString());
+    }
+
+    /** Terms excluded through {@code without} show up inside a spanNot. */
+    @Test
+    public void spanSegmentQueryExclusive () {
+        assertEquals("field:a", new SpanSegmentQueryWrapper("field", "a").toQuery().toString());
+
+        final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field", "a", "b");
+        assertEquals("spanNear([field:a, field:b], -1, false)", segment.toQuery().toString());
+
+        segment.without("c");
+        assertEquals("spanNot(spanNear([field:a, field:b], -1, false), field:c)", segment.toQuery().toString());
+
+        segment.without("d");
+        assertEquals("spanNot(spanNear([field:a, field:b], -1, false), spanOr([field:c, field:d]))", segment.toQuery().toString());
+    }
+
+    /** Grows one segment step by step with terms and a regex, then adds two regex exclusions. */
+    @Test
+    public void spanSegmentRegexQuery () {
+        final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field");
+        assertNull(segment.toQuery());
+
+        segment.with("a");
+        assertEquals("field:a", segment.toQuery().toString());
+
+        segment.with(new SpanRegexQueryWrapper("field", "a.*b"));
+        assertEquals("spanNear([field:a, SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false)", segment.toQuery().toString());
+
+        segment.with("c");
+        assertEquals("spanNear([spanNear([field:a, SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false), field:c], -1, false)",
+                     segment.toQuery().toString());
+
+        segment.with("d").with("e");
+        assertEquals("spanNear([spanNear([spanNear([spanNear([field:a, SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false), field:c], -1, false), field:d], -1, false), field:e], -1, false)",
+                     segment.toQuery().toString());
+
+        segment.without(new SpanRegexQueryWrapper("field", "x.?y"));
+        assertEquals("spanNot(spanNear([spanNear([spanNear([spanNear([field:a, SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false), field:c], -1, false), field:d], -1, false), field:e], -1, false), SpanMultiTermQueryWrapper(field:/x.?y/))",
+                     segment.toQuery().toString());
+
+        segment.without(new SpanRegexQueryWrapper("field", "z{5,9}"));
+        assertEquals("spanNot(spanNear([spanNear([spanNear([spanNear([field:a, SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false), field:c], -1, false), field:d], -1, false), field:e], -1, false), spanOr([SpanMultiTermQueryWrapper(field:/x.?y/), SpanMultiTermQueryWrapper(field:/z{5,9}/)]))",
+                     segment.toQuery().toString());
+    }
+
+    /** A segment combining a term, an alternation wrapper and a regex wrapper. */
+    @Test
+    public void spanSegmentAlterQuery () {
+        final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field");
+        assertNull(segment.toQuery());
+
+        segment.with("a");
+        assertEquals("field:a", segment.toQuery().toString());
+
+        segment.with(new SpanAlterQueryWrapper("field", "c", "d"));
+        segment.with(new SpanRegexQueryWrapper("field", "a.*b"));
+        assertEquals("spanNear([spanNear([field:a, spanOr([field:c, field:d])], -1, false), SpanMultiTermQueryWrapper(field:/a.*b/)], -1, false)",
+                     segment.toQuery().toString());
+    }
+
+    /** A wrapper constructed from another wrapper, and its clone, render like the source wrapper. */
+    @Test
+    public void spanSegmentCloneQuery () {
+        final SpanSegmentQueryWrapper source = new SpanSegmentQueryWrapper("field", "a", "b");
+        assertEquals("spanNear([field:a, field:b], -1, false)", source.toQuery().toString());
+
+        final SpanSegmentQueryWrapper copy = new SpanSegmentQueryWrapper("field", source);
+        assertEquals(source.toQuery().toString(), copy.toQuery().toString());
+
+        final SpanSegmentQueryWrapper cloned = copy.clone();
+        assertEquals(source.toQuery().toString(), cloned.toQuery().toString());
+        assertEquals(copy.toQuery().toString(), cloned.toQuery().toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentSequenceQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentSequenceQuery.java
new file mode 100644
index 0000000..840f9f3
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanSegmentSequenceQuery.java
@@ -0,0 +1,80 @@
+import java.util.*;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks how {@link SpanSequenceQueryWrapper} renders after appending and prepending parts. */
+@RunWith(JUnit4.class)
+public class TestSpanSegmentSequenceQuery {
+
+    /** An empty sequence yields a null query; appended terms nest into spanNext. */
+    @Test
+    public void spanSegmentSequenceQuery () {
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field");
+        assertNull(sequence.toQuery());
+
+        sequence.append("a").append("b");
+        assertEquals("spanNext(field:a, field:b)", sequence.toQuery().toString());
+
+        sequence.append("c");
+        assertEquals("spanNext(spanNext(field:a, field:b), field:c)", sequence.toQuery().toString());
+    }
+
+    /** Appends a three-term segment wrapper to a two-term sequence. */
+    @Test
+    public void spanSegmentSequenceQuery2 () {
+        final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field", "-c", "-d", "-e");
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "a", "b");
+        sequence.append(segment);
+
+        assertEquals("spanNext(spanNext(field:a, field:b), spanNear([spanNear([field:-c, field:-d], -1, false), field:-e], -1, false))",
+                     sequence.toQuery().toString());
+    }
+
+    /** Appends a regex wrapper to a two-term sequence. */
+    @Test
+    public void spanSegmentSequenceQuery3 () {
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "a", "b");
+        final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "c.?d");
+        sequence.append(regex);
+
+        assertEquals("spanNext(spanNext(field:a, field:b), SpanMultiTermQueryWrapper(field:/c.?d/))", sequence.toQuery().toString());
+    }
+
+    /** Prepends a plain term to a two-term sequence. */
+    @Test
+    public void spanSegmentSequenceQueryPrepend () {
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "b", "c");
+        sequence.prepend("a");
+
+        assertEquals("spanNext(spanNext(field:a, field:b), field:c)", sequence.toQuery().toString());
+    }
+
+    /** Prepends a three-term segment wrapper to a two-term sequence. */
+    @Test
+    public void spanSegmentSequenceQueryPrepend2 () {
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "d", "e");
+        final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field", "-a", "-b", "-c");
+        sequence.prepend(segment);
+
+        assertEquals("spanNext(spanNext(spanNear([spanNear([field:-a, field:-b], -1, false), field:-c], -1, false), field:d), field:e)",
+                     sequence.toQuery().toString());
+    }
+
+    /** Prepends a regex wrapper to a two-term sequence. */
+    @Test
+    public void spanSegmentSequenceQueryPrepend3 () {
+        final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "c", "d");
+        final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "a.?b");
+        sequence.prepend(regex);
+
+        assertEquals("spanNext(spanNext(SpanMultiTermQueryWrapper(field:/a.?b/), field:c), field:d)", sequence.toQuery().toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanWithinQuery.java b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanWithinQuery.java
new file mode 100644
index 0000000..012a8f4
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/query/TestSpanWithinQuery.java
@@ -0,0 +1,27 @@
+import java.util.*;
+import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks the string form of {@link SpanWithinQuery} built from a name and a sequence query. */
+@RunWith(JUnit4.class)
+public class TestSpanWithinQuery {
+    /** Sequences of three and of two terms, wrapped with the names "s" and "p". */
+    @Test
+    public void spanSegmentWithinQuery () {
+        final SpanSequenceQueryWrapper threeTerms = new SpanSequenceQueryWrapper("field", "a", "b", "c");
+        final SpanWithinQuery withinS = new SpanWithinQuery("s", threeTerms.toQuery());
+        assertEquals("spanWithin(<field:s />, spanNext(spanNext(field:a, field:b), field:c))",
+                     withinS.toString());
+
+        final SpanSequenceQueryWrapper twoTerms = new SpanSequenceQueryWrapper("field", "a", "b");
+        final SpanWithinQuery withinP = new SpanWithinQuery("p", twoTerms.toQuery());
+        assertEquals("spanWithin(<field:p />, spanNext(field:a, field:b))",
+                     withinP.toString());
+    }
+}
diff --git a/trunk/src/test/java/de/ids_mannheim/korap/util/TestArray.java b/trunk/src/test/java/de/ids_mannheim/korap/util/TestArray.java
new file mode 100644
index 0000000..85190d7
--- /dev/null
+++ b/trunk/src/test/java/de/ids_mannheim/korap/util/TestArray.java
@@ -0,0 +1,36 @@
+import java.util.*;
+import static de.ids_mannheim.korap.util.KorapArray.*;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+
+/** Checks the statically imported {@code join} helpers: array, varargs, char and multi-char separators.
+ *  Assertions pass the expected value first so that a failure labels expected and actual correctly. */
+@RunWith(JUnit4.class)
+public class TestArray {
+
+    @Test
+    public void StringJoin1 () {
+        String[] test = new String[]{"a", "bc", "def"};
+        assertEquals("a,bc,def", join(",", test));
+    }
+
+    @Test
+    public void StringJoin2 () {
+        assertEquals("a,bc,def", join(",", "a", "bc", "def"));
+    }
+
+    @Test
+    public void StringJoin3 () {
+        assertEquals("a,bc,def", join(',', "a", "bc", "def"));
+    }
+
+    @Test
+    public void StringJoin4 () {
+        assertEquals("a--bc--def", join("--", "a", "bc", "def"));
+    }
+}