Added some failures and a new exception
diff --git a/Changes b/Changes
index 2bfc523..cc49dee 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,8 @@
- [feature] Presorting of element terms in the index for coherent
SpanQuery sorting; Removed buffering of element candidates (diewald)
Warning: This is a breaking change!
+ - [feature] Added CorpusDataException to deal with data importing bugs;
+ made some imports break more easily though (diewald)
- [performance] Payloads in ElementSpans can now be lazily loaded,
MatchModifyClassSpans no longer rely on payload copies (diewald)
- [cleanup] Renamed /filter to /collection,
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index c0815ec..8a643eb 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -5,6 +5,7 @@
import de.ids_mannheim.korap.model.MultiTermToken;
import de.ids_mannheim.korap.KorapDocument;
import de.ids_mannheim.korap.util.KorapDate;
+import de.ids_mannheim.korap.util.CorpusDataException;
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -135,16 +136,22 @@
// Iterate over all tokens in stream
for (ArrayList<String> token : (ArrayList<ArrayList<String>>) node.get("stream")) {
- // Initialize MultiTermToken
- MultiTermToken mtt = new MultiTermToken(token.remove(0));
+ try {
+ // Initialize MultiTermToken
+ MultiTermToken mtt = new MultiTermToken(token.remove(0));
- // Add rest of the list
- for (String term : token) {
- mtt.add(term);
+ // Add rest of the list
+ for (String term : token) {
+ mtt.add(term);
+ };
+
+ // Add MultiTermToken to stream
+ mtts.addMultiTermToken(mtt);
+
+ }
+ catch (CorpusDataException cde) {
+ this.addError(cde.getErrorCode(), cde.getMessage());
};
-
- // Add MultiTermToken to stream
- mtts.addMultiTermToken(mtt);
};
// Add tokenstream to fielddocument
@@ -178,13 +185,18 @@
for (ArrayList<String> token : (ArrayList<ArrayList<String>>) field.get("data")) {
- MultiTermToken mtt = new MultiTermToken(token.remove(0));
+ try {
+ MultiTermToken mtt = new MultiTermToken(token.remove(0));
- for (String term : token) {
- mtt.add(term);
+ for (String term : token) {
+ mtt.add(term);
+ };
+
+ mtts.addMultiTermToken(mtt);
+ }
+ catch (CorpusDataException cde) {
+ this.addError(cde.getErrorCode(), cde.getMessage());
};
-
- mtts.addMultiTermToken(mtt);
};
// TODO: This is normally dependend to the tokenization!
diff --git a/src/main/java/de/ids_mannheim/korap/model/MultiTerm.java b/src/main/java/de/ids_mannheim/korap/model/MultiTerm.java
index 3ba5748..007f9f1 100644
--- a/src/main/java/de/ids_mannheim/korap/model/MultiTerm.java
+++ b/src/main/java/de/ids_mannheim/korap/model/MultiTerm.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.model;
import static de.ids_mannheim.korap.util.KorapArray.*;
+import de.ids_mannheim.korap.util.CorpusDataException;
import org.apache.lucene.util.BytesRef;
import java.nio.ByteBuffer;
import java.util.*;
@@ -8,6 +9,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+/*
+ * Don't use ByteBuffer!
+ */
/**
* A MultiTerm represents a single term (e.g. a word, an annotation, a relation)
* that can be part of a MultiTermToken.
@@ -64,7 +68,7 @@
*
* @param term The term surface (see synopsis).
*/
- public MultiTerm (String term) {
+ public MultiTerm (String term) throws CorpusDataException {
_fromString(term);
};
@@ -84,7 +88,7 @@
* @param prefix A special prefix for the term.
* @param term The term surface (see synopsis).
*/
- public MultiTerm (char prefix, String term) {
+ public MultiTerm (char prefix, String term) throws CorpusDataException {
StringBuilder sb = new StringBuilder();
_fromString(sb.append(prefix).append(':').append(term).toString());
};
@@ -350,7 +354,7 @@
/*
* Deserialize MultiTerm from string representation.
*/
- private void _fromString (String term) {
+ private void _fromString (String term) throws CorpusDataException {
String[] termSurface = term.split("\\$", 2);
// Payload is given
@@ -431,9 +435,17 @@
}
catch (NumberFormatException e) {
- if (DEBUG)
- log.warn("Offset not a number: {}", term);
+ throw new CorpusDataException(
+ 952,
+ "Given offset information is not numeric"
+ );
};
+ }
+ else {
+ throw new CorpusDataException(
+ 953,
+ "Given offset information is incomplete"
+ );
};
};
this.term = stringOffset[0];
diff --git a/src/main/java/de/ids_mannheim/korap/model/MultiTermToken.java b/src/main/java/de/ids_mannheim/korap/model/MultiTermToken.java
index 21e106f..d72433e 100644
--- a/src/main/java/de/ids_mannheim/korap/model/MultiTermToken.java
+++ b/src/main/java/de/ids_mannheim/korap/model/MultiTermToken.java
@@ -1,7 +1,13 @@
package de.ids_mannheim.korap.model;
+import de.ids_mannheim.korap.util.CorpusDataException;
+
import java.util.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
/**
*
@@ -23,6 +29,10 @@
private short i = 0;
private boolean sorted = false;
+ // This advices the java compiler to ignore all loggings
+ public static final boolean DEBUG = false;
+ private final Logger log = LoggerFactory.getLogger(MultiTermTokenStream.class);
+
/**
* Construct a new MultiTermToken by passing a stream of
* {@link MultiTerm MultiTerms}.
@@ -53,10 +63,15 @@
this.terms = new ArrayList<MultiTerm>(16);
// Create a new MultiTerm
- MultiTerm term = new MultiTerm(prefix, surface);
+ try {
+ MultiTerm term = new MultiTerm(prefix, surface);
- // First word element
- terms.add( term );
+ // First word element
+ terms.add( term );
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
};
@@ -66,18 +81,23 @@
*
* @param terms Take at least one {@link MultiTerm} string for a token.
*/
- public MultiTermToken (String terms, String ... moreTerms) {
+ public MultiTermToken (String terms, String ... moreTerms) throws CorpusDataException {
this.terms = new ArrayList<MultiTerm>(16);
MultiTerm term = new MultiTerm(terms);
- // First word element
- this.terms.add( term );
+ try {
+ // First word element
+ this.terms.add( term );
- // Further elements on same position
- for (i = 0; i < moreTerms.length; i++) {
- term = new MultiTerm( moreTerms[i] );
- this.terms.add(term);
+ // Further elements on same position
+ for (i = 0; i < moreTerms.length; i++) {
+ term = new MultiTerm( moreTerms[i] );
+ this.terms.add(term);
+ };
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
};
};
@@ -101,10 +121,18 @@
* @param term A MultiTerm represented as a surface string.
* @return The {@link MultiTermToken} object for chaining.
*/
- public MultiTermToken add (String term) {
+ public MultiTermToken add (String term) throws CorpusDataException {
if (term.length() == 0)
return this;
- return this.add(new MultiTerm(term));
+
+ try {
+ this.add(new MultiTerm(term));
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+
+ return this;
};
@@ -118,7 +146,15 @@
public MultiTermToken add (char prefix, String term) {
if (term.length() == 0)
return this;
- return this.add(new MultiTerm(prefix, term));
+
+ try {
+ this.add(new MultiTerm(prefix, term));
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+
+ return this;
};
diff --git a/src/main/java/de/ids_mannheim/korap/model/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/model/MultiTermTokenStream.java
index 9ebb11b..bcc3512 100644
--- a/src/main/java/de/ids_mannheim/korap/model/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/model/MultiTermTokenStream.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.model;
import static de.ids_mannheim.korap.util.KorapByte.*;
+import de.ids_mannheim.korap.util.CorpusDataException;
import org.apache.lucene.util.BytesRef;
import java.util.*;
@@ -72,7 +73,13 @@
*/
public MultiTermTokenStream (String stream) {
this();
- this._fromString(stream);
+ try {
+ this._fromString(stream);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+ return;
};
@@ -92,7 +99,13 @@
while ((j = stream.read(buf)) > 0)
sb.append(buf, 0, j);
- this._fromString(sb.toString());
+ try {
+ this._fromString(sb.toString());
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+ return;
};
@@ -129,8 +142,7 @@
* @param surface A surface string of a {@link MultiTerm}.
* @return The {@link MultiTermTokenStream} object for chaining.
*/
- public MultiTermTokenStream addMultiTermToken
- (char prefix, String surface) {
+ public MultiTermTokenStream addMultiTermToken (char prefix, String surface) {
return this.addMultiTermToken(new MultiTermToken(prefix, surface));
};
@@ -145,10 +157,18 @@
*/
public MultiTermTokenStream addMultiTermToken
(String surface, String ... moreTerms) {
- return this.addMultiTermToken(new MultiTermToken(surface, moreTerms));
+ try {
+ this.addMultiTermToken(new MultiTermToken(surface, moreTerms));
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+ return this;
};
+
+
/**
* Add meta information to the MultiTermTokenStream.
*
@@ -159,9 +179,14 @@
* @return The {@link MultiTermTokenStream} object for chaining.
*/
public MultiTermTokenStream addMeta (String key, String value) {
- MultiTerm mt = new MultiTerm('-', key);
- mt.setPayload(value);
- this.multiTermTokens.get(0).add(mt);
+ try {
+ MultiTerm mt = new MultiTerm('-', key);
+ mt.setPayload(value);
+ this.multiTermTokens.get(0).add(mt);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
return this;
};
@@ -176,9 +201,14 @@
* @return The {@link MultiTermTokenStream} object for chaining.
*/
public MultiTermTokenStream addMeta (String key, byte[] value) {
- MultiTerm mt = new MultiTerm('-', key);
- mt.setPayload(value);
- this.multiTermTokens.get(0).add(mt);
+ try {
+ MultiTerm mt = new MultiTerm('-', key);
+ mt.setPayload(value);
+ this.multiTermTokens.get(0).add(mt);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
return this;
};
@@ -193,9 +223,14 @@
* @return The {@link MultiTermTokenStream} object for chaining.
*/
public MultiTermTokenStream addMeta (String key, short value) {
- MultiTerm mt = new MultiTerm('-', key);
- mt.setPayload(value);
- this.multiTermTokens.get(0).add(mt);
+ try {
+ MultiTerm mt = new MultiTerm('-', key);
+ mt.setPayload(value);
+ this.multiTermTokens.get(0).add(mt);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
return this;
};
@@ -210,9 +245,14 @@
* @return The {@link MultiTermTokenStream} object for chaining.
*/
public MultiTermTokenStream addMeta (String key, long value) {
- MultiTerm mt = new MultiTerm('-', key);
- mt.setPayload(value);
- this.multiTermTokens.get(0).add(mt);
+ try {
+ MultiTerm mt = new MultiTerm('-', key);
+ mt.setPayload(value);
+ this.multiTermTokens.get(0).add(mt);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
return this;
};
@@ -227,9 +267,15 @@
* @return The {@link MultiTermTokenStream} object for chaining.
*/
public MultiTermTokenStream addMeta (String key, int value) {
- MultiTerm mt = new MultiTerm('-', key);
- mt.setPayload(value);
- this.multiTermTokens.get(0).add(mt);
+ try {
+ MultiTerm mt = new MultiTerm('-', key);
+ mt.setPayload(value);
+ this.multiTermTokens.get(0).add(mt);
+ }
+ catch (CorpusDataException cde) {
+ log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
+ };
+
return this;
};
@@ -273,7 +319,7 @@
// Deserialize a string
- private void _fromString (String stream) {
+ private void _fromString (String stream) throws CorpusDataException {
Matcher matcher = pattern.matcher(stream);
while (matcher.find()) {
@@ -282,7 +328,7 @@
for (i = 1; i < seg.length; i++)
mtt.add(seg[i]);
-
+
this.addMultiTermToken(mtt);
};
};
@@ -293,7 +339,7 @@
* This overrides the function in Lucene's TokenStream.
*/
@Override
- public final boolean incrementToken() throws IOException {
+ public final boolean incrementToken () throws IOException {
this.payloadAttr.setPayload(null);
// Last token reached
@@ -376,8 +422,8 @@
};
@Override
- public void reset() {
+ public void reset () {
this.mttIndex = 0;
- this.mtIndex = 0;
+ this.mtIndex = 0;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index 9a1da9f..6235342 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -110,7 +110,8 @@
@Override
public boolean next() throws IOException {
- if (DEBUG) log.trace("Forward next match");
+ if (DEBUG) log.trace("Forward next match in {}",
+ this.doc());
// Next span
while (spans.next()) {
@@ -186,6 +187,7 @@
// Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
+ if (DEBUG) log.trace("Skip MatchSpans");
return spans.skipTo(target);
};
diff --git a/src/main/java/de/ids_mannheim/korap/util/CorpusDataException.java b/src/main/java/de/ids_mannheim/korap/util/CorpusDataException.java
new file mode 100644
index 0000000..9b5ed23
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/util/CorpusDataException.java
@@ -0,0 +1,81 @@
+package de.ids_mannheim.korap.util;
+
+/**
+ * Exception class for corpus data processing problems.
+ *
+ * @author diewald
+ */
+public class CorpusDataException extends Exception {
+
+ private int errorCode = 0;
+
+ /**
+ * Construct a new CorpusDataException.
+ */
+ public CorpusDataException() {
+ super();
+ };
+
+
+ /**
+ * Construct a new CorpusDataException.
+ *
+ * @param message Exception message.
+ */
+ public CorpusDataException (String message) {
+ super(message);
+ };
+
+
+ /**
+ * Construct a new CorpusDataException.
+ *
+ * @param code An integer value as an error code.
+ * @param message Exception message.
+ */
+ public CorpusDataException (int code, String message) {
+ super(message);
+ this.setErrorCode(code);
+ };
+
+
+ /**
+ * Construct a new CorpusDataException.
+ *
+ * @param message Exception message.
+ * @param cause A {@link Throwable} object.
+ */
+ public CorpusDataException (String message, Throwable cause) {
+ super(message, cause);
+ };
+
+
+ /**
+ * Construct a new CorpusDataException.
+ *
+ * @param cause A {@link Throwable} object.
+ */
+ public CorpusDataException (Throwable cause) {
+ super(cause);
+ };
+
+
+ /**
+ * Get the error code of the exception.
+ *
+ * @return The error code of the exception as an integer.
+ */
+ public int getErrorCode() {
+ return this.errorCode;
+ };
+
+
+ /**
+ * Set the error code of the exception.
+ *
+ * @param code The error code of the exception as an integer.
+ */
+ public void setErrorCode (int code) {
+ this.errorCode = code;
+ };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/Test.java b/src/test/java/de/ids_mannheim/korap/Test.java
index e8837f3..f0c5960 100644
--- a/src/test/java/de/ids_mannheim/korap/Test.java
+++ b/src/test/java/de/ids_mannheim/korap/Test.java
@@ -3,8 +3,11 @@
import java.util.*;
import java.io.IOException;
+import static org.junit.Assert.*;
+
import de.ids_mannheim.korap.model.MultiTermTokenStream;
import de.ids_mannheim.korap.model.MultiTermToken;
+import de.ids_mannheim.korap.util.CorpusDataException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
@@ -15,83 +18,87 @@
import org.apache.lucene.index.IndexWriter;
/**
- * @author Nils Diewald
- *
* Helper class for testing the KorapIndex framework (Normal).
+ *
+ * @author diewald
*/
public class Test {
public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
- Document doc = new Document();
- String[] strInt = { "pubDate" };
- String[] strStr = { "id", "corpus", "pubPlace" };
- String[] strTxt = { "title", "subtitle", "textClass" };
+ Document doc = new Document();
+ String[] strInt = { "pubDate" };
+ String[] strStr = { "id", "corpus", "pubPlace" };
+ String[] strTxt = { "title", "subtitle", "textClass" };
- // Text fields
- for (String s : strTxt) {
- doc.add(new TextField(s, m.get(s), Field.Store.YES));
- };
+ // Text fields
+ for (String s : strTxt) {
+ doc.add(new TextField(s, m.get(s), Field.Store.YES));
+ };
- // String fields
- for (String s : strStr) {
- doc.add(new StringField(s, m.get(s), Field.Store.YES));
- };
+ // String fields
+ for (String s : strStr) {
+ doc.add(new StringField(s, m.get(s), Field.Store.YES));
+ };
- // Integer fields
- for (String s : strInt) {
- doc.add(new IntField(s, Integer.parseInt(m.get(s)), Field.Store.YES));
- };
+ // Integer fields
+ for (String s : strInt) {
+ doc.add(new IntField(s, Integer.parseInt(m.get(s)), Field.Store.YES));
+ };
- FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
- textFieldWithTermVectors.setStoreTermVectors(true);
- textFieldWithTermVectors.setStoreTermVectorOffsets(true);
- textFieldWithTermVectors.setStoreTermVectorPositions(true);
- textFieldWithTermVectors.setStoreTermVectorPayloads(true);
+ FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
+ textFieldWithTermVectors.setStoreTermVectors(true);
+ textFieldWithTermVectors.setStoreTermVectorOffsets(true);
+ textFieldWithTermVectors.setStoreTermVectorPositions(true);
+ textFieldWithTermVectors.setStoreTermVectorPayloads(true);
+ Field textFieldAnalyzed = new Field(
+ "text",
+ m.get("textStr"),
+ textFieldWithTermVectors
+ );
- Field textFieldAnalyzed = new Field(
- "text",
- m.get("textStr"),
- textFieldWithTermVectors
- );
+ MultiTermTokenStream ts = getTermVector(m.get("text"));
- MultiTermTokenStream ts = getTermVector(m.get("text"));
+ textFieldAnalyzed.setTokenStream( ts );
- textFieldAnalyzed.setTokenStream( ts );
+ doc.add(textFieldAnalyzed);
- doc.add(textFieldAnalyzed);
-
- // Add document to writer
- w.addDocument(doc);
+ // Add document to writer
+ w.addDocument(doc);
};
public static MultiTermTokenStream getTermVector (String stream) {
- MultiTermTokenStream ts = new MultiTermTokenStream();
+ MultiTermTokenStream ts = new MultiTermTokenStream();
- int pos = 0;
- for (String seg : stream.split(" ")) {
+ int pos = 0;
+ for (String seg : stream.split(" ")) {
+
+ String[] tokseg = seg.split("\\|");
- String[] tokseg = seg.split("\\|");
+ try {
+ MultiTermToken mtt = new MultiTermToken('s', tokseg[0]);
+
+ mtt.add("T");
+ mtt.add('i', tokseg[0].toLowerCase());
+ mtt.add('p', tokseg[1]);
+ mtt.add('l', tokseg[2]);
- MultiTermToken mtt = new MultiTermToken('s', tokseg[0]);
-
- mtt.add("T");
- mtt.add('i', tokseg[0].toLowerCase());
- mtt.add('p', tokseg[1]);
- mtt.add('l', tokseg[2]);
-
- if (tokseg.length == 4) {
- for (String morph : tokseg[3].split(";")) {
- mtt.add('m', morph);
- }
- };
- if (tokseg.length == 5) {
- mtt.add('e', tokseg[4]);
- };
-
- ts.addMultiTermToken(mtt);
- };
-
- return ts;
+ if (tokseg.length == 4) {
+ for (String morph : tokseg[3].split(";")) {
+ mtt.add('m', morph);
+ }
+ };
+ if (tokseg.length == 5) {
+ mtt.add('e', tokseg[4]);
+ };
+
+ ts.addMultiTermToken(mtt);
+ }
+ catch (CorpusDataException cde) {
+ fail(cde.getErrorCode() + ": " + cde.getMessage());
+ };
+ };
+
+ return ts;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/TestSimple.java b/src/test/java/de/ids_mannheim/korap/TestSimple.java
index 1dd85f3..786d7a5 100644
--- a/src/test/java/de/ids_mannheim/korap/TestSimple.java
+++ b/src/test/java/de/ids_mannheim/korap/TestSimple.java
@@ -9,6 +9,7 @@
import de.ids_mannheim.korap.model.*;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.CorpusDataException;
import static de.ids_mannheim.korap.util.KorapByte.*;
@@ -19,140 +20,145 @@
import org.apache.lucene.util.Bits;
/**
- * @author Nils Diewald
- *
* Helper class for testing the KorapIndex framework (Simple).
+ *
+ * @author diewald
*/
public class TestSimple {
// Add document
public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
- Document doc = new Document();
+ Document doc = new Document();
- FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
- textFieldWithTermVectors.setStoreTermVectors(true);
- /*
- No offsets are stored.
- textFieldWithTermVectors.setStoreTermVectorOffsets(true);
- */
- textFieldWithTermVectors.setStoreTermVectorPositions(true);
- textFieldWithTermVectors.setStoreTermVectorPayloads(true);
+ FieldType textFieldWithTermVectors = new FieldType(TextField.TYPE_STORED);
+ textFieldWithTermVectors.setStoreTermVectors(true);
+ /*
+ No offsets are stored.
+ textFieldWithTermVectors.setStoreTermVectorOffsets(true);
+ */
+ textFieldWithTermVectors.setStoreTermVectorPositions(true);
+ textFieldWithTermVectors.setStoreTermVectorPayloads(true);
- Field textFieldAnalyzed = new Field(
- "text",
- m.get("textStr"),
- textFieldWithTermVectors
- );
+ Field textFieldAnalyzed = new Field(
+ "text",
+ m.get("textStr"),
+ textFieldWithTermVectors
+ );
- MultiTermTokenStream ts = getTermVector(m.get("text"));
+ MultiTermTokenStream ts = getTermVector(m.get("text"));
- textFieldAnalyzed.setTokenStream( ts );
+ textFieldAnalyzed.setTokenStream( ts );
- doc.add(textFieldAnalyzed);
+ doc.add(textFieldAnalyzed);
- // Add document to writer
- w.addDocument(doc);
+ // Add document to writer
+ w.addDocument(doc);
};
// Get Term Vector
public static MultiTermTokenStream getTermVector (String stream) {
- MultiTermTokenStream ts = new MultiTermTokenStream();
+ MultiTermTokenStream ts = new MultiTermTokenStream();
- int pos = 0;
- for (String seg : stream.split(" ")) {
- // System.err.println("** Prepare " + seg);
- String[] tokens = seg.split("\\|");
+ int pos = 0;
+ for (String seg : stream.split(" ")) {
+ // System.err.println("** Prepare " + seg);
+ String[] tokens = seg.split("\\|");
- int i = 0;
+ int i = 0;
- while (tokens[i].length() == 0)
- i++;
+ while (tokens[i].length() == 0)
+ i++;
- MultiTermToken mtt = new MultiTermToken(tokens[i]);
- // System.err.println("** Add term " + tokens[i]);
- i++;
- for (; i < tokens.length; i++) {
- if (tokens[i].length() == 0)
- continue;
- mtt.add(tokens[i]);
- };
-
- ts.addMultiTermToken(mtt);
- };
-
- return ts;
+ try {
+ MultiTermToken mtt = new MultiTermToken(tokens[i]);
+ // System.err.println("** Add term " + tokens[i]);
+ i++;
+ for (; i < tokens.length; i++) {
+ if (tokens[i].length() == 0)
+ continue;
+ mtt.add(tokens[i]);
+ };
+ ts.addMultiTermToken(mtt);
+ }
+ catch (CorpusDataException cde) {
+ fail(cde.getErrorCode() + ": " + cde.getMessage());
+ };
+ };
+
+ return ts;
};
+
// Get query wrapper based on json file
public static SpanQueryWrapper getJSONQuery (String jsonFile) {
- SpanQueryWrapper sqwi;
+ SpanQueryWrapper sqwi;
- try {
- String json = getString(jsonFile);
- sqwi = new KorapQuery("tokens").fromJson(json);
- }
- catch (QueryException e) {
- fail(e.getMessage());
- sqwi = new KorapQuery("tokens").seg("???");
- };
- return sqwi;
+ try {
+ String json = getString(jsonFile);
+ sqwi = new KorapQuery("tokens").fromJson(json);
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ sqwi = new KorapQuery("tokens").seg("???");
+ };
+ return sqwi;
};
// Get string
public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
- };
- in.close();
- } catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
+ StringBuilder contentBuilder = new StringBuilder();
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(path));
+ String str;
+ while ((str = in.readLine()) != null) {
+ contentBuilder.append(str);
+ };
+ in.close();
+ } catch (IOException e) {
+ fail(e.getMessage());
+ }
+ return contentBuilder.toString();
};
// getSpan Info
public static List<String> getSpanInfo (IndexReader reader, SpanQuery query)
- throws IOException {
- Map<Term, TermContext> termContexts = new HashMap<>();
- List<String> spanArray = new ArrayList<>();
+ throws IOException {
+ Map<Term, TermContext> termContexts = new HashMap<>();
+ List<String> spanArray = new ArrayList<>();
+
+ for (AtomicReaderContext atomic : reader.leaves()) {
+ Bits bitset = atomic.reader().getLiveDocs();
+ // Spans spans = NearSpansOrdered();
+ Spans spans = query.getSpans(atomic, bitset, termContexts);
- for (AtomicReaderContext atomic : reader.leaves()) {
- Bits bitset = atomic.reader().getLiveDocs();
- // Spans spans = NearSpansOrdered();
- Spans spans = query.getSpans(atomic, bitset, termContexts);
-
- while (spans.next()) {
- StringBuffer payloadString = new StringBuffer();
- int docid = atomic.docBase + spans.doc();
- if (spans.isPayloadAvailable()) {
- for (byte[] payload : spans.getPayload()) {
- /* retrieve payload for current matching span */
-
- payloadString.append(byte2int(payload)).append(",");
- payloadString.append(byte2int(payload, 2));
- // payloadString.append(byte2int(payload, 1));
- payloadString.append(" (" + payload.length + ")");
- payloadString.append(" | ");
- };
- };
- spanArray.add(
- "Doc: " +
- docid +
- " with " +
- spans.start() +
- "-" +
- spans.end() +
- " || " +
- payloadString.toString()
- );
- };
- };
- return spanArray;
+ while (spans.next()) {
+ StringBuffer payloadString = new StringBuffer();
+ int docid = atomic.docBase + spans.doc();
+ if (spans.isPayloadAvailable()) {
+ for (byte[] payload : spans.getPayload()) {
+ /* retrieve payload for current matching span */
+
+ payloadString.append(byte2int(payload)).append(",");
+ payloadString.append(byte2int(payload, 2));
+ // payloadString.append(byte2int(payload, 1));
+ payloadString.append(" (" + payload.length + ")");
+ payloadString.append(" | ");
+ };
+ };
+ spanArray.add(
+ "Doc: " +
+ docid +
+ " with " +
+ spans.start() +
+ "-" +
+ spans.end() +
+ " || " +
+ payloadString.toString()
+ );
+ };
+ };
+ return spanArray;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
index 94b7e8b..075b51c 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
@@ -9,6 +9,8 @@
import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.util.CorpusDataException;
+
import static de.ids_mannheim.korap.Test.*;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -73,7 +75,7 @@
private Directory index = new RAMDirectory();
@Test
- public void multiTermToken () {
+ public void multiTermToken () throws CorpusDataException {
MultiTermToken test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
assertEquals(test.terms.get(0).term, "hunde");
assertEquals(test.terms.get(1).term, "pos:n");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
index 417a8b4..1c07163 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
@@ -23,84 +23,89 @@
@Test
public void indexExample () throws IOException {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
- assertEquals(0, ki.numberOf("base", "documents"));
- assertEquals(0, ki.numberOf("base", "tokens"));
- assertEquals(0, ki.numberOf("base", "sentences"));
- assertEquals(0, ki.numberOf("base", "paragraphs"));
+ assertEquals(0, ki.numberOf("base", "documents"));
+ assertEquals(0, ki.numberOf("base", "tokens"));
+ assertEquals(0, ki.numberOf("base", "sentences"));
+ assertEquals(0, ki.numberOf("base", "paragraphs"));
- FieldDocument fd = new FieldDocument();
+ FieldDocument fd = new FieldDocument();
- fd.addString("name", "Peter");
- fd.addInt("zahl1", 56);
- fd.addInt("zahl2", "58");
- fd.addText("teaser", "Das ist der Name der Rose");
- fd.addTV("base", "ich bau", "[(0-3)s:ich|l:ich|p:PPER|-:sentences#-$<i>2][(4-7)s:bau|l:bauen|p:VVFIN]");
- ki.addDoc(fd);
+ fd.addString("name", "Peter");
+ fd.addInt("zahl1", 56);
+ fd.addInt("zahl2", "58");
+ fd.addText("teaser", "Das ist der Name der Rose");
+ fd.addTV("base",
+ "ich bau",
+ "[(0-3)s:ich|l:ich|p:PPER|-:sentences$<i>2]" +
+ "[(4-7)s:bau|l:bauen|p:VVFIN]");
+ ki.addDoc(fd);
- fd = new FieldDocument();
+ fd = new FieldDocument();
- fd.addString("name", "Hans");
- fd.addInt("zahl1", 14);
- fd.addText("teaser", "Das Sein");
+ fd.addString("name", "Hans");
+ fd.addInt("zahl1", 14);
+ fd.addText("teaser", "Das Sein");
- MultiTermTokenStream mtts = fd.newMultiTermTokenStream();
- mtts.addMultiTermToken("s:wir#0-3", "l:wir", "p:PPER");
- mtts.addMultiTermToken("s:sind#4-8", "l:sein", "p:VVFIN");
- mtts.addMeta("sentences", (int) 5);
- fd.addTV("base", "wir sind", mtts);
+ MultiTermTokenStream mtts = fd.newMultiTermTokenStream();
+ mtts.addMultiTermToken("s:wir#0-3", "l:wir", "p:PPER");
+ mtts.addMultiTermToken("s:sind#4-8", "l:sein", "p:VVFIN");
+ mtts.addMeta("sentences", (int) 5);
+ fd.addTV("base", "wir sind", mtts);
+
+ ki.addDoc(fd);
- ki.addDoc(fd);
+ /* Save documents */
+ ki.commit();
- /* Save documents */
- ki.commit();
+ assertEquals(2, ki.numberOf("base", "documents"));
+ assertEquals(7, ki.numberOf("base", "sentences"));
- assertEquals(2, ki.numberOf("base", "documents"));
- assertEquals(7, ki.numberOf("base", "sentences"));
+ fd = new FieldDocument();
+ fd.addString("name", "Frank");
+ fd.addInt("zahl1", 59);
+ fd.addInt("zahl2", 65);
+ fd.addText("teaser", "Noch ein Versuch");
+ fd.addTV("base",
+ "ich bau",
+ "[(0-3)s:der|l:der|p:DET|-:sentences$<i>3]" +
+ "[(4-8)s:baum|l:baum|p:NN]");
+ ki.addDoc(fd);
- fd = new FieldDocument();
+ /* Save documents */
+ ki.commit();
- fd.addString("name", "Frank");
- fd.addInt("zahl1", 59);
- fd.addInt("zahl2", 65);
- fd.addText("teaser", "Noch ein Versuch");
- fd.addTV("base", "ich bau", "[(0-3)s:der|l:der|p:DET|-:sentences#-$<i>3][(4-8)s:baum|l:baum|p:NN]");
- ki.addDoc(fd);
+ assertEquals(3, ki.numberOf("base", "documents"));
+ assertEquals(10, ki.numberOf("base", "sentences"));
- /* Save documents */
- ki.commit();
-
- assertEquals(3, ki.numberOf("base", "documents"));
- assertEquals(10, ki.numberOf("base", "sentences"));
-
- // KorapQuery kq = new KorapQuery("text");
- // ki.search();
+ // KorapQuery kq = new KorapQuery("text");
+ // ki.search();
};
@Test
public void indexAlteration () throws IOException {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
+
+ assertEquals(0, ki.numberOf("base", "documents"));
- assertEquals(0, ki.numberOf("base", "documents"));
+ FieldDocument fd = new FieldDocument();
+ fd.addString("name", "Peter");
+ ki.addDoc(fd);
+
+ assertEquals(0, ki.numberOf("base", "documents"));
- FieldDocument fd = new FieldDocument();
- fd.addString("name", "Peter");
- ki.addDoc(fd);
+ fd = new FieldDocument();
+ fd.addString("name", "Michael");
+ ki.addDoc(fd);
- assertEquals(0, ki.numberOf("base", "documents"));
+ assertEquals(0, ki.numberOf("base", "documents"));
- fd = new FieldDocument();
- fd.addString("name", "Michael");
- ki.addDoc(fd);
+ ki.commit();
- assertEquals(0, ki.numberOf("base", "documents"));
+ assertEquals(2, ki.numberOf("base", "documents"));
- ki.commit();
-
- assertEquals(2, ki.numberOf("base", "documents"));
-
- // hasDeletions, hasPendingMerges
+ // hasDeletions, hasPendingMerges
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index 59c8fce..f4e4a8d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -17,11 +17,8 @@
import de.ids_mannheim.korap.KorapQuery;
import de.ids_mannheim.korap.KorapMatch;
import de.ids_mannheim.korap.KorapResult;
-import de.ids_mannheim.korap.query.SpanNextQuery;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-import de.ids_mannheim.korap.query.SpanWithinQuery;
-import de.ids_mannheim.korap.query.SpanMatchModifyClassQuery;
-import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.KorapCollection;
+import de.ids_mannheim.korap.query.*;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.model.MultiTermTokenStream;
@@ -44,17 +41,17 @@
// abcabcabac
FieldDocument fd = new FieldDocument();
fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:c|i:c|_2#2-3]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
ki.addDoc(fd);
ki.commit();
@@ -214,200 +211,284 @@
@Test
public void indexExample2 () throws IOException {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
- // abcabcabac
- FieldDocument fd = new FieldDocument();
- fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:c|i:c|_2#2-3]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
- ki.addDoc(fd);
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ ki.commit();
- ki.commit();
+ SpanQuery sq;
+ KorapResult kr;
- SpanQuery sq;
- KorapResult kr;
-
- // No contexts:
- sq = new SpanOrQuery(
+ // No contexts:
+ sq = new SpanOrQuery(
new SpanTermQuery(new Term("base", "s:a")),
- new SpanTermQuery(new Term("base", "s:c"))
+ new SpanTermQuery(new Term("base", "s:c"))
);
- kr = ki.search(sq, (short) 20);
+ kr = ki.search(sq, (short) 20);
- assertEquals("totalResults", kr.getTotalResults(), 7);
- assertEquals("SnippetBrackets (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\">bcabca<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
- assertEquals("SnippetBrackets (0)", "[a]bcabca ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("totalResults", kr.getTotalResults(), 7);
+ assertEquals("SnippetBrackets (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\">bcabca<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+ assertEquals("SnippetBrackets (0)", "[a]bcabca ...", kr.getMatch(0).getSnippetBrackets());
- assertEquals("SnippetBrackets (1)", "ab[c]abcaba ...", kr.getMatch(1).getSnippetBrackets());
- assertEquals("SnippetBrackets (1)", "<span class=\"context-left\">ab</span><span class=\"match\">c</span><span class=\"context-right\">abcaba<span class=\"more\"></span></span>", kr.getMatch(1).getSnippetHTML());
+ assertEquals("SnippetBrackets (1)", "ab[c]abcaba ...", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("SnippetBrackets (1)", "<span class=\"context-left\">ab</span><span class=\"match\">c</span><span class=\"context-right\">abcaba<span class=\"more\"></span></span>", kr.getMatch(1).getSnippetHTML());
+
+ assertEquals("SnippetBrackets (6)", "... abcaba[c]", kr.getMatch(6).getSnippetBrackets());
+ assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.getMatch(6).getSnippetHTML());
- assertEquals("SnippetBrackets (6)", "... abcaba[c]", kr.getMatch(6).getSnippetBrackets());
- assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.getMatch(6).getSnippetHTML());
+ kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);
- kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);
+ assertEquals("totalResults", kr.getTotalResults(), 7);
+ assertEquals("SnippetBrackets (0)", "[a] ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("SnippetHTML (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
- assertEquals("totalResults", kr.getTotalResults(), 7);
- assertEquals("SnippetBrackets (0)", "[a] ...", kr.getMatch(0).getSnippetBrackets());
- assertEquals("SnippetHTML (0)", "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());
+ assertEquals("SnippetBrackets (1)", "... [c] ...", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("SnippetHTML (1)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.getMatch(1).getSnippetHTML());
- assertEquals("SnippetBrackets (1)", "... [c] ...", kr.getMatch(1).getSnippetBrackets());
- assertEquals("SnippetHTML (1)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"><span class=\"more\"></span></span>", kr.getMatch(1).getSnippetHTML());
-
- assertEquals("SnippetBrackets (6)", "... [c]", kr.getMatch(6).getSnippetBrackets());
- assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.getMatch(6).getSnippetHTML());
+ assertEquals("SnippetBrackets (6)", "... [c]", kr.getMatch(6).getSnippetBrackets());
+ assertEquals("SnippetBrackets (6)", "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"></span>", kr.getMatch(6).getSnippetHTML());
};
@Test
public void indexExample3 () throws Exception {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
- // abcabcabac
- FieldDocument fd = new FieldDocument();
- fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:c|i:c|_2#2-3]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
- ki.addDoc(fd);
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ ki.commit();
- ki.commit();
+ KorapResult kr;
- KorapResult kr;
+ KorapQuery kq = new KorapQuery("base");
- KorapQuery kq = new KorapQuery("base");
+ SpanQuery sq = kq._(1,kq.seq(kq.seg("s:b")).append(kq.seg("s:a")).append(kq._(2,kq.seg("s:c")))).toQuery();
+
+ kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);
- SpanQuery sq = kq._(1,kq.seq(kq.seg("s:b")).append(kq.seg("s:a")).append(kq._(2,kq.seg("s:c")))).toQuery();
-
- kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);
-
- assertEquals("totalResults", kr.getTotalResults(), 1);
- assertEquals("SnippetBrackets (0)", "... ca[{1:ba{2:c}}]", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("totalResults", kr.getTotalResults(), 1);
+ assertEquals("SnippetBrackets (0)", "... ca[{1:ba{2:c}}]", kr.getMatch(0).getSnippetBrackets());
};
@Test
public void indexExampleExtend () throws IOException {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
- // abcabcabac
- FieldDocument fd = new FieldDocument();
- fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:c|i:c|_2#2-3]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
- ki.addDoc(fd);
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ ki.commit();
- ki.commit();
+ SpanQuery sq;
+ KorapResult kr;
- SpanQuery sq;
- KorapResult kr;
-
- sq = new SpanMatchModifyClassQuery(
+ sq = new SpanMatchModifyClassQuery(
new SpanNextQuery(
- new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
), (byte) 3
);
- kr = ki.search(sq, (short) 10);
+ kr = ki.search(sq, (short) 10);
- assertEquals("totalResults", kr.getTotalResults(), 3);
+ assertEquals("totalResults", kr.getTotalResults(), 3);
- KorapMatch km = kr.getMatch(0);
- assertEquals("StartPos (0)", 1, km.startPos);
- assertEquals("EndPos (0)", 2, km.endPos);
- assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...", km.getSnippetBrackets());
+ KorapMatch km = kr.getMatch(0);
+ assertEquals("StartPos (0)", 1, km.startPos);
+ assertEquals("EndPos (0)", 2, km.endPos);
+ assertEquals("SnippetBrackets (0)", "a[{3:b}]cabcab ...", km.getSnippetBrackets());
- sq = new SpanMatchModifyClassQuery(
- new SpanMatchModifyClassQuery(
+ sq = new SpanMatchModifyClassQuery(
+ new SpanMatchModifyClassQuery(
new SpanNextQuery(
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 2),
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
), (byte) 3
- ), (byte) 2
- );
+ ), (byte) 2
+ );
- kr = ki.search(sq, (short) 10);
+ kr = ki.search(sq, (short) 10);
- km = kr.getMatch(0);
- assertEquals("StartPos (0)", 0, km.startPos);
- assertEquals("EndPos (0)", 1, km.endPos);
- assertEquals("SnippetBrackets (0)", "[{2:a}]bcabca ...", km.getSnippetBrackets());
+ km = kr.getMatch(0);
+ assertEquals("StartPos (0)", 0, km.startPos);
+ assertEquals("EndPos (0)", 1, km.endPos);
+ assertEquals("SnippetBrackets (0)", "[{2:a}]bcabca ...", km.getSnippetBrackets());
-
- // TODO: Check ID
+ // TODO: Check ID
};
@Test
public void indexExampleFocusWithSpan () throws IOException {
- KorapIndex ki = new KorapIndex();
+ KorapIndex ki = new KorapIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
+ "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ ki.commit();
- // abcabcabac
- FieldDocument fd = new FieldDocument();
- fd.addTV("base",
- "abcabcabac",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
- "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
- "[(3-4)s:a|i:a|_3#3-4]" +
- "[(4-5)s:b|i:b|_4#4-5]" +
- "[(5-6)s:c|i:c|_5#5-6]" +
- "[(6-7)s:a|i:a|_6#6-7]" +
- "[(7-8)s:b|i:b|_7#7-8]" +
- "[(8-9)s:a|i:a|_8#8-9]" +
- "[(9-10)s:c|i:c|_9#9-10]");
- ki.addDoc(fd);
-
- ki.commit();
-
- SpanQuery sq;
- KorapResult kr;
-
- sq = new SpanWithinQuery(
- new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
+ SpanQuery sq;
+ KorapResult kr;
+
+ sq = new SpanWithinQuery(
+ new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
);
- kr = ki.search(sq, (short) 10);
- assertEquals(kr.getQuery(), "spanContain({2: <base:s />}, {3: base:s:b})");
- assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{2:{3:b}cab}]cabac");
+ kr = ki.search(sq, (short) 10);
+ assertEquals(kr.getQuery(), "spanContain({2: <base:s />}, {3: base:s:b})");
+ assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{2:{3:b}cab}]cabac");
- sq = new SpanMatchModifyClassQuery(
+ sq = new SpanMatchModifyClassQuery(
new SpanWithinQuery(
new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)
), (byte) 3
);
- kr = ki.search(sq, (short) 10);
- assertEquals(kr.getQuery(), "focus(3: spanContain({2: <base:s />}, {3: base:s:b}))");
- assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{3:b}]cabcab ...");
+ kr = ki.search(sq, (short) 10);
+ assertEquals(kr.getQuery(), "focus(3: spanContain({2: <base:s />}, {3: base:s:b}))");
+ assertEquals(kr.getMatch(0).getSnippetBrackets(), "a[{3:b}]cabcab ...");
};
+
+
+ @Test
+ public void indexExampleFocusWithSkip () throws IOException {
+ KorapIndex ki = new KorapIndex();
+
+ // abcabcabac
+ FieldDocument fd = new FieldDocument();
+ // The payload should be ignored
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" + // |<>:p#0-10<i>9]" +
+ "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
+ "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+
+ ki.addDoc(fd);
+ fd.addTV("base",
+ "gbcgbcgbgc",
+ "[(0-1)s:g|i:g|_0#0-1|-:t$<i>10|<>:p#0-10$<i>9]" +
+ "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
+ "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
+ "[(3-4)s:g|i:g|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:g|i:g|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:g|i:g|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ fd.addTV("base",
+ "gbcgbcgbgc",
+ "[(0-1)s:g|i:g|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:c|i:c|_2#2-3]" +
+ "[(3-4)s:g|i:g|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:g|i:g|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:g|i:g|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ fd.addTV("base",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10|<>:p#0-10$<i>9]" +
+ "[(1-2)s:b|i:b|_1#1-2|<>:s#1-5$<i>5]" +
+ "[(2-3)s:c|i:c|_2#2-3|<>:s#2-7$<i>7]" +
+ "[(3-4)s:a|i:a|_3#3-4]" +
+ "[(4-5)s:b|i:b|_4#4-5]" +
+ "[(5-6)s:c|i:c|_5#5-6]" +
+ "[(6-7)s:a|i:a|_6#6-7]" +
+ "[(7-8)s:b|i:b|_7#7-8]" +
+ "[(8-9)s:a|i:a|_8#8-9]" +
+ "[(9-10)s:c|i:c|_9#9-10]");
+ ki.addDoc(fd);
+ ki.commit();
+
+ SpanQuery sq;
+ KorapResult kr;
+ KorapCollection kc = new KorapCollection(ki);
+
+ assertEquals("Documents", 4, kc.numberOf("documents"));
+
+ sq = new SpanWithinQuery(
+ new SpanElementQuery("base", "p"),
+ new SpanMatchModifyClassQuery(
+ new SpanWithinQuery(
+ new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
+ new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)
+ ), (byte) 3
+ )
+ );
+
+ fail("Skipping may go horribly wrong! (Known issue)");
+ kr = kc.search(sq);
+ assertEquals(kr.getQuery(), "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))");
+ assertEquals(12, kr.getTotalResults());
+ assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
+ };
+
};
diff --git a/src/test/java/de/ids_mannheim/korap/model/TestMultiTerm.java b/src/test/java/de/ids_mannheim/korap/model/TestMultiTerm.java
index 8ee1bd4..715c1f0 100644
--- a/src/test/java/de/ids_mannheim/korap/model/TestMultiTerm.java
+++ b/src/test/java/de/ids_mannheim/korap/model/TestMultiTerm.java
@@ -2,6 +2,7 @@
import java.util.*;
import de.ids_mannheim.korap.model.MultiTerm;
+import de.ids_mannheim.korap.util.CorpusDataException;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
@@ -15,7 +16,7 @@
@RunWith(JUnit4.class)
public class TestMultiTerm {
@Test
- public void multiTermSimple () {
+ public void multiTermSimple () throws CorpusDataException {
MultiTerm mt = new MultiTerm("test");
assertEquals(mt.term, "test");
assertNull(mt.payload);
@@ -24,7 +25,7 @@
};
@Test
- public void multiTermPayload () {
+ public void multiTermPayload () throws CorpusDataException {
MultiTerm mt = new MultiTerm("test$5");
assertEquals("test", mt.term);
assertEquals(new BytesRef("5"), mt.payload);
@@ -33,7 +34,7 @@
};
@Test
- public void multiTermOffset () {
+ public void multiTermOffset () throws CorpusDataException {
MultiTerm mt = new MultiTerm("versuch#2-34");
assertEquals(mt.term, "versuch");
assertNull(mt.payload);
@@ -42,7 +43,7 @@
};
@Test
- public void multiTermOffsetPayload () {
+ public void multiTermOffsetPayload () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example#6-42$hihi");
assertEquals(mt.term, "example");
assertEquals(new BytesRef("hihi"), mt.payload);
@@ -51,7 +52,7 @@
};
@Test
- public void multiTermString () {
+ public void multiTermString () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example#6-42$hihi");
assertEquals("example#6-42$hihi", mt.toString());
mt.term = "spassmacher";
@@ -59,7 +60,7 @@
};
@Test
- public void multiTermStringPayloadType () {
+ public void multiTermStringPayloadType () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example$<i>4000");
assertEquals("example$<?>[0,0,f,a0]", mt.toString());
@@ -68,7 +69,7 @@
};
@Test
- public void multiTermStringPayloadType2 () {
+ public void multiTermStringPayloadType2 () throws CorpusDataException {
MultiTerm mt = new MultiTerm();
mt.setTerm("beispiel");
mt.setStart(40);
@@ -80,19 +81,19 @@
};
@Test
- public void multiTermStringPayloadType3 () {
+ public void multiTermStringPayloadType3 () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example$<b>120");
assertEquals("example$x", mt.toString());
};
@Test
- public void multiTermStringPayloadType4 () {
+ public void multiTermStringPayloadType4 () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example$<i>420<b>120");
assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
};
@Test
- public void multiTermStringPayloadType5 () {
+ public void multiTermStringPayloadType5 () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example$<i>4000");
assertEquals("example$<?>[0,0,f,a0]", mt.toString());
@@ -104,7 +105,7 @@
};
@Test
- public void multiTermStringFail () {
+ public void multiTermStringFail () throws CorpusDataException {
MultiTerm mt = new MultiTerm("example#56-66");
assertEquals(56, mt.getStart());
assertEquals(66,mt.getEnd());
@@ -113,9 +114,15 @@
assertEquals(56, mt.getStart());
assertEquals(66, mt.getEnd());
- mt = new MultiTerm("example#56$<i>a");
- assertEquals(mt.getPayload(), null);
- assertEquals(mt.getStart(), 0);
- assertEquals(mt.getEnd(), 0);
+ try {
+ mt = new MultiTerm("example#56$<i>a");
+ assertEquals(mt.getPayload(), null);
+ assertEquals(mt.getStart(), 0);
+ assertEquals(mt.getEnd(), 0);
+ }
+ catch (CorpusDataException cde) {
+ // Works fine!
+ assertTrue(true);
+ };
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/model/TestMultiTermToken.java b/src/test/java/de/ids_mannheim/korap/model/TestMultiTermToken.java
index 3e301ee..0b36001 100644
--- a/src/test/java/de/ids_mannheim/korap/model/TestMultiTermToken.java
+++ b/src/test/java/de/ids_mannheim/korap/model/TestMultiTermToken.java
@@ -2,6 +2,7 @@
import java.util.*;
import de.ids_mannheim.korap.model.MultiTermToken;
+import de.ids_mannheim.korap.util.CorpusDataException;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
@@ -18,7 +19,7 @@
public class TestMultiTermToken {
@Test
- public void multiTermTokenSimple () {
+ public void multiTermTokenSimple () throws CorpusDataException {
MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
assertEquals("[t:test|a:abbruch]", mtt.toString());
mtt.add("b:banane");
@@ -38,7 +39,7 @@
};
@Test
- public void multiTermTokenOffsets () {
+ public void multiTermTokenOffsets () throws CorpusDataException {
MultiTermToken mtt = new MultiTermToken("t:test#23-27");
assertEquals("[t:test#23-27]", mtt.toString());
mtt.add("b:baum#34-45");
diff --git a/src/test/java/de/ids_mannheim/korap/model/TestMultiTermTokenStream.java b/src/test/java/de/ids_mannheim/korap/model/TestMultiTermTokenStream.java
index f1e3597..3a1fcff 100644
--- a/src/test/java/de/ids_mannheim/korap/model/TestMultiTermTokenStream.java
+++ b/src/test/java/de/ids_mannheim/korap/model/TestMultiTermTokenStream.java
Binary files differ
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestFrameConstraint.java b/src/test/java/de/ids_mannheim/korap/query/TestFrameConstraint.java
index 019c949..4600248 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestFrameConstraint.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestFrameConstraint.java
@@ -1,7 +1,12 @@
package de.ids_mannheim.korap.query;
+import java.io.IOException;
+import java.util.Collection;
+
import de.ids_mannheim.korap.query.FrameConstraint;
import de.ids_mannheim.korap.util.QueryException;
+import org.apache.lucene.search.spans.Spans;
+
import static org.junit.Assert.*;
import org.junit.Test;
@@ -197,4 +202,53 @@
assertEquals(fc1.check("succeedsDirectly"), true);
assertEquals(fc1.check("succeeds"), true);
};
+
+ private class TestSpans extends Spans {
+ private int s, e;
+
+ @Override
+ public int doc () {
+ return 0;
+ };
+
+ @Override
+ public int start () {
+ return this.s;
+ };
+
+ @Override
+ public int end () {
+ return this.e;
+ };
+
+ @Override
+ public boolean skipTo (int target) {
+ return true;
+ };
+
+ @Override
+ public boolean next () {
+ return true;
+ };
+
+ public Collection<byte[]> getPayload() throws IOException {
+ return null;
+ }
+
+ @Override
+ public boolean isPayloadAvailable() throws IOException {
+ return false;
+ };
+
+ @Override
+ public String toString () {
+ return "";
+ };
+
+ @Override
+ public long cost () {
+ return 1;
+ };
+
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
index e1378fe..8545881 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSequenceQueryJSON.java
@@ -26,229 +26,229 @@
@Test
public void queryJSONseqEmpty () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty.jsonld");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty.jsonld");
- // []
- assertTrue(sqwi.isEmpty());
+ // []
+ assertTrue(sqwi.isEmpty());
};
@Test
public void queryJSONseqEmptyEnd () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-last.jsonld");
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last.jsonld");
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right)");
};
@Test
public void queryJSONseqEmptyEndClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-last-class.jsonld");
- // der{3:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right, class:3)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last-class.jsonld");
+ // der{3:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{1, 1}, right, class:3)");
};
@Test
public void queryJSONseqEmptyEndRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-last-repetition.jsonld");
- // der[]{3,5}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{3, 5}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-last-repetition.jsonld");
+ // der[]{3,5}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:s:der, []{3, 5}, right)");
};
@Test
public void queryJSONseqEmptyStart () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-first.jsonld");
- // [][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first.jsonld");
+ // [][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left)");
};
@Test
public void queryJSONseqEmptyStartClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-first-class.jsonld");
- // {2:[]}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-class.jsonld");
+ // {2:[]}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:2)");
};
@Test
public void queryJSONseqEmptyStartRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition.jsonld");
- // []{2,7}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{2, 7}, left)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition.jsonld");
+ // []{2,7}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, []{2, 7}, left)");
};
@Test
public void queryJSONseqEmptyStartRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition-2.jsonld");
- // []{0,0}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-first-repetition-2.jsonld");
+ // []{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
};
@Test
public void queryJSONseqEmptyMiddle () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-middle.jsonld");
- // der[][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left))");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle.jsonld");
+ // der[][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left))");
};
@Test
public void queryJSONseqEmptyMiddleClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-class.jsonld");
- // der{1:[]}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:1))");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-class.jsonld");
+ // der{1:[]}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:1))");
};
@Test
public void queryJSONseqEmptyMiddleRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-repetition.jsonld");
- // der[]{4,8}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{4, 8}, left))");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-middle-repetition.jsonld");
+ // der[]{4,8}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanNext(tokens:s:der, spanExpansion(tokens:tt/p:NN, []{4, 8}, left))");
};
@Test
public void queryJSONseqEmptySurround () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround.jsonld");
- // [][tt/p=NN][]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround.jsonld");
+ // [][tt/p=NN][]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right)");
};
@Test
public void queryJSONseqEmptySurroundClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class.jsonld");
- // [][tt/p=NN]{2:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class.jsonld");
+ // [][tt/p=NN]{2:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{1, 1}, right, class:2)");
};
@Test
public void queryJSONseqEmptySurroundClass2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class-2.jsonld");
- // {3:[]}[tt/p=NN]{2:[]}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:3), []{1, 1}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-class-2.jsonld");
+ // {3:[]}[tt/p=NN]{2:[]}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left, class:3), []{1, 1}, right, class:2)");
};
@Test
public void queryJSONseqEmptySurroundRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition.jsonld");
- // [][tt/p=NN][]{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{2, 7}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition.jsonld");
+ // [][tt/p=NN][]{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{1, 1}, left), []{2, 7}, right)");
};
@Test
public void queryJSONseqEmptySurroundRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-2.jsonld");
- // []{3,5}[tt/p=NN][]{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 5}, left), []{2, 7}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-2.jsonld");
+ // []{3,5}[tt/p=NN][]{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 5}, left), []{2, 7}, right)");
};
@Test
public void queryJSONseqEmptySurroundRepetitionClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-class.jsonld");
- // {1:[]}{3,8}[tt/p=NN]{2:[]{2,7}}
- // Ist gleichbedeutend mit
- // {1:[]{3,8}}[tt/p=NN]{2:[]}{2,7}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 8}, left, class:1), []{2, 7}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("empty-surround-repetition-class.jsonld");
+ // {1:[]}{3,8}[tt/p=NN]{2:[]{2,7}}
+ // Ist gleichbedeutend mit
+ // {1:[]{3,8}}[tt/p=NN]{2:[]}{2,7}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(spanExpansion(tokens:tt/p:NN, []{3, 8}, left, class:1), []{2, 7}, right, class:2)");
};
@Test
public void queryJSONseqNegative () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative.jsonld");
- // [tt/p!=NN]
- assertTrue(sqwi.isNegative());
+ SpanQueryWrapper sqwi = jsonQueryFile("negative.jsonld");
+ // [tt/p!=NN]
+ assertTrue(sqwi.isNegative());
};
@Test
public void queryJSONseqNegativeStart () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-first.jsonld");
- // [tt/p!=NN][tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, left)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first.jsonld");
+ // [tt/p!=NN][tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, left)");
};
@Test
public void queryJSONseqNegativeEnd () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last.jsonld");
- // [tt/p=NN][tt/p!=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last.jsonld");
+ // [tt/p=NN][tt/p!=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right)");
};
@Test
public void queryJSONseqNegativeStartRepetition () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition.jsonld");
- // [tt/p!=NN]{4,5}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, left)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition.jsonld");
+ // [tt/p!=NN]{4,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, left)");
};
@Test
public void queryJSONseqNegativeStartRepetition2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-2.jsonld");
- // [tt/p!=NN]{0,5}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{0, 5}, left)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-2.jsonld");
+ // [tt/p!=NN]{0,5}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{0, 5}, left)");
};
@Test
public void queryJSONseqNegativeStartRepetition3 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-3.jsonld");
- // [tt/p!=NN]{0,0}[tt/p=NN]
- assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-first-repetition-3.jsonld");
+ // [tt/p!=NN]{0,0}[tt/p=NN]
+ assertEquals(sqwi.toQuery().toString(), "tokens:tt/p:NN");
};
@Test
public void queryJSONseqNegativeEndClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class.jsonld");
- // [tt/p=NN]{2:[tt/p!=NN]}
- SpanQuery sq = sqwi.toQuery();
- assertEquals(sq.toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]}
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{1, 1}, right, class:2)");
};
@Test
public void queryJSONseqNegativeEndRepetitionClass () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition.jsonld");
- // [tt/p=NN]{2:[tt/p!=NN]{4,5}}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]{4,5}}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
};
@Test
public void queryJSONseqNegativeEndRepetitionClass2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition-2.jsonld");
- // [tt/p=NN]{2:[tt/p!=NN]}{4,5}
- assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-class-repetition-2.jsonld");
+ // [tt/p=NN]{2:[tt/p!=NN]}{4,5}
+ assertEquals(sqwi.toQuery().toString(), "spanExpansion(tokens:tt/p:NN, !tokens:tt/p:NN{4, 5}, right, class:2)");
};
@Test
public void queryJSONseqNegativelastConstraint () {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-constraint.jsonld");
- try {
- sqwi.toQuery().toString();
- fail("Should throw an exception");
- }
- catch (QueryException qe) {
- assertEquals("Distance constraints not supported with empty or negative operands", qe.getMessage());
- };
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-constraint.jsonld");
+ try {
+ sqwi.toQuery().toString();
+ fail("Should throw an exception");
+ }
+ catch (QueryException qe) {
+ assertEquals("Distance constraints not supported with empty or negative operands", qe.getMessage());
+ };
};
@Test
public void queryJSONseqNegativeEndSequence () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence.jsonld");
- // [tt/p=NN]([tt/p!=DET][tt/p!=NN])
- assertEquals("spanExpansion(spanExpansion(tokens:tt/p:NN, !tokens:tt/p:DET{1, 1}, right), !tokens:tt/p:ADJ{1, 1}, right)", sqwi.toQuery().toString());
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence.jsonld");
+ // [tt/p=NN]([tt/p!=DET][tt/p!=ADJ])
+ assertEquals("spanExpansion(spanExpansion(tokens:tt/p:NN, !tokens:tt/p:DET{1, 1}, right), !tokens:tt/p:ADJ{1, 1}, right)", sqwi.toQuery().toString());
};
@Test
public void queryJSONseqNegativeEndSequence2 () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence-2.jsonld");
- // [tt/p!=NN]([tt/p!=DET][tt/p=NN])
+ SpanQueryWrapper sqwi = jsonQueryFile("negative-last-sequence-2.jsonld");
+ // [tt/p!=NN]([tt/p!=DET][tt/p=ADJ])
- // spanNext(tokens:tt/p:NN,
- assertEquals("spanExpansion(spanExpansion(tokens:tt/p:ADJ, !tokens:tt/p:DET{1, 1}, left), !tokens:tt/p:NN{1, 1}, left)", sqwi.toQuery().toString());
+ // spanNext(tokens:tt/p:NN,
+ assertEquals("spanExpansion(spanExpansion(tokens:tt/p:ADJ, !tokens:tt/p:DET{1, 1}, left), !tokens:tt/p:NN{1, 1}, left)", sqwi.toQuery().toString());
};
@Test
public void queryJSONseqMultipleDistances () throws QueryException {
- SpanQueryWrapper sqwi = jsonQueryFile("multiple-distances.jsonld");
- // er []{,10} kann []{1,10} sagte
+ SpanQueryWrapper sqwi = jsonQueryFile("multiple-distances.jsonld");
+ // er []{,10} kann []{1,10} sagte
- assertEquals("spanDistance(tokens:s:er, spanDistance(tokens:s:kann, tokens:s:sagte, [(w[2:11], ordered, notExcluded)]), [(w[1:11], ordered, notExcluded)])", sqwi.toQuery().toString());
+ assertEquals("spanDistance(tokens:s:er, spanDistance(tokens:s:kann, tokens:s:sagte, [(w[2:11], ordered, notExcluded)]), [(w[1:11], ordered, notExcluded)])", sqwi.toQuery().toString());
};
// get query wrapper based on json file
public SpanQueryWrapper jsonQueryFile (String filename) {
- return getJSONQuery(getClass().getResource(path + filename).getFile());
+ return getJSONQuery(getClass().getResource(path + filename).getFile());
};
};