Removed bad serialization experiment
diff --git a/src/main/java/de/ids_mannheim/korap/KorapDocument.java b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
index 33e3f3b..e24828a 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
@@ -40,7 +40,6 @@
// newly added
creationDate;
-
private String
// No longer supported
@@ -76,10 +75,9 @@
/**
* Get the publication date of the document
- * as a KorapDate object.
+ * as a {@link KorapDate} object.
*
- * @return A KorapDate object for chaining.
- * @see KorapDate
+ * @return A {@link KorapDate} object for chaining.
*/
@JsonIgnore
public KorapDate getPubDate () {
@@ -91,8 +89,7 @@
* Get the publication date of the document
* as a string.
*
- * @return A string containing the KorapDate.
- * @see KorapDate
+ * @return A string containing the {@link KorapDate}.
*/
@JsonProperty("pubDate")
public String getPubDateString () {
@@ -105,9 +102,9 @@
/**
* Set the publication date of the document.
*
- * @param date The date as a KorapDate compatible string representation.
- * @return A KorapDate object for chaining.
- * @see KorapDate#Constructor(String)
+ * @param date The date as a {@link KorapDate}
+ * compatible string representation.
+ * @return A {@link KorapDate} object for chaining.
*/
public KorapDate setPubDate (String date) {
this.pubDate = new KorapDate(date);
@@ -118,9 +115,8 @@
/**
* Set the publication date of the document.
*
- * @param date The date as a KorapDate object.
- * @return A KorapDate object for chaining.
- * @see KorapDate
+ * @param date The date as a {@link KorapDate} object.
+ * @return A {@link KorapDate} object for chaining.
*/
public KorapDate setPubDate (KorapDate date) {
return (this.pubDate = date);
@@ -129,10 +125,9 @@
/**
* Get the creation date of the document
- * as a KorapDate object.
+ * as a {@link KorapDate} object.
*
- * @return A KorapDate object for chaining.
- * @see KorapDate
+ * @return A {@link KorapDate} object for chaining.
*/
@JsonIgnore
public KorapDate getCreationDate () {
@@ -144,8 +139,7 @@
* Get the creation date of the document
* as a string.
*
- * @return A string containing the KorapDate.
- * @see KorapDate
+ * @return A string containing the {@link KorapDate}.
*/
@JsonProperty("creationDate")
public String getCreationDateString () {
@@ -158,9 +152,9 @@
/**
* Set the creation date of the document.
*
- * @param date The date as a KorapDate compatible string representation.
- * @return A KorapDate object for chaining.
- * @see KorapDate#Constructor(String)
+ * @param date The date as a {@link KorapDate}
+ * compatible string representation.
+ * @return A {@link KorapDate} object for chaining.
*/
public KorapDate setCreationDate (String date) {
this.creationDate = new KorapDate(date);
@@ -171,9 +165,8 @@
/**
* Set the creation date of the document.
*
- * @param date The date as a KorapDate object.
- * @return A KorapDate object for chaining.
- * @see KorapDate
+ * @param date The date as a {@link KorapDate} object.
+ * @return A {@link KorapDate} object for chaining.
*/
public KorapDate setCreationDate (KorapDate date) {
return (this.creationDate = date);
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index a529a73..9781476 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1,7 +1,5 @@
package de.ids_mannheim.korap;
-// Todo: ADD WORD COUNT AS A METADATA FIELD!
-
// Java classes
import java.util.*;
import java.util.zip.GZIPInputStream;
@@ -39,18 +37,11 @@
import org.slf4j.LoggerFactory;
/*
-
+ TODO: Add word count as a meta data field!
TODO: Validate document import!
-
TODO: DON'T STORE THE TEXT IN THE TOKENS FIELD!
It has only to be lifted for match views!!!
-
TODO: Support layer for specific foundries (IMPORTANT)
- TODO: Implement timeout!!!
- - https://lucene.apache.org/core/2_9_4/api/all/org/apache/lucene/search/TimeLimitingCollector.html
- - https://lucene.apache.org/core/2_9_4/api/all/org/apache/lucene/search/TimeLimitingCollector.html
- - http://stackoverflow.com/questions/19557476/timing-out-a-query-in-solr
-
TODO: Use FieldCache!
TODO: Reuse the indexreader everywhere - it should be threadsafe!
@@ -72,10 +63,22 @@
*/
/**
- * KorapIndex implements a simple API for searching in and writing to a
- * Lucene index and equesting several information but the index's nature.
*
- * @author Nils Diewald
+ * KorapIndex implements a simple API for searching in and writing to a
+ * Lucene index and requesting various information about the index's nature.
+ * <br />
+ *
+ * <pre>
+ * KorapIndex ki = new KorapIndex(
+ * new MMapDirectory(new File("/myindex"))
+ * );
+ * </pre>
+ *
+ * Properties can be stored in a properties file called 'index.properties'.
+ * Relevant properties are <code>lucene.version</code> and
+ * <code>lucene.name</code>.
+ *
+ * @author diewald
*/
public class KorapIndex {
@@ -93,6 +96,7 @@
private IndexWriterConfig config;
private IndexSearcher searcher;
private boolean readerOpen = false;
+
// The commit counter is only there for
// counting unstaged changes per thread (for bulk insertions)
// It does not represent real unstaged documents.
@@ -102,9 +106,11 @@
private String version, name;
private byte[] pl = new byte[4];
- private static ByteBuffer bb = ByteBuffer.allocate(4),
- bbOffset = ByteBuffer.allocate(8),
- bbTerm = ByteBuffer.allocate(16);
+ private static ByteBuffer
+ bb = ByteBuffer.allocate(4),
+ bbOffset = ByteBuffer.allocate(8),
+ bbTerm = ByteBuffer.allocate(16);
+
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapIndex.class);
@@ -112,124 +118,154 @@
public static final boolean DEBUG = false;
{
- Properties prop = new Properties();
- URL file = getClass().getClassLoader().getResource("index.properties");
+ Properties prop = new Properties();
+ URL file = getClass().getClassLoader().getResource("index.properties");
- if (file != null) {
- String f = file.getFile();
- try {
- InputStream fr = new FileInputStream(f);
- prop.load(fr);
- this.version = prop.getProperty("lucene.version");
- this.name = prop.getProperty("lucene.name");
- }
- catch (FileNotFoundException e) {
- log.warn(e.getLocalizedMessage());
- };
- };
+ // File found
+ if (file != null) {
+ String f = file.getFile();
+ // Read property file
+ try {
+ InputStream fr = new FileInputStream(f);
+ prop.load(fr);
+ this.version = prop.getProperty("lucene.version");
+ this.name = prop.getProperty("lucene.name");
+ }
+
+ // Unable to read property file
+ catch (FileNotFoundException e) {
+ log.warn(e.getLocalizedMessage());
+ };
+ };
};
- // Create a new in-memory index
+
+ /**
+ * Constructs a new KorapIndex in-memory.
+ *
+ * @throws IOException
+ */
public KorapIndex () throws IOException {
this((Directory) new RAMDirectory());
};
- // Connect to index in file system
+
+ /**
+ * Constructs a new KorapIndex bound to a persistent index.
+ *
+ * @param index Path to an {@link FSDirectory} index
+ * @throws IOException
+ */
public KorapIndex (String index) throws IOException {
- this(FSDirectory.open(new File( index )));
+ this(FSDirectory.open(new File( index )));
};
- // Connect to index in file system
+ /**
+ * Constructs a new KorapIndex bound to a persistent index.
+ *
+ * @param directory A {@link Directory} pointing to an index
+ * @throws IOException
+ */
public KorapIndex (Directory directory) throws IOException {
- this.directory = directory;
+ this.directory = directory;
- // Base analyzer for searching and indexing
- // StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
-
- // TODO: Why is this here?
- Map<String,Analyzer> analyzerPerField = new HashMap<String,Analyzer>();
- analyzerPerField.put("textClass", new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
- analyzerPerField.put("foundries", new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
- PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
+ // TODO: Shouldn't be here
+ // Add analyzers
+ Map<String,Analyzer> analyzerPerField = new HashMap<String,Analyzer>();
+ analyzerPerField.put("textClass", new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
+ analyzerPerField.put("foundries", new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
+ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
new StandardAnalyzer(Version.LUCENE_CURRENT),
analyzerPerField
);
- // Create configuration with base analyzer
- this.config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
+ // Create configuration with base analyzer
+ this.config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
};
- // Get system version
+ /**
+ * Get the version number of the index.
+ *
+ * @return A string containing the version number.
+ */
public String getVersion () {
- return this.version;
+ return this.version;
};
- // Get system name
+
+ /**
+ * Get the name of the index.
+ *
+ * @return A string containing the name of the index.
+ */
public String getName () {
- return this.name;
+ return this.name;
};
- // Close connection to index
+ /**
+ * Close the connections of the index reader and the writer.
+ * @throws IOException
+ */
public void close () throws IOException {
- this.closeReader();
- this.closeWriter();
+ this.closeReader();
+ this.closeWriter();
};
// Get index reader object
public IndexReader reader () {
- if (!readerOpen)
- this.openReader();
- // Todo: Maybe use DirectoryReader.openIfChanged(DirectoryReader)
-
- return this.reader;
+ if (!readerOpen)
+ this.openReader();
+ // Todo: Maybe use DirectoryReader.openIfChanged(DirectoryReader)
+
+ return this.reader;
};
-
+
// Get index searcher object
public IndexSearcher searcher () {
- if (this.searcher == null) {
- this.searcher = new IndexSearcher(this.reader());
- };
- return this.searcher;
+ if (this.searcher == null) {
+ this.searcher = new IndexSearcher(this.reader());
+ };
+ return this.searcher;
};
// Close index writer
public void closeWriter () throws IOException {
- if (this.writer != null)
- this.writer.close();
+ if (this.writer != null)
+ this.writer.close();
};
// Open index reader
public void openReader () {
- try {
+ try {
- // open reader
- this.reader = DirectoryReader.open(this.directory);
- readerOpen = true;
- if (this.searcher != null)
- this.searcher = new IndexSearcher(reader);
- }
+ // open reader
+ this.reader = DirectoryReader.open(this.directory);
+ readerOpen = true;
+ if (this.searcher != null)
+ this.searcher = new IndexSearcher(reader);
+ }
- // Failed to open reader
- catch (IOException e) {
- //e.printStackTrace();
- log.warn( e.getLocalizedMessage() );
- };
+ // Failed to open reader
+ catch (IOException e) {
+ //e.printStackTrace();
+ log.warn( e.getLocalizedMessage() );
+ };
};
// Close index reader
public void closeReader () throws IOException {
- if (readerOpen) {
- this.reader.close();
- readerOpen = false;
- };
+ if (readerOpen) {
+ this.reader.close();
+ readerOpen = false;
+ };
};
/*
@@ -237,374 +273,410 @@
* as they were added while the API changed slowly.
*/
-
// Add document to index as FieldDocument
public FieldDocument addDoc (FieldDocument fd) {
+ try {
+ // Open writer if not already opened
+ if (this.writer == null)
+ this.writer = new IndexWriter(this.directory, this.config);
- try {
-
- // Open writer if not already opened
- if (this.writer == null)
- this.writer = new IndexWriter(this.directory, this.config);
+ // Add document to writer
+ this.writer.addDocument( fd.doc );
+ if (++commitCounter > autoCommit) {
+ this.commit();
+ commitCounter = 0;
+ };
+ }
- // Add document to writer
- this.writer.addDocument( fd.doc );
- if (++commitCounter > autoCommit) {
- this.commit();
- commitCounter = 0;
- };
- }
-
- // Failed to add document
- catch (IOException e) {
- log.error("File json not found");
- };
- return fd;
+ // Failed to add document
+ catch (IOException e) {
+ log.error("File json not found");
+ };
+ return fd;
};
// Add document to index as JSON object with a unique ID
public FieldDocument addDoc (int uid, String json) throws IOException {
- FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
- fd.setUID(uid);
- return this.addDoc(fd);
+ FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
+ fd.setUID(uid);
+ return this.addDoc(fd);
};
// Add document to index as JSON object
public FieldDocument addDoc (String json) throws IOException {
- FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
- return this.addDoc(fd);
+ FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
+ return this.addDoc(fd);
};
// Add document to index as JSON file
public FieldDocument addDoc (File json) {
- try {
- FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
- return this.addDoc(fd);
- }
- catch (IOException e) {
- log.error("File json not parseable");
- };
- return (FieldDocument) null;
+ try {
+ FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
+ return this.addDoc(fd);
+ }
+ catch (IOException e) {
+ log.error("File json not parseable");
+ };
+ return (FieldDocument) null;
};
// Add document to index as JSON file
public FieldDocument addDocFile(String json) {
- return this.addDocFile(json, false);
+ return this.addDocFile(json, false);
};
+
private FieldDocument _addDocfromFile (String json, boolean gzip) {
- try {
- if (gzip) {
+ try {
+ if (gzip) {
- // Create json field document
- FieldDocument fd = this.mapper.readValue(
- new GZIPInputStream(new FileInputStream(json)),
- FieldDocument.class
- );
+ // Create json field document
+ FieldDocument fd = this.mapper.readValue(
+ new GZIPInputStream(new FileInputStream(json)),
+ FieldDocument.class
+ );
+ return fd;
+ };
+ return this.mapper.readValue(json, FieldDocument.class);
+ }
- return fd;
- };
- return this.mapper.readValue(json, FieldDocument.class);
- }
-
- // Fail to add json object
- catch (IOException e) {
- log.error("File json not found");
- };
- return (FieldDocument) null;
+ // Fail to add json object
+ catch (IOException e) {
+ log.error("File json not found");
+ };
+ return (FieldDocument) null;
};
+
// Add document to index as JSON file (possibly gzipped)
public FieldDocument addDocFile(String json, boolean gzip) {
- return this.addDoc(this._addDocfromFile(json, gzip));
+ return this.addDoc(this._addDocfromFile(json, gzip));
};
+
// Add document to index as JSON file (possibly gzipped)
public FieldDocument addDocFile(int uid, String json, boolean gzip) {
- FieldDocument fd = this._addDocfromFile(json, gzip);
- if (fd != null) {
- fd.setUID(uid);
- return this.addDoc(fd);
- };
- return fd;
+ FieldDocument fd = this._addDocfromFile(json, gzip);
+ if (fd != null) {
+ fd.setUID(uid);
+ return this.addDoc(fd);
+ };
+ return fd;
};
+
// Commit changes to the index
public void commit (boolean force) throws IOException {
-
- // There is something to commit
- if (commitCounter > 0 || !force) {
- this.commit();
- };
+ // There is something to commit
+ if (commitCounter > 0 || !force)
+ this.commit();
};
+
+ // Commit changes to the index
public void commit () throws IOException {
+ // Open writer if not already opened
+ if (this.writer == null)
+ this.writer = new IndexWriter(this.directory, this.config);
- // Open writer if not already opened
- if (this.writer == null)
- this.writer = new IndexWriter(this.directory, this.config);
-
- // Force commit
- this.writer.commit();
- commitCounter = 0;
- this.closeReader();
+ // Force commit
+ this.writer.commit();
+ commitCounter = 0;
+ this.closeReader();
};
+
// Get autoCommit valiue
public int autoCommit () {
- return this.autoCommit;
+ return this.autoCommit;
};
// Set autoCommit value
public void autoCommit (int number) {
- this.autoCommit = number;
+ this.autoCommit = number;
};
// Search for meta information in term vectors
private long numberOfAtomic (Bits docvec,
- AtomicReaderContext atomic,
- Term term) throws IOException {
+ AtomicReaderContext atomic,
+ Term term) throws IOException {
+ // This reimplements docsAndPositionsEnum with payloads
+ final Terms terms = atomic.reader().fields().terms(term.field());
- // This reimplements docsAndPositionsEnum with payloads
- final Terms terms = atomic.reader().fields().terms(term.field());
+ // No terms were found
+ if (terms != null) {
+ // Todo: Maybe reuse a termsEnum!
+ final TermsEnum termsEnum = terms.iterator(null);
- // No terms were found
- if (terms != null) {
- // Todo: Maybe reuse a termsEnum!
- final TermsEnum termsEnum = terms.iterator(null);
+ // Set the position in the iterator to the term that is seeked
+ if (termsEnum.seekExact(term.bytes())) {
- // Set the position in the iterator to the term that is seeked
- if (termsEnum.seekExact(term.bytes())) {
+ // Start an iterator to fetch all payloads of the term
+ DocsAndPositionsEnum docs = termsEnum.docsAndPositions(
+ docvec,
+ null,
+ DocsAndPositionsEnum.FLAG_PAYLOADS
+ );
- // Start an iterator to fetch all payloads of the term
- DocsAndPositionsEnum docs = termsEnum.docsAndPositions(
- docvec,
- null,
- DocsAndPositionsEnum.FLAG_PAYLOADS
- );
+ // Iterator is empty
+ // TODO: Maybe this is an error ...
+ if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS) {
+ return 0;
+ };
- // Iterator is empty
- // TODO: Maybe this is an error ...
- if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS) {
- return 0;
- };
+ // Init some variables for data copying
+ long occurrences = 0;
+ BytesRef payload;
- // Init some variables for data copying
- long occurrences = 0;
- BytesRef payload;
+ // Init nextDoc()
+ while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
- // Init nextDoc()
- while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+ // Initialize (go to first term)
+ docs.nextPosition();
- // Initialize (go to first term)
- docs.nextPosition();
+ // Copy payload with the offset of the BytesRef
+ payload = docs.getPayload();
+ System.arraycopy(payload.bytes, payload.offset, pl, 0, 4);
- // Copy payload with the offset of the BytesRef
- payload = docs.getPayload();
- System.arraycopy(payload.bytes, payload.offset, pl, 0, 4);
+ // Add payload as integer
+ occurrences += bb.wrap(pl).getInt();
+ };
- // Add payload as integer
- occurrences += bb.wrap(pl).getInt();
- };
+ // Return the sum of all occurrences
+ return occurrences;
+ };
+ };
- // Return the sum of all occurrences
- return occurrences;
- };
- };
-
- // Nothing found
- return 0;
+ // Nothing found
+ return 0;
};
/**
* Search for the number of occurrences of different types,
- * e.g. "documents", "sentences" etc.
+ * e.g. <i>documents</i>, <i>sentences</i> etc.
*
- * @param field The field containing the textual data and the annotations.
- * @param type The type of meta information, e.g. "documents" or "sentences".
+ * @param collection The scope of the numbering by means of a
+ * {@link KorapCollection}
+ * @param field The field containing the textual data and the
+ * annotations as a string.
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
*/
public long numberOf (KorapCollection collection,
- String field,
- String type) {
- // Short cut for documents
- // This will be only "texts" in the future
- if (type.equals("documents") || type.equals("base/texts")) {
- if (collection.getCount() <= 0) {
- try {
- return (long) this.reader().numDocs();
- }
- catch (Exception e) {
- log.warn(e.getLocalizedMessage());
- };
- return (long) 0;
- };
+ String field,
+ String type) {
+ // Short cut for documents
+ // This will be only "texts" in the future
+ if (type.equals("documents") || type.equals("base/texts")) {
+ if (collection.getCount() <= 0) {
+ try {
+ return (long) this.reader().numDocs();
+ }
+ catch (Exception e) {
+ log.warn(e.getLocalizedMessage());
+ };
+ return (long) 0;
+ };
- long docCount = 0;
- int i = 1;
- try {
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- docCount += collection.bits(atomic).cardinality();
- i++;
- };
- }
- catch (IOException e) {
- log.warn(e.getLocalizedMessage());
- };
- return docCount;
- };
+ long docCount = 0;
+ int i = 1;
+ try {
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ docCount += collection.bits(atomic).cardinality();
+ i++;
+ };
+ }
+ catch (IOException e) {
+ log.warn(e.getLocalizedMessage());
+ };
+ return docCount;
+ };
- // Create search term
- // This may be prefixed by foundries
- Term term = new Term(field, "-:" + type);
+ // Create search term
+ // This may be prefixed by foundries
+ Term term = new Term(field, "-:" + type);
- long occurrences = 0;
- try {
- // Iterate over all atomic readers and collect occurrences
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- occurrences += this.numberOfAtomic(
- collection.bits(atomic),
- atomic,
- term
- );
- };
- }
+ long occurrences = 0;
+ try {
+ // Iterate over all atomic readers and collect occurrences
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ occurrences += this.numberOfAtomic(
+ collection.bits(atomic),
+ atomic,
+ term
+ );
+ };
+ }
- // Something went wrong
- catch (Exception e) {
- log.warn( e.getLocalizedMessage() );
- };
+ // Something went wrong
+ catch (Exception e) {
+ log.warn( e.getLocalizedMessage() );
+ };
- return occurrences;
+ return occurrences;
};
+
+
public long numberOf (String field, String type) {
- return this.numberOf(new KorapCollection(this), field, type);
+ return this.numberOf(new KorapCollection(this), field, type);
};
/**
* Search for the number of occurrences of different types,
- * e.g. "documents", "sentences" etc., in the base foundry.
+ * e.g. <i>documents</i>, <i>sentences</i> etc., in the
+ * <i>base</i> foundry.
*
- * @param type The type of meta information, e.g. "documents" or "sentences".
- *
- * @see #numberOf(String, String)
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
*/
public long numberOf (String type) {
- return this.numberOf("tokens", type);
+ return this.numberOf("tokens", type);
};
/**
* Search for the number of occurrences of different types,
- * e.g. "documents", "sentences" etc., in a specific set of documents.
+ * e.g. <i>documents</i>, <i>sentences</i> etc.
*
- * @param docvec The document vector for filtering the search space.
- * @param field The field containing the textual data and the annotations.
- * @param type The type of meta information, e.g. "documents" or "sentences".
- *
- * @see #numberOf(String, String)
+ * @param docvec The scope of the numbering by means of a
+ * {@link Bits} vector
+ * @param field The field containing the textual data and the
+ * annotations as a string.
+ * @param type The type of meta information,
+ * e.g. <i>documents</i> or <i>sentences</i> as a string.
+ * @return The number of the occurrences.
+ * @throws IOException
*/
public long numberOf (Bits docvec, String field, String type) throws IOException {
-
- // Shortcut for documents
- if (type.equals("documents")) {
- OpenBitSet os = (OpenBitSet) docvec;
- return os.cardinality();
- };
+ // Shortcut for documents
+ if (type.equals("documents")) {
+ OpenBitSet os = (OpenBitSet) docvec;
+ return os.cardinality();
+ };
- Term term = new Term(field, "-:" + type);
+ Term term = new Term(field, "-:" + type);
- int occurrences = 0;
- try {
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- occurrences += this.numberOfAtomic(docvec, atomic, term);
- };
- }
- catch (IOException e) {
- log.warn( e.getLocalizedMessage() );
- };
-
- return occurrences;
+ int occurrences = 0;
+ try {
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ occurrences += this.numberOfAtomic(docvec, atomic, term);
+ };
+ }
+ catch (IOException e) {
+ log.warn( e.getLocalizedMessage() );
+ };
+
+ return occurrences;
};
+
@Deprecated
public long countDocuments () throws IOException {
- log.warn("countDocuments() is DEPRECATED in favor of numberOf(\"documents\")!");
- return this.numberOf("documents");
+ log.warn("countDocuments() is DEPRECATED in favor of numberOf(\"documents\")!");
+ return this.numberOf("documents");
};
@Deprecated
public long countAllTokens () throws IOException {
- log.warn("countAllTokens() is DEPRECATED in favor of numberOf(\"tokens\")!");
- return this.numberOf("tokens");
+ log.warn("countAllTokens() is DEPRECATED in favor of numberOf(\"tokens\")!");
+ return this.numberOf("tokens");
};
public String getMatchIDWithContext (String id) {
- /*
- No includeHighlights
- */
-
- return "";
+ /* No includeHighlights */
+ return "";
};
public KorapMatch getMatch (String id) throws QueryException {
- return this.getMatchInfo(
+ return this.getMatchInfo(
id, // MatchID
- "tokens", // field
- false, // info
- (ArrayList) null, // foundry
- (ArrayList) null, // layer
- false, // includeSpans
- true, // includeHighlights
- false // extendToSentence
- );
+ "tokens", // field
+ false, // info
+ (ArrayList) null, // foundry
+ (ArrayList) null, // layer
+ false, // includeSpans
+ true, // includeHighlights
+ false // extendToSentence
+ );
};
+
// There is a good chance that some of these methods will die ...
public KorapMatch getMatchInfo (String id,
- String field,
- String foundry,
- String layer,
- boolean includeSpans,
- boolean includeHighlights) throws QueryException {
- return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, false);
+ String field,
+ String foundry,
+ String layer,
+ boolean includeSpans,
+ boolean includeHighlights) throws QueryException {
+ return this.getMatchInfo(
+ id,
+ field,
+ true,
+ foundry,
+ layer,
+ includeSpans,
+ includeHighlights,
+ false
+ );
+ };
+
+
+ public KorapMatch getMatchInfo (String id,
+ String field,
+ String foundry,
+ String layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
+ return this.getMatchInfo(
+ id,
+ field,
+ true,
+ foundry,
+ layer,
+ includeSpans,
+ includeHighlights,
+ extendToSentence
+ );
};
public KorapMatch getMatchInfo (String id,
- String field,
- String foundry,
- String layer,
- boolean includeSpans,
- boolean includeHighlights,
- boolean extendToSentence) throws QueryException {
- return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, extendToSentence);
- };
-
- public KorapMatch getMatchInfo (String id,
- String field,
- boolean info,
- String foundry,
- String layer,
- boolean includeSpans,
- boolean includeHighlights,
- boolean extendToSentence) throws QueryException {
+ String field,
+ boolean info,
+ String foundry,
+ String layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
ArrayList<String> foundryList = new ArrayList<>(1);
- if (foundry != null)
- foundryList.add(foundry);
- ArrayList<String> layerList = new ArrayList<>(1);
- if (layer != null)
- layerList.add(layer);
- return this.getMatchInfo(id, field, info, foundryList, layerList, includeSpans, includeHighlights, extendToSentence);
+ if (foundry != null)
+ foundryList.add(foundry);
+ ArrayList<String> layerList = new ArrayList<>(1);
+ if (layer != null)
+ layerList.add(layer);
+ return this.getMatchInfo(
+ id,
+ field,
+ info,
+ foundryList,
+ layerList,
+ includeSpans,
+ includeHighlights,
+ extendToSentence
+ );
};
@@ -616,287 +688,288 @@
per position in the match.
*/
public KorapMatch getMatchInfo (String idString,
- String field,
- boolean info,
- List<String> foundry,
- List<String> layer,
- boolean includeSpans,
- boolean includeHighlights,
- boolean extendToSentence) throws QueryException {
+ String field,
+ boolean info,
+ List<String> foundry,
+ List<String> layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
- KorapMatch match = new KorapMatch(idString, includeHighlights);
+ KorapMatch match = new KorapMatch(idString, includeHighlights);
- if (this.getVersion() != null)
- match.setVersion(this.getVersion());
+ if (this.getVersion() != null)
+ match.setVersion(this.getVersion());
- if (this.getName() != null)
- match.setName(this.getName());
+ if (this.getName() != null)
+ match.setName(this.getName());
- if (match.getStartPos() == -1)
- return match;
+ if (match.getStartPos() == -1)
+ return match;
- // Create a filter based on the corpusID and the docID
- BooleanQuery bool = new BooleanQuery();
- bool.add(new TermQuery(new Term("ID", match.getDocID())), BooleanClause.Occur.MUST);
- bool.add(new TermQuery(new Term("corpusID", match.getCorpusID())), BooleanClause.Occur.MUST);
- Filter filter = (Filter) new QueryWrapperFilter(bool);
-
- CompiledAutomaton fst = null;
-
- if (info) {
- /* Create an automaton for prefixed terms of interest.
- * You can define the necessary foundry, the necessary layer,
- * in case the foundry is given, and if span annotations
- * are of interest.
- */
- StringBuilder regex = new StringBuilder();
- // TODO: Make these static
- Pattern harmlessFoundry = Pattern.compile("^[-a-zA-Z0-9_]+$");
- Pattern harmlessLayer = Pattern.compile("^[-a-zA-Z0-9_:]+$");
- Iterator<String> iter;
- int i = 0;
+ // Create a filter based on the corpusID and the docID
+ BooleanQuery bool = new BooleanQuery();
+ bool.add(new TermQuery(new Term("ID", match.getDocID())), BooleanClause.Occur.MUST);
+ bool.add(new TermQuery(new Term("corpusID", match.getCorpusID())), BooleanClause.Occur.MUST);
+ Filter filter = (Filter) new QueryWrapperFilter(bool);
+
+ CompiledAutomaton fst = null;
+
+ if (info) {
+ /* Create an automaton for prefixed terms of interest.
+ * You can define the necessary foundry, the necessary layer,
+ * in case the foundry is given, and if span annotations
+ * are of interest.
+ */
+ StringBuilder regex = new StringBuilder();
+ // TODO: Make these static
+ Pattern harmlessFoundry = Pattern.compile("^[-a-zA-Z0-9_]+$");
+ Pattern harmlessLayer = Pattern.compile("^[-a-zA-Z0-9_:]+$");
+ Iterator<String> iter;
+ int i = 0;
- if (includeSpans)
- regex.append("((\">\"|\"<\"\">\")\":\")?");
+ if (includeSpans)
+ regex.append("((\">\"|\"<\"\">\")\":\")?");
+
+ // There is a foundry given
+ if (foundry != null && foundry.size() > 0) {
- // There is a foundry given
- if (foundry != null && foundry.size() > 0) {
+ // Filter out bad foundries
+ for (i = foundry.size() - 1; i >= 0 ; i--) {
+ if (!harmlessFoundry.matcher(foundry.get(i)).matches()) {
+ throw new QueryException("Invalid foundry requested: '" + foundry.get(i) + "'");
+ // foundry.remove(i);
+ };
+ };
- // Filter out bad foundries
- for (i = foundry.size() - 1; i >= 0 ; i--) {
- if (!harmlessFoundry.matcher(foundry.get(i)).matches()) {
- throw new QueryException("Invalid foundry requested: '" + foundry.get(i) + "'");
- // foundry.remove(i);
- };
- };
+ // Build regex for multiple foundries
+ if (foundry.size() > 0) {
+ regex.append("(");
+ iter = foundry.iterator();
+ while (iter.hasNext()) {
+ regex.append(iter.next()).append("|");
+ };
+ regex.replace(regex.length() - 1, regex.length(), ")");
+ regex.append("\"/\"");
- // Build regex for multiple foundries
- if (foundry.size() > 0) {
- regex.append("(");
- iter = foundry.iterator();
- while (iter.hasNext()) {
- regex.append(iter.next()).append("|");
- };
- regex.replace(regex.length() - 1, regex.length(), ")");
- regex.append("\"/\"");
+ // There is a filter given
+ if (layer != null && layer.size() > 0) {
- // There is a filter given
- if (layer != null && layer.size() > 0) {
+ // Filter out bad layers
+ for (i = layer.size() - 1; i >= 0 ; i--) {
+ if (!harmlessLayer.matcher(layer.get(i)).matches()) {
+ throw new QueryException("Invalid layer requested: " + layer.get(i));
+ // layer.remove(i);
+ };
+ };
- // Filter out bad layers
- for (i = layer.size() - 1; i >= 0 ; i--) {
- if (!harmlessLayer.matcher(layer.get(i)).matches()) {
- throw new QueryException("Invalid layer requested: " + layer.get(i));
- // layer.remove(i);
- };
- };
+ // Build regex for multiple layers
+ if (layer.size() > 0) {
+ regex.append("(");
+ iter = layer.iterator();
+ while (iter.hasNext()) {
+ regex.append(iter.next()).append("|");
+ };
+ regex.replace(regex.length() - 1, regex.length(), ")");
+ regex.append("\":\"");
+ };
+ };
+ };
+ }
+ else if (includeSpans) {
+ // No foundries - but spans
+ regex.append("([^-is]|[-is][^:])");
+ }
+ else {
+ // No foundries - no spans
+ regex.append("([^-is<>]|[-is>][^:]|<[^:>])");
+ };
+ regex.append("(.){1,}|_[0-9]+");
+
+ if (DEBUG)
+ log.trace("The final regexString is {}", regex.toString());
+ RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
+ fst = new CompiledAutomaton(regexObj.toAutomaton());
+ if (DEBUG)
+ log.trace("The final regexObj is {}", regexObj.toString());
+ };
- // Build regex for multiple layers
- if (layer.size() > 0) {
- regex.append("(");
- iter = layer.iterator();
- while (iter.hasNext()) {
- regex.append(iter.next()).append("|");
- };
- regex.replace(regex.length() - 1, regex.length(), ")");
- regex.append("\":\"");
- };
- };
- };
- }
- else if (includeSpans) {
- // No foundries - but spans
- regex.append("([^-is]|[-is][^:])");
- }
- else {
- // No foundries - no spans
- regex.append("([^-is<>]|[-is>][^:]|<[^:>])");
- };
- regex.append("(.){1,}|_[0-9]+");
+ try {
+ // Iterate over all atomic indices and find the matching document
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
- if (DEBUG)
- log.trace("The final regexString is {}", regex.toString());
- RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
- fst = new CompiledAutomaton(regexObj.toAutomaton());
- if (DEBUG)
- log.trace("The final regexObj is {}", regexObj.toString());
- };
+ // Retrieve the single document of interest
+ DocIdSet filterSet = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs()
+ );
+ // Create a bitset for the correct document
+ Bits bitset = filterSet.bits();
- try {
- // Iterate over all atomic indices and find the matching document
- for (AtomicReaderContext atomic : this.reader().leaves()) {
+ DocIdSetIterator filterIterator = filterSet.iterator();
- // Retrieve the single document of interest
- DocIdSet filterSet = filter.getDocIdSet(
- atomic,
- atomic.reader().getLiveDocs()
- );
+ // No document found
+ if (filterIterator == null)
+ continue;
- // Create a bitset for the correct document
- Bits bitset = filterSet.bits();
+ // Go to the matching doc - and remember its ID
+ int localDocID = filterIterator.nextDoc();
- DocIdSetIterator filterIterator = filterSet.iterator();
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
- // No document found
- if (filterIterator == null)
- continue;
+ // We've found the correct document! Hurray!
+ if (DEBUG)
+ log.trace("We've found a matching document");
- // Go to the matching doc - and remember its ID
- int localDocID = filterIterator.nextDoc();
+ HashSet<String> fields = (HashSet<String>)
+ new KorapSearch().getFields().clone();
+ fields.add(field);
- if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
- continue;
+ // Get terms from the document
+ Terms docTerms = atomic.reader().getTermVector(localDocID, field);
- // We've found the correct document! Hurray!
- if (DEBUG)
- log.trace("We've found a matching document");
+ // Load the necessary fields of the document
+ Document doc = atomic.reader().document(localDocID, fields);
- HashSet<String> fields = (HashSet<String>)
- new KorapSearch().getFields().clone();
- fields.add(field);
+ // Put some more information to the match
+ PositionsToOffset pto = new PositionsToOffset(atomic, field);
+ match.setPositionsToOffset(pto);
+ match.setLocalDocID(localDocID);
+ match.populateDocument(doc, field, fields);
+ if (DEBUG)
+ log.trace("The document has the id '{}'", match.getDocID());
- // Get terms from the document
- Terms docTerms = atomic.reader().getTermVector(localDocID, field);
+ SearchContext context = match.getContext();
- // Load the necessary fields of the document
- Document doc = atomic.reader().document(localDocID, fields);
+ // Search for minimal surrounding sentences
+ if (extendToSentence) {
+ int [] spanContext = match.expandContextToSpan("s");
+ match.setStartPos(spanContext[0]);
+ match.setEndPos(spanContext[1]);
+ match.startMore = false;
+ match.endMore = false;
+ }
+ else {
+ if (DEBUG)
+ log.trace("Don't expand context");
+ };
+
+ context.left.setToken(true).setLength(0);
+ context.right.setToken(true).setLength(0);
- // Put some more information to the match
- PositionsToOffset pto = new PositionsToOffset(atomic, field);
- match.setPositionsToOffset(pto);
- match.setLocalDocID(localDocID);
- match.populateDocument(doc, field, fields);
- if (DEBUG)
- log.trace("The document has the id '{}'", match.getDocID());
+ if (!info)
+ break;
- SearchContext context = match.getContext();
+ // Limit the terms to all the terms of interest
+ TermsEnum termsEnum = docTerms.intersect(fst, null);
- // Search for minimal surrounding sentences
- if (extendToSentence) {
- int [] spanContext = match.expandContextToSpan("s");
- match.setStartPos(spanContext[0]);
- match.setEndPos(spanContext[1]);
- match.startMore = false;
- match.endMore = false;
- }
- else {
- if (DEBUG)
- log.trace("Don't expand context");
- };
-
- context.left.setToken(true).setLength(0);
- context.right.setToken(true).setLength(0);
+ DocsAndPositionsEnum docs = null;
- if (!info)
- break;
+ // List of terms to populate
+ SpanInfo termList = new SpanInfo(pto, localDocID);
+
+ // Iterate over all terms in the document
+ while (termsEnum.next() != null) {
+
+ // Get the positions and payloads of the term in the document
+ // The bitvector may look different (don't know why)
+ // and so the local ID may differ.
+ // That's why the requesting bitset is null.
+ docs = termsEnum.docsAndPositions(
+ null,
+ docs,
+ DocsAndPositionsEnum.FLAG_PAYLOADS
+ );
- // Limit the terms to all the terms of interest
- TermsEnum termsEnum = docTerms.intersect(fst, null);
+ // Init document iterator
+ docs.nextDoc();
- DocsAndPositionsEnum docs = null;
+ // Should never happen ... but hell.
+ if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
- // List of terms to populate
- SpanInfo termList = new SpanInfo(pto, localDocID);
+ // How often does this term occur in the document?
+ int termOccurrences = docs.freq();
+
+ // String representation of the term
+ String termString = termsEnum.term().utf8ToString();
- // Iterate over all terms in the document
- while (termsEnum.next() != null) {
+ // Iterate over all occurrences
+ for (int i = 0; i < termOccurrences; i++) {
- // Get the positions and payloads of the term in the document
- // The bitvector may look different (don't know why)
- // and so the local ID may differ.
- // That's why the requesting bitset is null.
- docs = termsEnum.docsAndPositions(
- null,
- docs,
- DocsAndPositionsEnum.FLAG_PAYLOADS
- );
+ // Init positions and get the current
+ int pos = docs.nextPosition();
- // Init document iterator
- docs.nextDoc();
+ // Check, if the position of the term is in the area of interest
+ if (pos >= match.getStartPos() && pos < match.getEndPos()) {
- // Should never happen ... but hell.
- if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS)
- continue;
+ if (DEBUG)
+ log.trace(
+ ">> {}: {}-{}-{}",
+ termString,
+ docs.freq(),
+ pos,
+ docs.getPayload()
+ );
- // How often does this term occur in the document?
- int termOccurrences = docs.freq();
+ BytesRef payload = docs.getPayload();
- // String representation of the term
- String termString = termsEnum.term().utf8ToString();
+ // Copy the payload
+ bbTerm.clear();
+ if (payload != null) {
+ bbTerm.put(
+ payload.bytes,
+ payload.offset,
+ payload.length
+ );
+ };
+ TermInfo ti = new TermInfo(termString, pos, bbTerm).analyze();
+ if (ti.getEndPos() < match.getEndPos()) {
+ if (DEBUG)
+ log.trace("Add {}", ti.toString());
+ termList.add(ti);
+ };
+ };
+ };
+ };
- // Iterate over all occurrences
- for (int i = 0; i < termOccurrences; i++) {
+ // Add annotations based on the retrieved infos
+ for (TermInfo t : termList.getTerms()) {
+ if (DEBUG)
+ log.trace(
+ "Add term {}/{}:{} to {}({})-{}({})",
+ t.getFoundry(),
+ t.getLayer(),
+ t.getValue(),
+ t.getStartChar(),
+ t.getStartPos(),
+ t.getEndChar(),
+ t.getEndPos()
+ );
- // Init positions and get the current
- int pos = docs.nextPosition();
+ if (t.getType() == "term" || t.getType() == "span")
+ match.addAnnotation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
+ else if (t.getType() == "relSrc")
+ match.addRelation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
+ };
+
+ break;
+ };
+ }
+ catch (IOException e) {
+ log.warn(e.getLocalizedMessage());
+ match.setError(e.getLocalizedMessage());
+ };
- // Check, if the position of the term is in the area of interest
- if (pos >= match.getStartPos() && pos < match.getEndPos()) {
-
- if (DEBUG)
- log.trace(
- ">> {}: {}-{}-{}",
- termString,
- docs.freq(),
- pos,
- docs.getPayload()
- );
-
- BytesRef payload = docs.getPayload();
-
- // Copy the payload
- bbTerm.clear();
- if (payload != null) {
- bbTerm.put(
- payload.bytes,
- payload.offset,
- payload.length
- );
- };
- TermInfo ti = new TermInfo(termString, pos, bbTerm).analyze();
- if (ti.getEndPos() < match.getEndPos()) {
- if (DEBUG)
- log.trace("Add {}", ti.toString());
- termList.add(ti);
- };
- };
- };
- };
-
- // Add annotations based on the retrieved infos
- for (TermInfo t : termList.getTerms()) {
- if (DEBUG)
- log.trace("Add term {}/{}:{} to {}({})-{}({})",
- t.getFoundry(),
- t.getLayer(),
- t.getValue(),
- t.getStartChar(),
- t.getStartPos(),
- t.getEndChar(),
- t.getEndPos());
-
- if (t.getType() == "term" || t.getType() == "span")
- match.addAnnotation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
- else if (t.getType() == "relSrc")
- match.addRelation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
- };
-
- break;
- };
- }
- catch (IOException e) {
- log.warn(e.getLocalizedMessage());
- match.setError(e.getLocalizedMessage());
- };
-
- return match;
+ return match;
};
@Deprecated
public HashMap getTermRelation (String field) throws Exception {
- return this.getTermRelation(new KorapCollection(this), field);
+ return this.getTermRelation(new KorapCollection(this), field);
};
@@ -905,90 +978,86 @@
*/
@Deprecated
public HashMap getTermRelation (KorapCollection kc, String field) throws Exception {
- HashMap<String,Long> map = new HashMap<>(100);
- long docNumber = 0, checkNumber = 0;
+ HashMap<String,Long> map = new HashMap<>(100);
+ long docNumber = 0, checkNumber = 0;
+
+ try {
+ if (kc.getCount() <= 0) {
+ checkNumber = (long) this.reader().numDocs();
+ };
- try {
- if (kc.getCount() <= 0) {
- checkNumber = (long) this.reader().numDocs();
- };
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ HashMap<String,FixedBitSet> termVector = new HashMap<>(20);
+
+ FixedBitSet docvec = kc.bits(atomic);
+ if (docvec != null) {
+ docNumber += docvec.cardinality();
+ };
- for (AtomicReaderContext atomic : this.reader().leaves()) {
+ Terms terms = atomic.reader().fields().terms(field);
- HashMap<String,FixedBitSet> termVector = new HashMap<>(20);
-
- FixedBitSet docvec = kc.bits(atomic);
- if (docvec != null) {
- docNumber += docvec.cardinality();
- };
-
- Terms terms = atomic.reader().fields().terms(field);
-
- if (terms == null) {
- continue;
- };
+ if (terms == null) {
+ continue;
+ };
- int docLength = atomic.reader().maxDoc();
- FixedBitSet bitset = new FixedBitSet(docLength);
+ int docLength = atomic.reader().maxDoc();
+ FixedBitSet bitset = new FixedBitSet(docLength);
- // Iterate over all tokens in this field
- TermsEnum termsEnum = terms.iterator(null);
+ // Iterate over all tokens in this field
+ TermsEnum termsEnum = terms.iterator(null);
- while (termsEnum.next() != null) {
+ while (termsEnum.next() != null) {
+
+ String termString = termsEnum.term().utf8ToString();
- String termString = termsEnum.term().utf8ToString();
-
- bitset.clear(0,docLength);
+ bitset.clear(0,docLength);
- // Get frequency
- bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec, null));
+ // Get frequency
+ bitset.or((DocIdSetIterator) termsEnum.docs((Bits) docvec, null));
+
+ long value = 0;
+ if (map.containsKey(termString))
+ value = map.get(termString);
- long value = 0;
- if (map.containsKey(termString))
- value = map.get(termString);
+ map.put(termString, value + bitset.cardinality());
+
+ termVector.put(termString, bitset.clone());
+ };
+
+ int keySize = termVector.size();
+ String[] keys = termVector.keySet().toArray(new String[keySize]);
+ java.util.Arrays.sort(keys);
- map.put(termString, value + bitset.cardinality());
+ if (keySize > maxTermRelations) {
+ throw new Exception(
+ "termRelations are limited to " + maxTermRelations + " sets" +
+ " (requested were at least " + keySize + " sets)"
+ );
+ };
+
+ for (int i = 0; i < keySize; i++) {
+ for (int j = i+1; j < keySize; j++) {
+ FixedBitSet comby = termVector.get(keys[i]).clone();
+ comby.and(termVector.get(keys[j]));
- termVector.put(termString, bitset.clone());
- };
-
- int keySize = termVector.size();
- String[] keys = termVector.keySet().toArray(new String[keySize]);
- java.util.Arrays.sort(keys);
-
-
- if (keySize > maxTermRelations) {
- throw new Exception(
- "termRelations are limited to " + maxTermRelations + " sets" +
- " (requested were at least " + keySize + " sets)"
- );
- };
-
- for (int i = 0; i < keySize; i++) {
- for (int j = i+1; j < keySize; j++) {
- FixedBitSet comby = termVector.get(keys[i]).clone();
- comby.and(termVector.get(keys[j]));
-
- StringBuilder sb = new StringBuilder();
- sb.append("#__").append(keys[i]).append(":###:").append(keys[j]);
- String combString = sb.toString();
-
- long cap = (long) comby.cardinality();
- if (map.containsKey(combString)) {
- cap += map.get(combString);
- };
- map.put(combString, cap);
- };
- };
- };
-
- map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
-
- }
- catch (IOException e) {
- log.warn(e.getMessage());
- };
- return map;
+ StringBuilder sb = new StringBuilder();
+ sb.append("#__").append(keys[i]).append(":###:").append(keys[j]);
+ String combString = sb.toString();
+
+ long cap = (long) comby.cardinality();
+ if (map.containsKey(combString)) {
+ cap += map.get(combString);
+ };
+ map.put(combString, cap);
+ };
+ };
+ };
+ map.put("-docs", checkNumber != 0 ? checkNumber : docNumber);
+ }
+ catch (IOException e) {
+ log.warn(e.getMessage());
+ };
+ return map;
};
@@ -996,51 +1065,60 @@
* Search in the index.
*/
public KorapResult search (SpanQuery query) {
- return this.search(new KorapSearch(query));
+ return this.search(new KorapSearch(query));
};
+
public KorapResult search (SpanQuery query, short count) {
- return this.search(
- new KorapSearch(query).setCount(count)
+ return this.search(
+ new KorapSearch(query).setCount(count)
);
};
- // This should probably be deprecated
+
@Deprecated
public KorapResult search (SpanQuery query,
- int startIndex,
- short count,
- boolean leftTokenContext,
- short leftContext,
- boolean rightTokenContext,
- short rightContext) {
+ int startIndex,
+ short count,
+ boolean leftTokenContext,
+ short leftContext,
+ boolean rightTokenContext,
+ short rightContext) {
- KorapSearch ks = new KorapSearch(query);
- ks.setStartIndex(startIndex).setCount(count);
- ks.setContext(new SearchContext(leftTokenContext, leftContext, rightTokenContext, rightContext));
- return this.search(ks);
+ KorapSearch ks = new KorapSearch(query);
+ ks.setStartIndex(startIndex).setCount(count);
+ ks.setContext(
+ new SearchContext(
+ leftTokenContext,
+ leftContext,
+ rightTokenContext,
+ rightContext
+ )
+ );
+ return this.search(ks);
};
+
@Deprecated
public KorapResult search (KorapCollection collection,
- SpanQuery query,
- int startIndex,
- short count,
- boolean leftTokenContext,
- short leftContext,
- boolean rightTokenContext,
- short rightContext) {
- KorapSearch ks = new KorapSearch(query);
- ks.setContext(
+ SpanQuery query,
+ int startIndex,
+ short count,
+ boolean leftTokenContext,
+ short leftContext,
+ boolean rightTokenContext,
+ short rightContext) {
+ KorapSearch ks = new KorapSearch(query);
+ ks.setContext(
new SearchContext(
leftTokenContext,
- leftContext,
- rightTokenContext,
- rightContext
+ leftContext,
+ rightTokenContext,
+ rightContext
)
);
- ks.setCollection(collection);
- return this.search(ks);
+ ks.setCollection(collection);
+ return this.search(ks);
};
@@ -1048,351 +1126,348 @@
* Search the endpoint.
*/
public KorapResult search (KorapSearch ks) {
- if (DEBUG)
- log.trace("Start search");
+ if (DEBUG)
+ log.trace("Start search");
- this.termContexts = new HashMap<Term, TermContext>();
+ this.termContexts = new HashMap<Term, TermContext>();
- KorapCollection collection = ks.getCollection();
- collection.setIndex(this);
+ KorapCollection collection = ks.getCollection();
+ collection.setIndex(this);
- // Get the spanquery from the KorapSearch object
- SpanQuery query = ks.getQuery();
+ // Get the spanquery from the KorapSearch object
+ SpanQuery query = ks.getQuery();
- // Get the field of textual data and annotations ("tokens")
- String field = query.getField();
+ // Get the field of textual data and annotations ("tokens")
+ String field = query.getField();
- // Todo: Make kr subclassing ks - so ks has a method for a new KorapResult!
- KorapResult kr = new KorapResult(
- query.toString(),
- ks.getStartIndex(),
- ks.getCount(),
- ks.getContext()
- );
+ // Todo: Make kr subclassing ks - so ks has a method for a new KorapResult!
+ KorapResult kr = new KorapResult(
+ query.toString(),
+ ks.getStartIndex(),
+ ks.getCount(),
+ ks.getContext()
+ );
- // Set version info to result
- if (this.getVersion() != null)
- kr.setVersion(this.getVersion());
+ // Set version info to result
+ if (this.getVersion() != null)
+ kr.setVersion(this.getVersion());
- // The following fields should be lifted for matches
- HashSet<String> fields = (HashSet<String>) ks.getFields().clone();
- fields.add(field);
+ // The following fields should be lifted for matches
+ HashSet<String> fields = (HashSet<String>) ks.getFields().clone();
+ fields.add(field);
- // Some initializations ...
- int i = 0,
- startIndex = kr.getStartIndex(),
- count = kr.getItemsPerPage(),
- hits = kr.getItemsPerPage() + startIndex,
- limit = ks.getLimit(),
- itemsPerResourceCounter = 0;
- boolean cutoff = ks.doCutOff();
- short itemsPerResource = ks.getItemsPerResource();
+ // Some initializations ...
+ int i = 0,
+ startIndex = kr.getStartIndex(),
+ count = kr.getItemsPerPage(),
+ hits = kr.getItemsPerPage() + startIndex,
+ limit = ks.getLimit(),
+ itemsPerResourceCounter = 0;
+ boolean cutoff = ks.doCutOff();
+ short itemsPerResource = ks.getItemsPerResource();
- // Check if there is work to do at all
- if (limit > 0) {
- if (hits > limit)
- hits = limit;
+ // Check if there is work to do at all
+ if (limit > 0) {
+ if (hits > limit)
+ hits = limit;
- // Nah - nothing to do! Let's go shopping!
- if (limit < startIndex)
- return kr;
- };
+ // Nah - nothing to do! Let's go shopping!
+ if (limit < startIndex)
+ return kr;
+ };
- // Collect matches from atomic readers
- ArrayList<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(kr.getItemsPerPage());
+ // Collect matches from atomic readers
+ ArrayList<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(kr.getItemsPerPage());
+
+ // Start time out thread
+ TimeOutThread tthread = new TimeOutThread();
+ tthread.start();
+ long timeout = ks.getTimeOut();
- // Start time out thread
- TimeOutThread tthread = new TimeOutThread();
- tthread.start();
- long timeout = ks.getTimeOut();
+ // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
+ long t1 = System.nanoTime();
- // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
- long t1 = System.nanoTime();
-
- try {
-
- // Rewrite query (for regex and wildcard queries)
- // Revise!
- // Based on core/src/java/org/apache/lucene/search/IndexSearcher.java
- // and highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
- for ( Query rewrittenQuery = query.rewrite(this.reader());
- !rewrittenQuery.equals(query);
- rewrittenQuery = query.rewrite(this.reader())) {
- query = (SpanQuery) rewrittenQuery;
- };
+ try {
+ // Rewrite query (for regex and wildcard queries)
+ // Revise!
+ // Based on core/src/java/org/apache/lucene/search/IndexSearcher.java
+ // and highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
+ for ( Query rewrittenQuery = query.rewrite(this.reader());
+ !rewrittenQuery.equals(query);
+ rewrittenQuery = query.rewrite(this.reader())) {
+ query = (SpanQuery) rewrittenQuery;
+ };
- // Todo: run this in a separated thread
- for (AtomicReaderContext atomic : this.reader().leaves()) {
+            // Todo: run this in a separate thread
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
- int oldLocalDocID = -1;
+ int oldLocalDocID = -1;
- /*
- * Todo: There may be a way to know early if the bitset is emty
- * by using OpenBitSet - but this may not be as fast as I think.
- */
- Bits bitset = collection.bits(atomic);
+ /*
+             * Todo: There may be a way to know early if the bitset is empty
+ * by using OpenBitSet - but this may not be as fast as I think.
+ */
+ Bits bitset = collection.bits(atomic);
- PositionsToOffset pto = new PositionsToOffset(atomic, field);
+ PositionsToOffset pto = new PositionsToOffset(atomic, field);
- // Spans spans = NearSpansOrdered();
- Spans spans = query.getSpans(atomic, (Bits) bitset, termContexts);
+ // Spans spans = NearSpansOrdered();
+ Spans spans = query.getSpans(atomic, (Bits) bitset, termContexts);
- IndexReader lreader = atomic.reader();
+ IndexReader lreader = atomic.reader();
+
+ // TODO: Get document information from Cache! Fieldcache?
+ for (; i < hits;i++) {
- // TODO: Get document information from Cache! Fieldcache?
- for (; i < hits;i++) {
+ if (DEBUG)
+ log.trace("Match Nr {}/{}", i, count);
+
+ // There are no more spans to find
+ if (!spans.next())
+ break;
- if (DEBUG)
- log.trace("Match Nr {}/{}", i, count);
+ // Timeout!
+ if (tthread.getTime() > timeout) {
+ kr.setTimeExceeded(true);
+ break;
+ };
+
+ int localDocID = spans.doc();
- // There are no more spans to find
- if (!spans.next())
- break;
+ // Count hits per resource
+ if (itemsPerResource > 0) {
- // Timeout!
- if (tthread.getTime() > timeout) {
- kr.setTimeExceeded(true);
- break;
- };
+ // IDS are identical
+ if (localDocID == oldLocalDocID || oldLocalDocID == -1) {
+ if (itemsPerResourceCounter++ >= itemsPerResource) {
+ if (spans.skipTo(localDocID + 1) != true) {
+ break;
+ }
+ else {
+ itemsPerResourceCounter = 1;
+ localDocID = spans.doc();
+ };
+ };
+ }
+
+ // Reset counter
+ else
+ itemsPerResourceCounter = 0;
+
+ oldLocalDocID = localDocID;
+ };
- int localDocID = spans.doc();
+ // The next matches are not yet part of the result
+ if (startIndex > i)
+ continue;
- // Count hits per resource
- if (itemsPerResource > 0) {
+ int docID = atomic.docBase + localDocID;
+
+ // Do not load all of this, in case the doc is the same!
+ Document doc = lreader.document(localDocID, fields);
+ KorapMatch match = kr.addMatch(
+ pto,
+ localDocID,
+ spans.start(),
+ spans.end()
+ );
- // IDS are identical
- if (localDocID == oldLocalDocID || oldLocalDocID == -1) {
- if (itemsPerResourceCounter++ >= itemsPerResource) {
- if (spans.skipTo(localDocID + 1) != true) {
- break;
- }
- else {
- itemsPerResourceCounter = 1;
- localDocID = spans.doc();
- };
- };
- }
+ if (spans.isPayloadAvailable())
+ match.addPayload((List<byte[]>) spans.getPayload());
- // Reset counter
- else
- itemsPerResourceCounter = 0;
-
- oldLocalDocID = localDocID;
- };
-
- // The next matches are not yet part of the result
- if (startIndex > i)
- continue;
-
- int docID = atomic.docBase + localDocID;
-
- // Do not load all of this, in case the doc is the same!
- Document doc = lreader.document(localDocID, fields);
- KorapMatch match = kr.addMatch(
- pto,
- localDocID,
- spans.start(),
- spans.end()
- ); // new KorapMatch();
-
- if (spans.isPayloadAvailable())
- match.addPayload((List<byte[]>) spans.getPayload());
-
- match.internalDocID = docID;
- match.populateDocument(doc, field, fields);
+ match.internalDocID = docID;
+ match.populateDocument(doc, field, fields);
- if (DEBUG) {
- if (match.getDocID() != null)
- log.trace("I've got a match in {} of {}",
- match.getDocID(), count);
- else
- log.trace("I've got a match in {} of {}",
- match.getUID(), count);
- };
+ if (DEBUG) {
+ if (match.getDocID() != null)
+ log.trace("I've got a match in {} of {}",
+ match.getDocID(), count);
+ else
+ log.trace("I've got a match in {} of {}",
+ match.getUID(), count);
+ };
+
+ atomicMatches.add(match);
+ };
- atomicMatches.add(match);
- };
+ // Can be disabled TEMPORARILY
+ while (!cutoff && spans.next()) {
+ if (limit > 0 && i >= limit)
+ break;
+
+ // Timeout!
+ if (tthread.getTime() > timeout) {
+ kr.setTimeExceeded(true);
+ break;
+ };
- // Can be disabled TEMPORARILY
- while (!cutoff && spans.next()) {
- if (limit > 0 && i >= limit)
- break;
+ // Count hits per resource
+ if (itemsPerResource > 0) {
+ int localDocID = spans.doc();
- // Timeout!
- if (tthread.getTime() > timeout) {
- kr.setTimeExceeded(true);
- break;
- };
-
- // Count hits per resource
- if (itemsPerResource > 0) {
- int localDocID = spans.doc();
-
- if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
- break;
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ break;
- // IDS are identical
- if (localDocID == oldLocalDocID || oldLocalDocID == -1) {
- if (localDocID == -1)
- break;
+ // IDS are identical
+ if (localDocID == oldLocalDocID || oldLocalDocID == -1) {
+ if (localDocID == -1)
+ break;
- if (itemsPerResourceCounter++ >= itemsPerResource) {
- if (spans.skipTo(localDocID + 1) != true) {
- break;
- };
- itemsPerResourceCounter = 1;
- localDocID = spans.doc();
- // continue;
- };
- }
+ if (itemsPerResourceCounter++ >= itemsPerResource) {
+ if (spans.skipTo(localDocID + 1) != true) {
+ break;
+ };
+ itemsPerResourceCounter = 1;
+ localDocID = spans.doc();
+ };
+ }
- // Reset counter
- else
- itemsPerResourceCounter = 0;
+ // Reset counter
+ else
+ itemsPerResourceCounter = 0;
- oldLocalDocID = localDocID;
- };
- i++;
- };
- atomicMatches.clear();
- };
+ oldLocalDocID = localDocID;
+ };
+ i++;
+ };
+ atomicMatches.clear();
+ };
- if (itemsPerResource > 0)
- kr.setItemsPerResource(itemsPerResource);
+ if (itemsPerResource > 0)
+ kr.setItemsPerResource(itemsPerResource);
- kr.setTotalResults(cutoff ? (long) -1 : (long) i);
- }
- catch (IOException e) {
- kr.addError(
- 600,
- "Unable to read index",
- e.getLocalizedMessage()
- );
- log.warn( e.getLocalizedMessage() );
- };
+ kr.setTotalResults(cutoff ? (long) -1 : (long) i);
+ }
+ catch (IOException e) {
+ kr.addError(
+ 600,
+ "Unable to read index",
+ e.getLocalizedMessage()
+ );
+ log.warn( e.getLocalizedMessage() );
+ };
- // Stop timer thread
- tthread.stopTimer();
+ // Stop timer thread
+ tthread.stopTimer();
- // Calculate time
- kr.setBenchmark(t1, System.nanoTime());
+ // Calculate time
+ kr.setBenchmark(t1, System.nanoTime());
- return kr;
+ return kr;
};
// Collect matches
public MatchCollector collect (KorapSearch ks, MatchCollector mc) {
- if (DEBUG)
- log.trace("Start collecting");
+ if (DEBUG)
+ log.trace("Start collecting");
- KorapCollection collection = ks.getCollection();
- collection.setIndex(this);
+ KorapCollection collection = ks.getCollection();
+ collection.setIndex(this);
- // Init term context
- this.termContexts = new HashMap<Term, TermContext>();
+ // Init term context
+ this.termContexts = new HashMap<Term, TermContext>();
- // Get span query
- SpanQuery query = ks.getQuery();
+ // Get span query
+ SpanQuery query = ks.getQuery();
- // Get the field of textual data and annotations
- String field = query.getField();
+ // Get the field of textual data and annotations
+ String field = query.getField();
- // TODO: Get document information from Cache!
- // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
- long t1 = System.nanoTime();
+ // TODO: Get document information from Cache!
+ // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
+ long t1 = System.nanoTime();
- // Only load UIDs
- HashSet<String> fields = new HashSet<>(1);
- fields.add("UID");
+ // Only load UIDs
+ HashSet<String> fields = new HashSet<>(1);
+ fields.add("UID");
- // List<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(10);
+ // List<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(10);
+ try {
- try {
-
- // Rewrite query (for regex and wildcard queries)
- for (Query rewrittenQuery = query.rewrite(this.reader());
+ // Rewrite query (for regex and wildcard queries)
+ for (Query rewrittenQuery = query.rewrite(this.reader());
rewrittenQuery != (Query) query;
rewrittenQuery = query.rewrite(this.reader())) {
- query = (SpanQuery) rewrittenQuery;
- };
+ query = (SpanQuery) rewrittenQuery;
+ };
- int matchcount = 0;
- String uniqueDocIDString;;
- int uniqueDocID = -1;
+ int matchcount = 0;
+ String uniqueDocIDString;
+ int uniqueDocID = -1;
- // start thread:
- for (AtomicReaderContext atomic : this.reader().leaves()) {
+ // start thread:
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
- int previousDocID = -1;
- int oldLocalDocID = -1;
+ int previousDocID = -1;
+ int oldLocalDocID = -1;
- // Use OpenBitSet;
- Bits bitset = collection.bits(atomic);
+ // Use OpenBitSet;
+ Bits bitset = collection.bits(atomic);
- // PositionsToOffset pto = new PositionsToOffset(atomic, field);
+ // PositionsToOffset pto = new PositionsToOffset(atomic, field);
+
+ Spans spans = query.getSpans(atomic, (Bits) bitset, termContexts);
- Spans spans = query.getSpans(atomic, (Bits) bitset, termContexts);
+ IndexReader lreader = atomic.reader();
- IndexReader lreader = atomic.reader();
+ while (spans.next()) {
+ int localDocID = spans.doc();
- while (spans.next()) {
- int localDocID = spans.doc();
+ // New match
+ // MatchIdentifier possibly needs more
+ /*
+ KorapMatch match = new KorapMatch();
+ match.setStartPos(spans.start());
+ match.setEndPos(spans.end());
+
+ // Add payload information to match
+ if (spans.isPayloadAvailable())
+ match.addPayload(spans.getPayload());
+ */
- // New match
- // MatchIdentifier possibly needs more
- /*
- KorapMatch match = new KorapMatch();
- match.setStartPos(spans.start());
- match.setEndPos(spans.end());
+ if (previousDocID != localDocID) {
+ if (matchcount > 0) {
+ mc.add(uniqueDocID, matchcount);
+ matchcount = 0;
+ };
- // Add payload information to match
- if (spans.isPayloadAvailable())
- match.addPayload(spans.getPayload());
- */
+ // Read document id from index
+ uniqueDocIDString =
+ lreader.document(localDocID, fields).get("UID");
- if (previousDocID != localDocID) {
- if (matchcount > 0) {
- mc.add(uniqueDocID, matchcount);
- matchcount = 0;
- };
+ if (uniqueDocIDString != null)
+ uniqueDocID = Integer.parseInt(uniqueDocIDString);
+
+ previousDocID = localDocID;
+ }
+ else {
+ matchcount++;
+ };
+ };
- // Read document id from index
- uniqueDocIDString =
- lreader.document(localDocID, fields).get("UID");
+ // Add count to collector
+ if (matchcount > 0) {
+ mc.add(uniqueDocID, matchcount);
+ matchcount = 0;
+ };
+ };
+ // end thread
- if (uniqueDocIDString != null)
- uniqueDocID = Integer.parseInt(uniqueDocIDString);
-
- previousDocID = localDocID;
- }
- else {
- matchcount++;
- };
- };
-
- // Add count to collector
- if (matchcount > 0) {
- mc.add(uniqueDocID, matchcount);
- matchcount = 0;
- };
- };
- // end thread
-
- // Benchmark the collector
- mc.setBenchmark(t1, System.nanoTime());
- }
- catch (IOException e) {
- mc.addError(
- 600,
- "Unable to read index",
- e.getLocalizedMessage()
+ // Benchmark the collector
+ mc.setBenchmark(t1, System.nanoTime());
+ }
+ catch (IOException e) {
+ mc.addError(
+ 600,
+ "Unable to read index",
+ e.getLocalizedMessage()
);
- log.warn(e.getLocalizedMessage());
- };
+ log.warn(e.getLocalizedMessage());
+ };
- mc.close();
- return mc;
+ mc.close();
+ return mc;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapNode.java b/src/main/java/de/ids_mannheim/korap/KorapNode.java
index 47f7009..d1beac0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapNode.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapNode.java
@@ -21,13 +21,13 @@
/**
* Standalone REST-Service for the Lucene Search Backend.
*
- * @author Nils Diewald
+ * @author diewald
*/
public class KorapNode {
// Base URI the Grizzly HTTP server will listen on
public static String BASE_URI = "http://localhost:8080/";
-
+
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapNode.class);
@@ -41,7 +41,7 @@
private static String dbClass = "org.sqlite.JDBC";
private static String dbURL = "jdbc:sqlite:";
-
+
/*
* Todo: Add shutdown hook,
* Then also close cdps.close();
@@ -55,50 +55,51 @@
*/
public static HttpServer startServer() {
- // Load configuration
- try {
- InputStream file = new FileInputStream(
- KorapNode.class.getClassLoader().getResource("server.properties").getFile()
+ // Load configuration
+ try {
+ InputStream file = new FileInputStream(
+ KorapNode.class.getClassLoader()
+ .getResource("server.properties")
+ .getFile()
);
- Properties prop = new Properties();
- prop.load(file);
+ Properties prop = new Properties();
+ prop.load(file);
- // Node properties
- path = prop.getProperty("lucene.indexDir", path);
- name = prop.getProperty("lucene.node.name", name);
- BASE_URI = prop.getProperty("lucene.node.baseURI", BASE_URI);
+ // Node properties
+ path = prop.getProperty("lucene.indexDir", path);
+ name = prop.getProperty("lucene.node.name", name);
+ BASE_URI = prop.getProperty("lucene.node.baseURI", BASE_URI);
- // Database properties
- dbUser = prop.getProperty("lucene.db.user", dbUser);
- dbPwd = prop.getProperty("lucene.db.pwd", dbPwd);
- dbClass = prop.getProperty("lucene.db.class", dbClass);
- dbURL = prop.getProperty("lucene.db.jdbcURL", dbURL);
+ // Database properties
+ dbUser = prop.getProperty("lucene.db.user", dbUser);
+ dbPwd = prop.getProperty("lucene.db.pwd", dbPwd);
+ dbClass = prop.getProperty("lucene.db.class", dbClass);
+ dbURL = prop.getProperty("lucene.db.jdbcURL", dbURL);
- }
- catch (IOException e) {
- log.error(e.getLocalizedMessage());
- };
+ }
+ catch (IOException e) {
+ log.error(e.getLocalizedMessage());
+ };
// create a resource config that scans for JAX-RS resources and providers
// in de.ids_mannheim.korap.server package
final ResourceConfig rc =
- new ResourceConfig().packages("de.ids_mannheim.korap.server");
+ new ResourceConfig().packages("de.ids_mannheim.korap.server");
// create and start a new instance of grizzly http server
// exposing the Jersey application at BASE_URI
return GrizzlyHttpServerFactory.createHttpServer(URI.create(BASE_URI), rc);
};
-
public static HttpServer startServer(String nodeName, String indexPath) {
// create a resource config that scans for JAX-RS resources and providers
// in de.ids_mannheim.korap.server package
final ResourceConfig rc =
- new ResourceConfig().packages("de.ids_mannheim.korap.server");
+ new ResourceConfig().packages("de.ids_mannheim.korap.server");
- name = nodeName;
- path = indexPath;
+ name = nodeName;
+ path = indexPath;
// create and start a new instance of grizzly http server
// exposing the Jersey application at BASE_URI
@@ -112,107 +113,106 @@
* @throws IOException
*/
public static void main(String[] args) throws IOException {
- // WADL available at BASE_URI + application.wadl
+ // WADL available at BASE_URI + application.wadl
final HttpServer server = startServer();
- // Establish shutdown hook
- Runtime.getRuntime().addShutdownHook(
+ // Establish shutdown hook
+ Runtime.getRuntime().addShutdownHook(
new Thread(
- new Runnable() {
- @Override
- public void run() {
- log.info("Stup Server");
- // staaahp!
- server.stop();
- }
- },
- "shutdownHook"
- )
- );
+ new Runnable() {
+ @Override
+ public void run() {
+ log.info("Stop Server");
+ // staaahp!
+ server.stop();
+ }
+ },
+ "shutdownHook"
+ )
+ );
- // Start server
- try {
- server.start();
- log.info("You may kill me gently with Ctrl+C");
- Thread.currentThread().join();
- }
- catch (Exception e) {
- log.error("Unable to start server: {}", e.getLocalizedMessage());
- };
+ // Start server
+ try {
+ server.start();
+ log.info("You may kill me gently with Ctrl+C");
+ Thread.currentThread().join();
+ }
+ catch (Exception e) {
+ log.error("Unable to start server: {}", e.getLocalizedMessage());
+ };
};
-
// What's the servers name?
public static String getName () {
- return name;
+ return name;
};
// What is the server listening on?
public static String getListener () {
- return BASE_URI;
+ return BASE_URI;
};
// Get database pool
public static ComboPooledDataSource getDBPool () {
- // Pool already initiated
- if (cpds != null)
- return cpds;
+ // Pool already initiated
+ if (cpds != null)
+ return cpds;
+
+ try {
- try {
-
- // Parameters are defined in the property file
- cpds = new ComboPooledDataSource();
- cpds.setDriverClass(dbClass);
- cpds.setJdbcUrl(dbURL);
- if (dbUser != null)
- cpds.setUser(dbUser);
- if (dbPwd != null)
- cpds.setPassword(dbPwd);
- cpds.setMaxStatements(100);
- return cpds;
- }
- catch (PropertyVetoException e) {
- log.error(e.getLocalizedMessage());
- };
- return null;
+ // Parameters are defined in the property file
+ cpds = new ComboPooledDataSource();
+ cpds.setDriverClass(dbClass);
+ cpds.setJdbcUrl(dbURL);
+ if (dbUser != null)
+ cpds.setUser(dbUser);
+ if (dbPwd != null)
+ cpds.setPassword(dbPwd);
+ cpds.setMaxStatements(100);
+ return cpds;
+ }
+ catch (PropertyVetoException e) {
+ log.error(e.getLocalizedMessage());
+ };
+ return null;
};
// Get Lucene Index
public static KorapIndex getIndex () {
- // Index already instantiated
- if (index != null)
- return index;
-
+ // Index already instantiated
+ if (index != null)
+ return index;
+
try {
- // Get a temporary index
- if (path == null)
- // Temporary index
- index = new KorapIndex();
+ // Get a temporary index
+ if (path == null)
+ // Temporary index
+ index = new KorapIndex();
- else {
- File file = new File(path);
+ else {
+ File file = new File(path);
- log.info("Loading index from {}", path);
- if (!file.exists()) {
- log.error("Index not found at {}", path);
- return null;
- };
+ log.info("Loading index from {}", path);
+ if (!file.exists()) {
+ log.error("Index not found at {}", path);
+ return null;
+ };
- // Set real index
- index = new KorapIndex(new MMapDirectory(file));
- };
- return index;
- }
- catch (IOException e) {
- log.error("Index not loadable at {}: {}", path, e.getMessage());
- };
- return null;
+ // Set real index
+ index = new KorapIndex(new MMapDirectory(file));
+ };
+ return index;
+ }
+ catch (IOException e) {
+ log.error("Index not loadable at {}: {}", path, e.getMessage());
+ };
+ return null;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 7a4e40b..8c86a24 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -26,11 +26,14 @@
*/
/**
- * @author Nils Diewald
+ * FieldDocument represents a simple API to create documents
+ * for storing with KorapIndex. <i>Field</i> in the name resembles
+ * the meaning of Lucene index fields.
*
- * FieldDocument implements a simple API to create documents for storing with KorapIndex.
+ * @author diewald
*/
@JsonIgnoreProperties(ignoreUnknown = true)
+// @JsonDeserialize(using = FieldDocumentDeserializer.class)
public class FieldDocument extends KorapDocument {
ObjectMapper mapper = new ObjectMapper();
diff --git a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
index d8bfe38..7ec1e10 100644
--- a/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
+++ b/src/main/java/de/ids_mannheim/korap/response/KorapResponse.java
@@ -4,13 +4,12 @@
import java.io.*;
import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import de.ids_mannheim.korap.response.Notifications;
-import de.ids_mannheim.korap.response.serialize.KorapResponseDeserializer;
/**
* Base class for objects meant to be responded by the server.
@@ -23,11 +22,11 @@
* );
* </pre></blockquote>
*
- * @author Nils Diewald
- * @see de.ids_mannheim.korap.response.Notifications
- * @see de.ids_mannheim.korap.response.serialize.KorapResponseDeserializer
+ * @author diewald
+ * @see Notifications
*/
-@JsonDeserialize(using = KorapResponseDeserializer.class)
+@JsonInclude(Include.NON_NULL)
+@JsonIgnoreProperties(ignoreUnknown = true)
public class KorapResponse extends Notifications {
ObjectMapper mapper = new ObjectMapper();
@@ -35,11 +34,8 @@
private String benchmark;
private boolean timeExceeded = false;
-
/**
* Construct a new KorapResponse object.
- *
- * @return The new KorapResponse object
*/
public KorapResponse () {};
@@ -49,7 +45,6 @@
*
* @return String representation of the backend's version
*/
- @JsonIgnore
public String getVersion () {
return this.version;
};
@@ -61,9 +56,18 @@
* @param version The string representation of the backend's version
* @return KorapResponse object for chaining
*/
- @JsonIgnore
- public KorapResponse setVersion (String version) {
- this.version = version;
+ public KorapResponse setVersion (String fullVersion) {
+ int found = fullVersion.lastIndexOf('-');
+
+ // Is combined name and version
+ if (found > 0 && (found + 1 < fullVersion.length())) {
+ this.setName(fullVersion.substring(0, found));
+ this.version = fullVersion.substring(found + 1);
+ }
+ // Is only version number
+ else {
+ this.version = fullVersion;
+ };
return this;
};
@@ -74,7 +78,6 @@
*
* @return String representation of the backend's name
*/
- @JsonIgnore
public String getName () {
return this.name;
};
@@ -84,10 +87,9 @@
* Set the string representation of the backend's name.
* All nodes in a cluster should have the same backend name.
*
- * @param version The string representation of the backend's name
+ * @param name The string representation of the backend's name
* @return KorapResponse object for chaining
*/
- @JsonIgnore
public KorapResponse setName (String name) {
this.name = name;
return this;
@@ -100,7 +102,6 @@
*
* @return String representation of the node's name
*/
- @JsonIgnore
public String getNode () {
return this.node;
};
@@ -113,7 +114,6 @@
* @param version The string representation of the node's name
* @return KorapResponse object for chaining
*/
- @JsonIgnore
public KorapResponse setNode (String name) {
this.node = name;
return this;
@@ -139,11 +139,10 @@
* <p>
* Will add a warning (682) to the output.
*
- * @param timeout Either <tt>true</tt> or <tt>false</tt>, in case the response
- * timed out
+ * @param timeout Either <tt>true</tt> or <tt>false</tt>,
+ * in case the response timed out
* @return KorapResponse object for chaining
*/
- @JsonIgnore
public KorapResponse setTimeExceeded (boolean timeout) {
if (timeout)
this.addWarning(682, "Response time exceeded");
@@ -158,7 +157,6 @@
* @return String representation of the benchmark
* (including trailing time unit)
*/
- @JsonIgnore
public String getBenchmark () {
return this.benchmark;
};
@@ -190,7 +188,6 @@
* (including trailing time unit)
* @return KorapResponse for chaining
*/
- @JsonIgnore
public KorapResponse setBenchmark (String bm) {
this.benchmark = bm;
return this;
@@ -202,7 +199,6 @@
*
* @return The listener URI as a string representation
*/
- @JsonIgnore
public String getListener () {
return this.listener;
};
@@ -220,7 +216,6 @@
* @param listener String representation of the listener URI
* @return KorapResponse object for chaining
*/
- @JsonIgnore
public KorapResponse setListener (String listener) {
this.listener = listener;
return this;
@@ -228,9 +223,9 @@
/**
- * Serialize response as a JsonNode.
+ * Serialize response as a {@link JsonNode}.
*
- * @return JsonNode representation of the response
+ * @return {@link JsonNode} representation of the response
*/
@Override
public JsonNode toJsonNode () {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Messages.java b/src/main/java/de/ids_mannheim/korap/response/Messages.java
index cc7b723..2881185 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Messages.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Messages.java
@@ -20,9 +20,9 @@
* m.add(614, "This is a new message");
* </pre></blockquote>
*
- * @author Nils Diewald
- * @see de.ids_mannheim.korap.response.Notifications
- * @see de.ids_mannheim.korap.response.Message
+ * @author diewald
+ * @see Notifications
+ * @see Message
*/
public class Messages implements Cloneable, Iterable<Message> {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index ad65024..68bae83 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -63,7 +63,7 @@
/**
* Return all warnings.
*
- * @return The <code>Messages</code> object representing all warnings
+ * @return {@link Messages} representing all warnings
*/
public Messages getWarnings () {
return this.warnings;
@@ -71,6 +71,20 @@
/**
+ * Set warnings by means of a {@link JsonNode}.
+ *
+ * @param msgs JSON array of warnings.
+ * @return Notifications object for chaining.
+ */
+ public Notifications setWarnings (JsonNode msgs) {
+ for (JsonNode msg : msgs)
+ this.addWarning(msg);
+ return this;
+ };
+
+
+
+ /**
* Return a specific warning based on an index.
*
* @param index The index of the warning in the list of warnings.
@@ -98,11 +112,10 @@
return this;
};
-
/**
* Appends a new warning.
*
- * @param node <code>JsonNode</code> representing a warning message
+ * @param node {@link JsonNode} representing a warning message
* @return Notification object for chaining
*/
public Notifications addWarning (JsonNode node) {
@@ -123,7 +136,7 @@
/**
* Appends new warnings.
*
- * @param msgs <code>Messages</code> representing multiple warnings
+ * @param msgs {@link Messages} representing multiple warnings
* @return Notification object for chaining
*/
public Notifications addWarnings (Messages msgs) {
@@ -138,7 +151,7 @@
/**
* Return all errors.
*
- * @return The <code>Messages</code> object representing all errors
+ * @return The {@link Messages} object representing all errors
*/
public Messages getErrors () {
return this.errors;
@@ -146,6 +159,19 @@
/**
+ * Set errors by means of a {@link JsonNode}.
+ *
+ * @param msgs JSON array of errors.
+ * @return Notifications object for chaining.
+ */
+ public Notifications setErrors (JsonNode msgs) {
+ for (JsonNode msg : msgs)
+ this.addError(msg);
+ return this;
+ };
+
+
+ /**
* Return a specific error based on an index.
*
* @param index The index of the error in the list of errors.
@@ -189,7 +215,7 @@
/**
* Appends a new error.
*
- * @param node <code>JsonNode</code> representing an error message
+ * @param node {@link JsonNode} representing an error message
* @return Notification object for chaining
*/
public Notifications addError (JsonNode msg) {
@@ -209,7 +235,7 @@
/**
* Appends new errors.
*
- * @param msgs <code>Messages</code> representing multiple errors
+ * @param msgs {@link Messages} representing multiple errors
* @return Notification object for chaining
*/
public Notifications addErrors (Messages msgs) {
@@ -224,12 +250,26 @@
/**
* Return all messages.
*
- * @return The <code>Messages</code> object representing all messages
+ * @return {@link Messages} representing all messages
*/
public Messages getMessages () {
return this.messages;
};
+
+ /**
+ * Set messages by means of a {@link JsonNode}.
+ *
+ * @param msgs JSON array of messages.
+ * @return Notifications object for chaining.
+ */
+ public Notifications setMessages (JsonNode msgs) {
+ for (JsonNode msg : msgs)
+ this.addMessage(msg);
+ return this;
+ };
+
+
/**
* Return a specific message based on an index.
*
@@ -274,7 +314,7 @@
/**
* Appends a new message.
*
- * @param node <code>JsonNode</code> representing a message
+ * @param node {@link JsonNode} representing a message
* @return Notification object for chaining
*/
public Notifications addMessage (JsonNode msg) {
@@ -293,7 +333,7 @@
/**
* Appends new messages.
*
- * @param msgs <code>Messages</code> representing multiple messages
+ * @param msgs {@link Messages} representing multiple messages
* @return Notification object for chaining
*/
public Notifications addMessages (Messages msgs) {
@@ -306,7 +346,7 @@
/**
- * Copy notifications from one notification object.
+ * Copy notifications from another notification object.
*
* @param notes Notification object to copy notifications from.
* @return Notification object for chaining
@@ -327,9 +367,9 @@
/**
- * Copy notifications from a JsonNode object.
+ * Copy notifications from a {@link JsonNode} object.
*
- * @param request Notifications containing JsonNode.
+ * @param request Notifications containing {@link JsonNode}.
* @return Notification object for chaining
*/
public Notifications copyNotificationsFrom (JsonNode request) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/serialize/KorapResponseDeserializer.java b/src/main/java/de/ids_mannheim/korap/response/serialize/KorapResponseDeserializer.java
deleted file mode 100644
index 23525ff..0000000
--- a/src/main/java/de/ids_mannheim/korap/response/serialize/KorapResponseDeserializer.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package de.ids_mannheim.korap.response.serialize;
-
-import java.util.*;
-import java.io.*;
-
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.JsonDeserializer;
-import com.fasterxml.jackson.databind.DeserializationContext;
-
-import de.ids_mannheim.korap.response.KorapResponse;
-
-/**
- * JSON Deserialization class for Jackson, used by KorapResponse.
- * No direct usage intended.
- *
- * @author Nils Diewald
- * @see de.ids_mannheim.korap.KorapResponse
- */
-public class KorapResponseDeserializer extends JsonDeserializer<KorapResponse> {
-
- /**
- * Deserialization of JSON format.
- *
- * @param parser A parser instance for consuming JSON.
- * @param ctxt A deserialization context.
- * @return The deserialized KorapResponse object.
- */
-
- @Override
- public KorapResponse deserialize (JsonParser parser, DeserializationContext ctxt)
- throws IOException, JsonProcessingException {
- JsonNode node = parser.getCodec().readTree(parser);
- KorapResponse kresp = new KorapResponse();
-
- // Deserialize version information
- if (node.has("version")) {
- String fullVersion = node.get("version").asText();
- int found = fullVersion.lastIndexOf('-');
-
- // Is combined name and version
- if (found > 0 && (found + 1 < fullVersion.length())) {
- kresp.setName(fullVersion.substring(0, found))
- .setVersion(fullVersion.substring(found + 1));
- }
- // Is only version number
- else {
- kresp.setVersion(fullVersion);
- };
- };
-
- // Deserialize timeout information
- if (node.has("timeExceeded") && node.get("timeExceeded").asBoolean())
- kresp.setTimeExceeded(true);
-
- // Deserialize benchmark information
- if (node.has("benchmark"))
- kresp.setBenchmark(node.get("benchmark").asText());
-
- // Deserialize listener information
- if (node.has("listener"))
- kresp.setListener(node.get("listener").asText());
-
- // Deserialize listener information
- if (node.has("node"))
- kresp.setNode(node.get("node").asText());
-
- // Copy notifications
- kresp.copyNotificationsFrom(node);
-
- return kresp;
- };
-};
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index 5542e44..cb27c50 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -28,8 +28,8 @@
@Test
public void checkHighlights () throws IOException, QueryException {
- KorapIndex ki = new KorapIndex();
- String json = new String(
+ KorapIndex ki = new KorapIndex();
+ String json = new String(
"{" +
" \"fields\" : [" +
" { "+
@@ -46,61 +46,61 @@
" ]" +
"}");
- FieldDocument fd = ki.addDoc(json);
- ki.commit();
+ FieldDocument fd = ki.addDoc(json);
+ ki.commit();
- KorapQuery kq = new KorapQuery("tokens");
- KorapResult kr = ki.search(
- (SpanQuery) kq.seq(kq._(1, kq.seg("s:b"))).toQuery()
+ KorapQuery kq = new KorapQuery("tokens");
+ KorapResult kr = ki.search(
+ (SpanQuery) kq.seq(kq._(1, kq.seg("s:b"))).toQuery()
);
- KorapMatch km = kr.getMatch(0);
- assertEquals(km.getStartPos(), 1);
- assertEquals(km.getEndPos(), 2);
- assertEquals(km.getStartPos(1), 1);
- assertEquals(km.getEndPos(1), 2);
- assertEquals("<span class=\"context-left\">a</span><span class=\"match\"><em class=\"class-1 level-0\">b</em></span><span class=\"context-right\">c</span>", km.getSnippetHTML());
+ KorapMatch km = kr.getMatch(0);
+ assertEquals(km.getStartPos(), 1);
+ assertEquals(km.getEndPos(), 2);
+ assertEquals(km.getStartPos(1), 1);
+ assertEquals(km.getEndPos(1), 2);
+ assertEquals("<span class=\"context-left\">a</span><span class=\"match\"><em class=\"class-1 level-0\">b</em></span><span class=\"context-right\">c</span>", km.getSnippetHTML());
- kr = ki.search((SpanQuery) kq.seq(kq._(1, kq.seg("s:b"))).append(kq._(2, kq.seg("s:c"))).toQuery());
- km = kr.getMatch(0);
- assertEquals(km.getStartPos(), 1);
- assertEquals(km.getEndPos(), 3);
- assertEquals(km.getStartPos(1), 1);
- assertEquals(km.getEndPos(1), 2);
- assertEquals(km.getStartPos(2), 2);
- assertEquals(km.getEndPos(2), 3);
- assertEquals("<span class=\"context-left\">a</span><span class=\"match\"><em class=\"class-1 level-0\">b</em><em class=\"class-2 level-0\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ kr = ki.search((SpanQuery) kq.seq(kq._(1, kq.seg("s:b"))).append(kq._(2, kq.seg("s:c"))).toQuery());
+ km = kr.getMatch(0);
+ assertEquals(km.getStartPos(), 1);
+ assertEquals(km.getEndPos(), 3);
+ assertEquals(km.getStartPos(1), 1);
+ assertEquals(km.getEndPos(1), 2);
+ assertEquals(km.getStartPos(2), 2);
+ assertEquals(km.getEndPos(2), 3);
+ assertEquals("<span class=\"context-left\">a</span><span class=\"match\"><em class=\"class-1 level-0\">b</em><em class=\"class-2 level-0\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
- kr = ki.search((SpanQuery) kq.seq(kq._(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))).append(kq._(2, kq.seg("s:c"))).toQuery());
- km = kr.getMatch(0);
- assertEquals(km.getStartPos(), 0);
- assertEquals(km.getEndPos(), 3);
- assertEquals(km.getStartPos(1), 0);
- assertEquals(km.getEndPos(1), 2);
- assertEquals(km.getStartPos(2), 2);
- assertEquals(km.getEndPos(2), 3);
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-1 level-0\">ab</em><em class=\"class-2 level-0\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ kr = ki.search((SpanQuery) kq.seq(kq._(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))).append(kq._(2, kq.seg("s:c"))).toQuery());
+ km = kr.getMatch(0);
+ assertEquals(km.getStartPos(), 0);
+ assertEquals(km.getEndPos(), 3);
+ assertEquals(km.getStartPos(1), 0);
+ assertEquals(km.getEndPos(1), 2);
+ assertEquals(km.getStartPos(2), 2);
+ assertEquals(km.getEndPos(2), 3);
+ assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-1 level-0\">ab</em><em class=\"class-2 level-0\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
- kr = ki.search((SpanQuery) kq._(3, kq.seq(kq._(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))).append(kq._(2, kq.seg("s:c")))).toQuery());
- km = kr.getMatch(0);
- assertEquals(km.getStartPos(), 0);
- assertEquals(km.getEndPos(), 3);
- assertEquals(km.getStartPos(1), 0);
- assertEquals(km.getEndPos(1), 2);
- assertEquals(km.getStartPos(2), 2);
- assertEquals(km.getEndPos(2), 3);
- assertEquals(km.getStartPos(3), 0);
- assertEquals(km.getEndPos(3), 3);
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-3 level-0\"><em class=\"class-1 level-1\">ab</em><em class=\"class-2 level-1\">c</em></em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ kr = ki.search((SpanQuery) kq._(3, kq.seq(kq._(1, kq.seq(kq.seg("s:a")).append(kq.seg("s:b")))).append(kq._(2, kq.seg("s:c")))).toQuery());
+ km = kr.getMatch(0);
+ assertEquals(km.getStartPos(), 0);
+ assertEquals(km.getEndPos(), 3);
+ assertEquals(km.getStartPos(1), 0);
+ assertEquals(km.getEndPos(1), 2);
+ assertEquals(km.getStartPos(2), 2);
+ assertEquals(km.getEndPos(2), 3);
+ assertEquals(km.getStartPos(3), 0);
+ assertEquals(km.getEndPos(3), 3);
+ assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-3 level-0\"><em class=\"class-1 level-1\">ab</em><em class=\"class-2 level-1\">c</em></em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
};
@Test
public void checkHighlightsManually () throws IOException, QueryException {
- KorapIndex ki = new KorapIndex();
- String json = new String(
+ KorapIndex ki = new KorapIndex();
+ String json = new String(
"{" +
" \"fields\" : [" +
" { "+
@@ -117,221 +117,219 @@
" ]" +
"}");
- FieldDocument fd = ki.addDoc(json);
- ki.commit();
+ FieldDocument fd = ki.addDoc(json);
+ ki.commit();
- KorapQuery kq = new KorapQuery("tokens");
+ KorapQuery kq = new KorapQuery("tokens");
- KorapResult kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a")).append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery());
- KorapMatch km = kr.getMatch(0);
- km.addHighlight(0, 1, (short) 7);
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\">ab</em>c</span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ KorapResult kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a")).append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery());
+ KorapMatch km = kr.getMatch(0);
+ km.addHighlight(0, 1, (short) 7);
+ assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\">ab</em>c</span><span class=\"context-right\"></span>", km.getSnippetHTML());
- km.addHighlight(1, 2, (short) 6);
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\">a<em class=\"class-6 level-1\">b</em></em><em class=\"class-6 level-1\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ km.addHighlight(1, 2, (short) 6);
+ assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\">a<em class=\"class-6 level-1\">b</em></em><em class=\"class-6 level-1\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
- km.addHighlight(0, 1, (short) 5);
- assertEquals("[{7:{5:a{6:b}}}{6:c}]", km.getSnippetBrackets());
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\"><em class=\"class-5 level-1\">a<em class=\"class-6 level-2\">b</em></em></em><em class=\"class-6 level-2\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
+ km.addHighlight(0, 1, (short) 5);
+ assertEquals("[{7:{5:a{6:b}}}{6:c}]", km.getSnippetBrackets());
+ assertEquals("<span class=\"context-left\"></span><span class=\"match\"><em class=\"class-7 level-0\"><em class=\"class-5 level-1\">a<em class=\"class-6 level-2\">b</em></em></em><em class=\"class-6 level-2\">c</em></span><span class=\"context-right\"></span>", km.getSnippetHTML());
};
-
@Test
public void highlightMissingBug () throws IOException, QueryException {
- KorapIndex ki = new KorapIndex();
- FieldDocument fd = new FieldDocument();
- fd.addString("ID", "doc-1");
- fd.addString("UID", "1");
- fd.addTV("base",
- "abab",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:a|i:c|_2#2-3]" +
- "[(3-4)s:b|i:a|_3#3-4]");
- ki.addDoc(fd);
- fd = new FieldDocument();
- fd.addString("ID", "doc-2");
- fd.addString("UID", "2");
- fd.addTV("base",
- "aba",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:a|i:c|_2#2-3]");
- ki.addDoc(fd);
+ KorapIndex ki = new KorapIndex();
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addString("UID", "1");
+ fd.addTV("base",
+ "abab",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:a|i:c|_2#2-3]" +
+ "[(3-4)s:b|i:a|_3#3-4]");
+ ki.addDoc(fd);
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addString("UID", "2");
+ fd.addTV("base",
+ "aba",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:a|i:c|_2#2-3]");
+ ki.addDoc(fd);
- // Commit!
- ki.commit();
- fd = new FieldDocument();
- fd.addString("ID", "doc-3");
- fd.addString("UID", "3");
- fd.addTV("base",
- "abab",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:a|i:c|_2#2-3]" +
- "[(3-4)s:b|i:a|_3#3-4]");
- ki.addDoc(fd);
+ // Commit!
+ ki.commit();
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-3");
+ fd.addString("UID", "3");
+ fd.addTV("base",
+ "abab",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:a|i:c|_2#2-3]" +
+ "[(3-4)s:b|i:a|_3#3-4]");
+ ki.addDoc(fd);
+
+ // Commit!
+ ki.commit();
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-4");
+ fd.addString("UID", "4");
+ fd.addTV("base",
+ "aba",
+ "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" +
+ "[(1-2)s:b|i:b|_1#1-2]" +
+ "[(2-3)s:a|i:c|_2#2-3]");
+ ki.addDoc(fd);
+
+ // Commit!
+ ki.commit();
- // Commit!
- ki.commit();
- fd = new FieldDocument();
- fd.addString("ID", "doc-4");
- fd.addString("UID", "4");
- fd.addTV("base",
- "aba",
- "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]" +
- "[(1-2)s:b|i:b|_1#1-2]" +
- "[(2-3)s:a|i:c|_2#2-3]");
- ki.addDoc(fd);
+ KorapQuery kq = new KorapQuery("base");
+ SpanQuery q = (SpanQuery) kq.or(kq._(1, kq.seg("s:a"))).or(kq._(2, kq.seg("s:b"))).toQuery();
+ KorapResult kr = ki.search(q);
+ assertEquals((long) 14, kr.getTotalResults());
+ assertEquals("[{1:a}]bab", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("a[{2:b}]ab", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("ab[{1:a}]b", kr.getMatch(2).getSnippetBrackets());
+ assertEquals("aba[{2:b}]", kr.getMatch(3).getSnippetBrackets());
+
+ assertEquals("[{1:a}]ba", kr.getMatch(4).getSnippetBrackets());
+ assertEquals("a[{2:b}]a", kr.getMatch(5).getSnippetBrackets());
+ assertEquals("ab[{1:a}]", kr.getMatch(6).getSnippetBrackets());
- // Commit!
- ki.commit();
+ assertEquals("[{1:a}]bab", kr.getMatch(7).getSnippetBrackets());
+ assertEquals("a[{2:b}]ab", kr.getMatch(8).getSnippetBrackets());
+ assertEquals("ab[{1:a}]b", kr.getMatch(9).getSnippetBrackets());
+ assertEquals("aba[{2:b}]", kr.getMatch(10).getSnippetBrackets());
- KorapQuery kq = new KorapQuery("base");
- SpanQuery q = (SpanQuery) kq.or(kq._(1, kq.seg("s:a"))).or(kq._(2, kq.seg("s:b"))).toQuery();
- KorapResult kr = ki.search(q);
- assertEquals((long) 14, kr.getTotalResults());
- assertEquals("[{1:a}]bab", kr.getMatch(0).getSnippetBrackets());
- assertEquals("a[{2:b}]ab", kr.getMatch(1).getSnippetBrackets());
- assertEquals("ab[{1:a}]b", kr.getMatch(2).getSnippetBrackets());
- assertEquals("aba[{2:b}]", kr.getMatch(3).getSnippetBrackets());
+ assertEquals("[{1:a}]ba", kr.getMatch(11).getSnippetBrackets());
+ assertEquals("a[{2:b}]a", kr.getMatch(12).getSnippetBrackets());
+ assertEquals("ab[{1:a}]", kr.getMatch(13).getSnippetBrackets());
- assertEquals("[{1:a}]ba", kr.getMatch(4).getSnippetBrackets());
- assertEquals("a[{2:b}]a", kr.getMatch(5).getSnippetBrackets());
- assertEquals("ab[{1:a}]", kr.getMatch(6).getSnippetBrackets());
+ kq = new KorapQuery("base");
+ q = (SpanQuery) kq.or(kq._(1, kq.seg("i:a"))).or(kq._(2, kq.seg("i:c"))).toQuery();
+ KorapSearch qs = new KorapSearch(q);
+ qs.getContext().left.setToken(true).setLength((short) 1);
+ qs.getContext().right.setToken(true).setLength((short) 1);
+ kr = ki.search(qs);
+ assertEquals((long) 10, kr.getTotalResults());
- assertEquals("[{1:a}]bab", kr.getMatch(7).getSnippetBrackets());
- assertEquals("a[{2:b}]ab", kr.getMatch(8).getSnippetBrackets());
- assertEquals("ab[{1:a}]b", kr.getMatch(9).getSnippetBrackets());
- assertEquals("aba[{2:b}]", kr.getMatch(10).getSnippetBrackets());
+ assertEquals("[{1:a}]b ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("... b[{2:a}]b", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("... a[{1:b}]", kr.getMatch(2).getSnippetBrackets());
+ assertEquals("[{1:a}]b ...", kr.getMatch(3).getSnippetBrackets());
+ assertEquals("... b[{2:a}]", kr.getMatch(4).getSnippetBrackets());
+ assertEquals("[{1:a}]b ...", kr.getMatch(5).getSnippetBrackets());
+ assertEquals("... b[{2:a}]b", kr.getMatch(6).getSnippetBrackets());
+ assertEquals("... a[{1:b}]", kr.getMatch(7).getSnippetBrackets());
+ assertEquals("[{1:a}]b ...", kr.getMatch(8).getSnippetBrackets());
+ assertEquals("... b[{2:a}]", kr.getMatch(9).getSnippetBrackets());
- assertEquals("[{1:a}]ba", kr.getMatch(11).getSnippetBrackets());
- assertEquals("a[{2:b}]a", kr.getMatch(12).getSnippetBrackets());
- assertEquals("ab[{1:a}]", kr.getMatch(13).getSnippetBrackets());
+ qs.getContext().left.setToken(true).setLength((short) 0);
+ qs.getContext().right.setToken(true).setLength((short) 0);
+ kr = ki.search(qs);
+ assertEquals((long) 10, kr.getTotalResults());
+ assertEquals("[{1:a}] ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("... [{2:a}] ...", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("... [{1:b}]", kr.getMatch(2).getSnippetBrackets());
+ assertEquals("[{1:a}] ...", kr.getMatch(3).getSnippetBrackets());
+ assertEquals("... [{2:a}]", kr.getMatch(4).getSnippetBrackets());
+ assertEquals("[{1:a}] ...", kr.getMatch(5).getSnippetBrackets());
+ assertEquals("... [{2:a}] ...", kr.getMatch(6).getSnippetBrackets());
+ assertEquals("... [{1:b}]", kr.getMatch(7).getSnippetBrackets());
+ assertEquals("[{1:a}] ...", kr.getMatch(8).getSnippetBrackets());
+ assertEquals("... [{2:a}]", kr.getMatch(9).getSnippetBrackets());
- kq = new KorapQuery("base");
- q = (SpanQuery) kq.or(kq._(1, kq.seg("i:a"))).or(kq._(2, kq.seg("i:c"))).toQuery();
- KorapSearch qs = new KorapSearch(q);
- qs.getContext().left.setToken(true).setLength((short) 1);
- qs.getContext().right.setToken(true).setLength((short) 1);
- kr = ki.search(qs);
- assertEquals((long) 10, kr.getTotalResults());
+ q = (SpanQuery) kq._(
+ 3, kq.or(kq._(1, kq.seg("i:a"))).or(kq._(2, kq.seg("i:c")))
+ ).toQuery();
+ qs = new KorapSearch(q);
+ qs.getContext().left.setToken(true).setLength((short) 0);
+ qs.getContext().right.setToken(true).setLength((short) 0);
+ kr = ki.search(qs);
+ assertEquals((long) 10, kr.getTotalResults());
- assertEquals("[{1:a}]b ...", kr.getMatch(0).getSnippetBrackets());
- assertEquals("... b[{2:a}]b", kr.getMatch(1).getSnippetBrackets());
- assertEquals("... a[{1:b}]", kr.getMatch(2).getSnippetBrackets());
- assertEquals("[{1:a}]b ...", kr.getMatch(3).getSnippetBrackets());
- assertEquals("... b[{2:a}]", kr.getMatch(4).getSnippetBrackets());
- assertEquals("[{1:a}]b ...", kr.getMatch(5).getSnippetBrackets());
- assertEquals("... b[{2:a}]b", kr.getMatch(6).getSnippetBrackets());
- assertEquals("... a[{1:b}]", kr.getMatch(7).getSnippetBrackets());
- assertEquals("[{1:a}]b ...", kr.getMatch(8).getSnippetBrackets());
- assertEquals("... b[{2:a}]", kr.getMatch(9).getSnippetBrackets());
-
- qs.getContext().left.setToken(true).setLength((short) 0);
- qs.getContext().right.setToken(true).setLength((short) 0);
- kr = ki.search(qs);
- assertEquals((long) 10, kr.getTotalResults());
-
- assertEquals("[{1:a}] ...", kr.getMatch(0).getSnippetBrackets());
- assertEquals("... [{2:a}] ...", kr.getMatch(1).getSnippetBrackets());
- assertEquals("... [{1:b}]", kr.getMatch(2).getSnippetBrackets());
- assertEquals("[{1:a}] ...", kr.getMatch(3).getSnippetBrackets());
- assertEquals("... [{2:a}]", kr.getMatch(4).getSnippetBrackets());
- assertEquals("[{1:a}] ...", kr.getMatch(5).getSnippetBrackets());
- assertEquals("... [{2:a}] ...", kr.getMatch(6).getSnippetBrackets());
- assertEquals("... [{1:b}]", kr.getMatch(7).getSnippetBrackets());
- assertEquals("[{1:a}] ...", kr.getMatch(8).getSnippetBrackets());
- assertEquals("... [{2:a}]", kr.getMatch(9).getSnippetBrackets());
-
- q = (SpanQuery) kq._(3, kq.or(kq._(1, kq.seg("i:a"))).or(kq._(2, kq.seg("i:c")))).toQuery();
- qs = new KorapSearch(q);
- qs.getContext().left.setToken(true).setLength((short) 0);
- qs.getContext().right.setToken(true).setLength((short) 0);
- kr = ki.search(qs);
- assertEquals((long) 10, kr.getTotalResults());
-
- assertEquals("[{3:{1:a}}] ...", kr.getMatch(0).getSnippetBrackets());
- assertEquals("... [{3:{2:a}}] ...", kr.getMatch(1).getSnippetBrackets());
- assertEquals("... [{3:{1:b}}]", kr.getMatch(2).getSnippetBrackets());
- assertEquals("[{3:{1:a}}] ...", kr.getMatch(3).getSnippetBrackets());
- assertEquals("... [{3:{2:a}}]", kr.getMatch(4).getSnippetBrackets());
- assertEquals("[{3:{1:a}}] ...", kr.getMatch(5).getSnippetBrackets());
- assertEquals("... [{3:{2:a}}] ...", kr.getMatch(6).getSnippetBrackets());
- assertEquals("... [{3:{1:b}}]", kr.getMatch(7).getSnippetBrackets());
- assertEquals("[{3:{1:a}}] ...", kr.getMatch(8).getSnippetBrackets());
- assertEquals("... [{3:{2:a}}]", kr.getMatch(9).getSnippetBrackets());
+ assertEquals("[{3:{1:a}}] ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("... [{3:{2:a}}] ...", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("... [{3:{1:b}}]", kr.getMatch(2).getSnippetBrackets());
+ assertEquals("[{3:{1:a}}] ...", kr.getMatch(3).getSnippetBrackets());
+ assertEquals("... [{3:{2:a}}]", kr.getMatch(4).getSnippetBrackets());
+ assertEquals("[{3:{1:a}}] ...", kr.getMatch(5).getSnippetBrackets());
+ assertEquals("... [{3:{2:a}}] ...", kr.getMatch(6).getSnippetBrackets());
+ assertEquals("... [{3:{1:b}}]", kr.getMatch(7).getSnippetBrackets());
+ assertEquals("[{3:{1:a}}] ...", kr.getMatch(8).getSnippetBrackets());
+ assertEquals("... [{3:{2:a}}]", kr.getMatch(9).getSnippetBrackets());
};
-
@Test
public void highlightGreaterClassBug () throws IOException, QueryException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001", "00002"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
);
- };
- ki.commit();
+ };
+ ki.commit();
- // 15
- String json = getString(getClass().getResource("/queries/bugs/greater_highlights_15.jsonld").getFile());
+ // 15
+ String json = getString(getClass().getResource("/queries/bugs/greater_highlights_15.jsonld").getFile());
- KorapSearch ks = new KorapSearch(json);
- KorapResult kr = ks.run(ki);
- assertEquals(kr.getQuery(),"{15: tokens:s:Alphabet}");
- assertEquals(kr.getTotalResults(),7);
- assertEquals(kr.getStartIndex(),0);
- assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{15:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
- assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-15 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(kr.getQuery(),"{15: tokens:s:Alphabet}");
+ assertEquals(kr.getTotalResults(),7);
+ assertEquals(kr.getStartIndex(),0);
+ assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{15:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
+ assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-15 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_16.jsonld").getFile());
- json = getString(getClass().getResource("/queries/bugs/greater_highlights_16.jsonld").getFile());
+ // 16
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(kr.getQuery(),"{16: tokens:s:Alphabet}");
+ assertEquals(kr.getTotalResults(),7);
+ assertEquals(kr.getStartIndex(),0);
+ assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{16:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
+ assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-16 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
- // 16
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(kr.getQuery(),"{16: tokens:s:Alphabet}");
- assertEquals(kr.getTotalResults(),7);
- assertEquals(kr.getStartIndex(),0);
- assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{16:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
- assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-16 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
-
- // 127
- json = getString(getClass().getResource("/queries/bugs/greater_highlights_127.jsonld").getFile());
+ // 127
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_127.jsonld").getFile());
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(kr.getQuery(),"{127: tokens:s:Alphabet}");
- assertEquals(kr.getTotalResults(),7);
- assertEquals(kr.getStartIndex(),0);
- assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{127:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
- assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-127 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(kr.getQuery(),"{127: tokens:s:Alphabet}");
+ assertEquals(kr.getTotalResults(),7);
+ assertEquals(kr.getStartIndex(),0);
+ assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [{127:Alphabet}] stammende Urform des Buchstaben ist wahrscheinlich ...");
+ assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\"><em class=\"class-127 level-0\">Alphabet</em></span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
- // 255
- json = getString(getClass().getResource("/queries/bugs/greater_highlights_255.jsonld").getFile());
+ // 255
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_255.jsonld").getFile());
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(kr.getQuery(),"{255: tokens:s:Alphabet}");
- assertEquals(kr.getTotalResults(),7);
- assertEquals(kr.getStartIndex(),0);
- assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [Alphabet] stammende Urform des Buchstaben ist wahrscheinlich ...");
- assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\">Alphabet</span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(kr.getQuery(),"{255: tokens:s:Alphabet}");
+ assertEquals(kr.getTotalResults(),7);
+ assertEquals(kr.getStartIndex(),0);
+ assertEquals(kr.getMatch(0).getSnippetBrackets(),"... 2. Herkunft Die aus dem proto-semitischen [Alphabet] stammende Urform des Buchstaben ist wahrscheinlich ...");
+ assertEquals(kr.getMatch(0).getSnippetHTML(),"<span class=\"context-left\"><span class=\"more\"></span>2. Herkunft Die aus dem proto-semitischen </span><span class=\"match\">Alphabet</span><span class=\"context-right\"> stammende Urform des Buchstaben ist wahrscheinlich<span class=\"more\"></span></span>");
- // 300
- json = getString(getClass().getResource("/queries/bugs/greater_highlights_300.jsonld").getFile());
+ // 300
+ json = getString(getClass().getResource("/queries/bugs/greater_highlights_300.jsonld").getFile());
- ks = new KorapSearch(json);
- kr = ks.run(ki);
- assertEquals(709, kr.getError(0).getCode());
- assertEquals("Valid class numbers exceeded", kr.getError(0).getMessage());
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+ assertEquals(709, kr.getError(0).getCode());
+ assertEquals("Valid class numbers exceeded", kr.getError(0).getMessage());
- assertEquals(kr.getError(0).getMessage(),"Valid class numbers exceeded");
+ assertEquals(kr.getError(0).getMessage(),"Valid class numbers exceeded");
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestResponse.java b/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
index 2ec3345..4cb46e2 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
@@ -86,6 +86,7 @@
public void testResponseDeserialzation () throws IOException {
String jsonResponse = "{\"version\":\"0.38\"}";
KorapResponse kresp = mapper.readValue(jsonResponse, KorapResponse.class);
+
assertEquals("0.38", kresp.getVersion());
assertNull(kresp.getName());
assertEquals(jsonResponse, kresp.toJsonString());
@@ -142,9 +143,11 @@
assertTrue(kresp.hasErrors());
assertFalse(kresp.hasMessages());
assertEquals(kresp.getError(0).getMessage(), "This is a single error");
- assertEquals(kresp.getWarning(0).getMessage(), "Response time exceeded");
- assertEquals(kresp.getWarning(1).getMessage(), "This is a warning");
- assertEquals(kresp.getWarning(2).getMessage(), "This is a second warning");
+
+ // THIS MAY BREAK!
+ assertEquals(kresp.getWarning(0).getMessage(), "This is a warning");
+ assertEquals(kresp.getWarning(1).getMessage(), "This is a second warning");
+ assertEquals(kresp.getWarning(2).getMessage(), "Response time exceeded");
assertEquals("0.49", kresp.getVersion());
assertEquals("seaweed", kresp.getName());
assertEquals("40.5s", kresp.getBenchmark());