src/main/java/de/ids_mannheim/korap/KorapIndex.java - KorAP/Krill - Gitiles

 package de.ids_mannheim.korap;

 import java.util.*;

 import java.io.File;
 import java.io.IOException;

 // import java.net.URL;

 import java.nio.ByteBuffer;
 import java.util.zip.GZIPInputStream;
 import java.io.FileInputStream;

 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.QueryWrapperFilter;

 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;

 import org.apache.lucene.document.Document;

 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;

 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;

 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.DocIdSetIterator;

 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.FixedBitSet;

 // Automata
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.RegExp;
 import org.apache.lucene.util.automaton.CompiledAutomaton;

 import com.fasterxml.jackson.annotation.*;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import de.ids_mannheim.korap.KorapResult;
 import de.ids_mannheim.korap.KorapMatch;
 import de.ids_mannheim.korap.KorapCollection;
 import de.ids_mannheim.korap.KorapSearch;
 import de.ids_mannheim.korap.index.FieldDocument;
 import de.ids_mannheim.korap.index.PositionsToOffset;
 import de.ids_mannheim.korap.index.TermInfo;
 import de.ids_mannheim.korap.index.SpanInfo;
 import de.ids_mannheim.korap.index.MatchIdentifier;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /*
   Todo: Use FieldCache!
   TODO: Reuse the indexreader everywhere - it should be threadsafe!

   http://invertedindex.blogspot.co.il/2009/04/lucene-dociduid-mapping-and-payload.html
   see korap/search.java -> retrieveTokens

   Support a callback for interrupts (to stop the searching)!

   Support multiple indices.

   Support frequency search with regular expressions, so multiple bookkeeping:
   c<:VVFIN:ging:gehen:past::
   c>:VVFIN:gnig:neheg:past::
   -> search for frequencies of VVFIN/gehen
   -> c:VVFIN:[^:]*?:gehen:past:...
 */

 /**
  * KorapIndex implements a simple API for searching in and writing to a
  * Lucene index and requesting several pieces of information about the index's nature.
  *
  * @author ndiewald
  */
 public class KorapIndex {
     // Directory the index is written to and read from
     private Directory directory;

     // Temp:
     // Assigned in openReader() and handed out by reader()
     public IndexReader reader;

     private IndexWriter writer;
     private IndexSearcher searcher;
     // True while the reader assigned in openReader() has not been closed by closeReader()
     private boolean readerOpen = false;
     // Number of documents added since the last commit
     private int commitCounter = 0;
     // addDoc() commits once commitCounter exceeds this value
     private int autoCommit = 500; // Todo: Use configuration
     // Replaced by a fresh map on every search() and passed to SpanQuery.getSpans()
     private HashMap termContexts;
     // Deserializes JSON input into FieldDocument objects
     private ObjectMapper mapper = new ObjectMapper();


     // Shared buffers: bb is referenced in numberOfAtomic(), bbTerm stages
     // term payloads in getMatchInfo(); bbOffset is not referenced in the
     // code visible here.
     private static ByteBuffer bb       = ByteBuffer.allocate(4),
 	                      bbOffset = ByteBuffer.allocate(8),
 	                      bbTerm   = ByteBuffer.allocate(16);

     // Scratch array for the first four payload bytes read in numberOfAtomic()
     private byte[] pl = new byte[4];

     // Names of the stored fields loaded for every match (filled in the constructor)
     private Set<String> fieldsToLoad;

     // Logger
     private final static Logger log = LoggerFactory.getLogger(KorapIndex.class);

     /**
      * Creates an index that lives in a fresh {@link RAMDirectory}.
      *
      * @throws IOException if the index writer cannot be created.
      */
     public KorapIndex () throws IOException {
         this(new RAMDirectory());
     }


     /**
      * Creates an index on top of a file system directory.
      *
      * @param index Path of the directory, opened via {@link FSDirectory#open}.
      * @throws IOException if the directory or the index writer cannot be opened.
      */
     public KorapIndex (String index) throws IOException {
         this(FSDirectory.open(new File(index)));
     }

     /**
      * Creates an index on top of the given Lucene directory, registers the
      * stored fields that are loaded for matches and opens the index writer.
      *
      * @param directory The directory to read from and write to.
      * @throws IOException if the index writer cannot be created.
      */
     public KorapIndex (Directory directory) throws IOException {
         this.directory = directory;

         // Stored fields that are fetched for every match
         this.fieldsToLoad = new HashSet<String>(16);
         Collections.addAll(
             this.fieldsToLoad,
             "author", "ID", "title", "subTitle", "textClass", "pubPlace",
             "pubDate", "corpusID", "foundries", "layerInfo", "tokenization"
         );

         // "textClass" and "foundries" are split on whitespace only,
         // every other field falls back to the standard analyzer
         final Map<String,Analyzer> whitespaceFields = new HashMap<String,Analyzer>();
         for (String name : new String[] { "textClass", "foundries" }) {
             whitespaceFields.put(name, new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
         }

         final Analyzer analyzer = new PerFieldAnalyzerWrapper(
             new StandardAnalyzer(Version.LUCENE_CURRENT),
             whitespaceFields
         );

         // The writer is configured with the per-field analyzer
         this.writer = new IndexWriter(
             this.directory,
             new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
         );
     }


     /**
      * Closes the reader (if open) and the writer.
      *
      * The writer is closed even when closing the reader fails, so a
      * failing reader cannot leave the writer open.
      *
      * @throws IOException if closing the reader or the writer fails.
      */
     public void close () throws IOException {
         try {
             this.closeReader();
         }
         finally {
             this.closeWriter();
         }
     }


     /**
      * Returns the index reader and opens it first if it is not open.
      *
      * @return The reader held by this index. A failure to open is only
      *         logged by {@link #openReader()}, so the previously held
      *         value (possibly <tt>null</tt>) is returned in that case.
      */
     public IndexReader reader () {
         if (readerOpen)
             return this.reader;

         this.openReader();
         return this.reader;
     }

     /**
      * Returns a searcher on top of the current reader.
      *
      * The reader is requested first: {@link #commit()} closes the reader
      * without touching the searcher, so a cached searcher could otherwise
      * still point to a closed reader. Requesting the reader reopens it if
      * necessary, and {@link #openReader()} rebinds an existing searcher
      * to the newly opened reader.
      *
      * @return The searcher for the current reader.
      */
     public IndexSearcher searcher () {
         final IndexReader current = this.reader();

         if (this.searcher == null) {
             this.searcher = new IndexSearcher(current);
         }
         return this.searcher;
     }

     /**
      * Closes the index writer.
      *
      * @throws IOException if the writer fails to close.
      */
     public void closeWriter () throws IOException {
         writer.close();
     }


     /**
      * Closes the index reader in case it is open; does nothing otherwise.
      *
      * @throws IOException if the reader fails to close.
      */
     public void closeReader () throws IOException {
         // Nothing to do for a reader that was never opened or is already closed
         if (!readerOpen)
             return;

         this.reader.close();
         readerOpen = false;
     }


     /**
      * Opens a reader on the directory and marks it as open.
      * An already existing searcher is replaced by one on the new reader.
      * An IO failure is logged as a warning and not propagated.
      */
     public void openReader () {
         try {
             final IndexReader opened = DirectoryReader.open(this.directory);
             this.reader = opened;
             readerOpen = true;

             // Keep an existing searcher in sync with the new reader
             if (this.searcher != null)
                 this.searcher = new IndexSearcher(opened);
         }
         catch (IOException e) {
             log.warn( e.getLocalizedMessage() );
         }
     }


     /**
      * Adds a document to the index and commits automatically once more
      * than {@link #autoCommit()} documents were added since the last commit.
      *
      * @param fd The document to add.
      * @return The document that was passed in.
      * @throws IOException if adding or committing fails.
      */
     public FieldDocument addDoc (FieldDocument fd) throws IOException {
         this.writer.addDocument(fd.doc);

         commitCounter++;
         if (commitCounter > autoCommit) {
             this.commit();
             commitCounter = 0;
         }

         return fd;
     }

     // Add with file!
     /**
      * Parses a JSON string into a document and adds it to the index.
      *
      * @param json The JSON representation of the document.
      * @return The parsed document.
      * @throws IOException if parsing or adding fails.
      */
     public FieldDocument addDoc (String json) throws IOException {
         final FieldDocument parsed = mapper.readValue(json, FieldDocument.class);
         return addDoc(parsed);
     }

     /**
      * Parses a JSON file into a document and adds it to the index.
      *
      * @param json The file containing the JSON representation.
      * @return The parsed document.
      * @throws IOException if reading, parsing or adding fails.
      */
     public FieldDocument addDoc (File json) throws IOException {
         final FieldDocument parsed = mapper.readValue(json, FieldDocument.class);
         return addDoc(parsed);
     }

     /**
      * Adds a document from an uncompressed file.
      * Shortcut for {@link #addDocFile(String, boolean)} with gzip disabled.
      *
      * @param json Passed on unchanged as the first argument.
      * @return Whatever the two-argument variant returns.
      * @throws IOException declared by this signature; the two-argument
      *         variant it delegates to does not declare it.
      */
     public FieldDocument addDocFile(String json) throws IOException {
         final boolean gzip = false;
         return addDocFile(json, gzip);
     }

     /**
      * Adds a document from a JSON file, optionally gzip-compressed.
      *
      * @param json Path of the file containing the JSON representation.
      * @param gzip <tt>true</tt> if the file is gzip-compressed.
      * @return The added document, or <tt>null</tt> if the file could not
      *         be read or parsed (the failure is logged).
      */
     public FieldDocument addDocFile(String json, boolean gzip) {
         try {
             // The argument is a path, so it has to be read as a file
             // and must not be parsed as JSON content itself
             if (!gzip)
                 return this.addDoc(new File(json));

             // Both streams are released even if the gzip header is invalid
             try (FileInputStream fileIn = new FileInputStream(json);
                  GZIPInputStream gzipIn = new GZIPInputStream(fileIn)) {
                 FieldDocument fd = this.mapper.readValue(gzipIn, FieldDocument.class);
                 return this.addDoc(fd);
             }
         }
         catch (IOException e) {
             log.error("File " + json + " not found", e);
         }
         return (FieldDocument) null;
     }

     /**
      * Commits pending documents, if any, resets the commit counter and
      * closes the reader so that the next {@link #reader()} call reopens it.
      *
      * @throws IOException if committing or closing the reader fails.
      */
     public void commit () throws IOException {
         // Nothing was added since the last commit
         if (commitCounter <= 0)
             return;

         this.writer.commit();
         commitCounter = 0;
         this.closeReader();
     }


     /**
      * Get the autoCommit value.
      *
      * @return The threshold compared against the commit counter in
      *         {@link #addDoc(FieldDocument)}.
      */
     public int autoCommit () {
         return autoCommit;
     }


     /**
      * Set the autoCommit value.
      *
      * @param number The new threshold compared against the commit counter
      *        in {@link #addDoc(FieldDocument)}.
      */
     public void autoCommit (int number) {
         autoCommit = number;
     }


     /**
      * Sums up the numeric payloads of a term in one atomic reader.
      *
      * For every permitted document containing the term, the first four
      * payload bytes of the term's first position are read as an integer
      * and added to the result.
      *
      * @param docvec The documents to take into account.
      * @param atomic The atomic reader context to search in.
      * @param term The term carrying the payload.
      * @return The sum of all payload values; 0 if the field, the term,
      *         its positions or its payloads are not available.
      * @throws IOException if reading from the index fails.
      */
     private long numberOfAtomic (Bits docvec,
                                  AtomicReaderContext atomic,
                                  Term term) throws IOException {

         // terms() also covers readers without any postings
         final Terms terms = atomic.reader().terms(term.field());

         // The field does not exist in this segment
         if (terms == null)
             return 0;

         // Todo: Maybe reuse a termsEnum!
         final TermsEnum termsEnum = terms.iterator(null);

         // Set the position of the iterator to the term that is sought
         if (!termsEnum.seekExact(term.bytes(), true))
             return 0;

         // Start an iterator to fetch all payloads of the term
         final DocsAndPositionsEnum docs = termsEnum.docsAndPositions(
             docvec,
             null,
             DocsAndPositionsEnum.FLAG_PAYLOADS
         );

         // No positions were indexed for this field
         if (docs == null)
             return 0;

         long occurrences = 0;

         while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {

             // Go to the first position of the term in the document
             docs.nextPosition();

             final BytesRef payload = docs.getPayload();

             // Skip positions without a complete integer payload
             if (payload == null || payload.length < 4)
                 continue;

             // Read the integer in place, honoring the BytesRef offset
             occurrences += ByteBuffer.wrap(payload.bytes, payload.offset, 4).getInt();
         }

         // Return the sum of all occurrences
         return occurrences;
     }


     /**
      * Search for the number of occurrences of different types,
      * e.g. "documents", "sentences" etc., in a collection.
      *
      * For "documents" the set bits of the collection are counted per
      * atomic reader, or all documents of the reader if the collection
      * reports a count of zero or less. For every other type the payloads
      * of the term "-:" + type are summed up; an IO error in that part is
      * logged and the sum collected so far is returned.
      *
      * @param collection The collection restricting the documents.
      * @param field The field containing the textual data and the annotations.
      * @param type The type of meta information, e.g. "documents" or "sentences".
      * @return The number of occurrences.
      * @throws IOException if counting documents fails.
      */
     public long numberOf (KorapCollection collection, String field, String type) throws IOException {
         final boolean countDocs = type.equals("documents");

         // Short cut for documents in an unrestricted collection
         if (countDocs && collection.getCount() <= 0)
             return (long) this.reader().numDocs();

         // Short cut for documents in a restricted collection
         if (countDocs) {
             long docTotal = 0;
             for (AtomicReaderContext leaf : this.reader().leaves())
                 docTotal += collection.bits(leaf).cardinality();
             return docTotal;
         }

         // The meta information is stored as payload of a special term
         final Term metaTerm = new Term(field, "-:" + type);

         long sum = 0;
         try {
             // Collect the occurrences of all atomic readers
             for (AtomicReaderContext leaf : this.reader().leaves())
                 sum += this.numberOfAtomic(collection.bits(leaf), leaf, metaTerm);
         }
         catch (IOException e) {
             // Something went wrong
             log.warn( e.getLocalizedMessage() );
         }

         return sum;
     }

     /**
      * Search for the number of occurrences of a type using a newly
      * created collection for this index.
      *
      * @param field The field containing the textual data and the annotations.
      * @param type The type of meta information, e.g. "documents" or "sentences".
      * @return The number of occurrences.
      * @throws IOException if counting fails.
      */
     public long numberOf (String field, String type) throws IOException {
         final KorapCollection fresh = new KorapCollection(this);
         return numberOf(fresh, field, type);
     }


     /**
      * Search for the number of occurrences of different types,
      * e.g. "documents", "sentences" etc., in the "tokens" field.
      *
      * @param type The type of meta information, e.g. "documents" or "sentences".
      * @return The number of occurrences.
      * @throws IOException if counting fails.
      *
      * @see #numberOf(String, String)
      */
     public long numberOf (String type) throws IOException {
         final String baseField = "tokens";
         return numberOf(baseField, type);
     }


     /**
      * Search for the number of occurrences of different types,
      * e.g. "documents", "sentences" etc., in a specific set of documents.
      *
      * For "documents" the set bits of the vector are counted; this works
      * for any {@link Bits} implementation, not only {@link OpenBitSet}.
      * For every other type the payloads of the term "-:" + type are summed
      * up over all atomic readers, applying the same vector to each of them.
      * An IO error in that part is logged and the sum so far is returned.
      *
      * @param docvec The document vector for filtering the search space.
      * @param field The field containing the textual data and the annotations.
      * @param type The type of meta information, e.g. "documents" or "sentences".
      * @return The number of occurrences.
      * @throws IOException declared for compatibility with the other variants.
      *
      * @see #numberOf(String, String)
      */
     public long numberOf (Bits docvec, String field, String type) throws IOException {

         // Shortcut for documents
         if (type.equals("documents")) {
             if (docvec instanceof OpenBitSet)
                 return ((OpenBitSet) docvec).cardinality();

             if (docvec instanceof FixedBitSet)
                 return ((FixedBitSet) docvec).cardinality();

             // Generic fallback for all other implementations
             long setBits = 0;
             for (int i = 0, length = docvec.length(); i < length; i++) {
                 if (docvec.get(i))
                     setBits++;
             }
             return setBits;
         }

         Term term = new Term(field, "-:" + type);

         // The per-reader sums are long values - accumulate without narrowing
         long occurrences = 0;
         try {
             for (AtomicReaderContext atomic : this.reader().leaves()) {
                 occurrences += this.numberOfAtomic(docvec, atomic, term);
             }
         }
         catch (IOException e) {
             log.warn( e.getLocalizedMessage() );
         }

         return occurrences;
     }

     /**
      * Counts the documents of the index.
      *
      * @return The result of <tt>numberOf("documents")</tt>.
      * @throws IOException if counting fails.
      * @deprecated Use {@link #numberOf(String)} with "documents" instead.
      */
     @Deprecated
     public long countDocuments () throws IOException {
         log.warn("countDocuments() is DEPRECATED in favor of numberOf(\"documents\")!");
         final long documents = numberOf("documents");
         return documents;
     }


     /**
      * Counts the tokens of the index.
      *
      * @return The result of <tt>numberOf("tokens")</tt>.
      * @throws IOException if counting fails.
      * @deprecated Use {@link #numberOf(String)} with "tokens" instead.
      */
     @Deprecated
     public long countAllTokens () throws IOException {
         log.warn("countAllTokens() is DEPRECATED in favor of numberOf(\"tokens\")!");
         final long tokens = numberOf("tokens");
         return tokens;
     }


     /**
      * Get a match by its identifier without annotation information.
      * The "tokens" field is used and highlights are included.
      *
      * @param id The identifier string of the match.
      * @return The match.
      */
     public KorapMatch getMatch (String id) {
         final String noFoundry = null;
         final String noLayer = null;
         return getMatchInfo(id, "tokens", false, noFoundry, noLayer, false, true);
     }

     /**
      * Get a match by its identifier including annotation information.
      *
      * @param id The identifier string of the match.
      * @param field The field containing the textual data and the annotations.
      * @param foundry The foundry to limit the annotations to, or <tt>null</tt>.
      * @param layer The layer to limit the annotations to, or <tt>null</tt>.
      * @param includeSpans Whether span annotations are of interest.
      * @param includeHighlights Whether highlights are included in the match.
      * @return The match.
      */
     public KorapMatch getMatchInfo (String id,
                                     String field,
                                     String foundry,
                                     String layer,
                                     boolean includeSpans,
                                     boolean includeHighlights) {
         final boolean withInfo = true;
         return getMatchInfo(
             id, field, withInfo, foundry, layer, includeSpans, includeHighlights
         );
     }

     /**
      * Get a match.
      * BE AWARE - THIS IS STILL A PLAYGROUND!
      *
      * The document is looked up by the document ID and the corpus ID
      * of the match identifier. If <tt>info</tt> is set, the term vector
      * of the document is scanned for annotations positioned inside the
      * match, which are added as annotations or relations to the match.
      * An IO error is logged and set as the error of the returned match.
      *
      * @param idString The identifier string of the match.
      * @param field The field containing the textual data and the annotations.
      * @param info Whether annotation information should be collected.
      * @param foundry The foundry to limit the annotations to, or <tt>null</tt>.
      * @param layer The layer to limit the annotations to, or <tt>null</tt>;
      *        only taken into account if a foundry is given.
      * @param includeSpans Whether span annotations are of interest.
      * @param includeHighlights Whether highlights are included in the match.
      * @return The match; without document data if no document was found.
      */
     public KorapMatch getMatchInfo (String idString,
                                     String field,
                                     boolean info,
                                     String foundry,
                                     String layer,
                                     boolean includeSpans,
                                     boolean includeHighlights) {

         KorapMatch match = new KorapMatch(idString, includeHighlights);

         // Create a filter based on the corpusID and the docID
         BooleanQuery bool = new BooleanQuery();
         bool.add(new TermQuery(new Term("ID",       match.getDocID())),    BooleanClause.Occur.MUST);
         bool.add(new TermQuery(new Term("corpusID", match.getCorpusID())), BooleanClause.Occur.MUST);
         Filter filter = new QueryWrapperFilter(bool);

         CompiledAutomaton fst = null;

         if (info) {
             /* Create an automaton for prefixed terms of interest.
              * You can define the necessary foundry, the necessary layer,
              * in case the foundry is given, and if span annotations
              * are of interest.
              */
             // NOTE(review): foundry and layer are inserted into the
             // expression unescaped - confirm callers pass plain names only
             StringBuilder regex = new StringBuilder();

             // Todo: Only support one direction!
             if (includeSpans)
                 regex.append("((\">\"|\"<\"\">\"?)\":\")?");
             if (foundry != null) {
                 regex.append(foundry).append('/');
                 if (layer != null)
                     regex.append(layer).append(":");
             }
             else if (includeSpans) {
                 regex.append("([^-is]|[-is][^:])");
             }
             else {
                 regex.append("([^-is<>]|([-is>][^:])|<[^:>])");
             }
             regex.append("(.){1,}|_[0-9]+");

             log.trace("The final regexString is {}", regex.toString());
             RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
             fst = new CompiledAutomaton(regexObj.toAutomaton());
             log.trace("The final regexObj is {}", regexObj.toString());
         }

         try {
             // Iterate over all atomic indices and find the matching document
             for (AtomicReaderContext atomic : this.reader().leaves()) {

                 // Retrieve the single document of interest
                 DocIdSet filterSet = filter.getDocIdSet(
                     atomic,
                     atomic.reader().getLiveDocs()
                 );

                 // A filter may return null if it accepts no document
                 if (filterSet == null)
                     continue;

                 DocIdSetIterator filterIterator = filterSet.iterator();

                 // No document found
                 if (filterIterator == null)
                     continue;

                 // Go to the matching doc - and remember its ID
                 int localDocID = filterIterator.nextDoc();

                 if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
                     continue;

                 // We've found the correct document! Hurray!
                 log.trace("We've found a matching document");
                 HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
                 fieldsToLoadLocal.add(field);

                 // Get terms from the document
                 Terms docTerms = atomic.reader().getTermVector(localDocID, field);

                 // Load the necessary fields of the document
                 Document doc = atomic.reader().document(localDocID, fieldsToLoadLocal);

                 // Put some more information to the match
                 PositionsToOffset pto = new PositionsToOffset(atomic, field);
                 match.setPositionsToOffset(pto);
                 match.setLocalDocID(localDocID);
                 match.populateDocument(doc, field, fieldsToLoadLocal);

                 log.trace("The document has the id '{}'", match.getDocID());

                 if (!info) break;

                 // Without a term vector there are no annotations to collect
                 if (docTerms == null) {
                     log.warn("No term vector available for field {}", field);
                     break;
                 }

                 // Limit the terms to all the terms of interest
                 TermsEnum termsEnum = docTerms.intersect(fst, null);

                 DocsAndPositionsEnum docs = null;

                 // List of terms to populate
                 SpanInfo termList = new SpanInfo(pto, localDocID);

                 // Iterate over all terms in the document
                 while (termsEnum.next() != null) {

                     // Get the positions and payloads of the term in the document
                     // The bitvector may look different (don't know why)
                     // and so the local ID may differ.
                     // That's why the requesting bitset is null.
                     docs = termsEnum.docsAndPositions(
                         null,
                         docs,
                         DocsAndPositionsEnum.FLAG_PAYLOADS
                     );

                     // Init document iterator
                     docs.nextDoc();

                     // Should never happen ... but hell.
                     if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS)
                         continue;

                     // How often does this term occur in the document?
                     int termOccurrences = docs.freq();

                     // String representation of the term
                     String termString = termsEnum.term().utf8ToString();

                     // Iterate over all occurrences
                     for (int i = 0; i < termOccurrences; i++) {

                         // Init positions and get the current
                         int pos = docs.nextPosition();

                         // Check, if the position of the term is in the interesting area
                         if (pos >= match.getStartPos() && pos < match.getEndPos()) {

                             log.trace(
                                 ">> {}: {}-{}-{}",
                                 termString,
                                 docs.freq(),
                                 pos,
                                 docs.getPayload()
                             );

                             BytesRef payload = docs.getPayload();

                             // Copy the payload
                             // NOTE(review): bbTerm is a static buffer shared by
                             // all instances - confirm no concurrent callers
                             bbTerm.clear();
                             if (payload != null) {
                                 bbTerm.put(
                                     payload.bytes,
                                     payload.offset,
                                     payload.length
                                 );
                             }
                             TermInfo ti = new TermInfo(termString, pos, bbTerm).analyze();
                             if (ti.getEndPos() < match.getEndPos()) {
                                 log.trace("Add {}", ti.toString());
                                 termList.add(ti);
                             }
                         }
                     }
                 }

                 // Add annotations based on the retrieved infos
                 for (TermInfo t : termList.getTerms()) {
                     log.trace("Add term {}/{}:{} to {}({})-{}({})",
                               t.getFoundry(),
                               t.getLayer(),
                               t.getValue(),
                               t.getStartChar(),
                               t.getStartPos(),
                               t.getEndChar(),
                               t.getEndPos());

                     // Compare by value - the identity of the strings is not guaranteed
                     final String type = t.getType();
                     if ("term".equals(type) || "span".equals(type))
                         match.addAnnotation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
                     else if ("relSrc".equals(type))
                         match.addRelation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
                 }

                 break;
             }
         }
         catch (IOException e) {
             log.warn(e.getLocalizedMessage());
             match.setError(e.getLocalizedMessage());
         }

         return match;
     }


     /**
      * Search in the index using a newly created collection and the
      * default search settings.
      *
      * @param query The span query to search for.
      * @return The search result.
      */
     public KorapResult search (SpanQuery query) {
         final KorapCollection everything = new KorapCollection(this);
         final KorapSearch request = new KorapSearch(query);
         return search(everything, request);
     }

     /**
      * Search in the index using a newly created collection and a
      * defined number of items per page.
      *
      * @param query The span query to search for.
      * @param count The number of matches to retrieve.
      * @return The search result.
      */
     public KorapResult search (SpanQuery query, short count) {
         final KorapCollection everything = new KorapCollection(this);
         return search(everything, new KorapSearch(query).setCount(count));
     }

     /**
      * Search in the index using a newly created collection with explicit
      * paging and context settings.
      *
      * @param query The span query to search for.
      * @param startIndex The index of the first match to retrieve.
      * @param count The number of matches to retrieve.
      * @param leftTokenContext Passed to <tt>setToken</tt> of the left context.
      * @param leftContext The length of the left context.
      * @param rightTokenContext Passed to <tt>setToken</tt> of the right context.
      * @param rightContext The length of the right context.
      * @return The search result.
      */
     public KorapResult search (SpanQuery query,
                                int startIndex,
                                short count,
                                boolean leftTokenContext,
                                short leftContext,
                                boolean rightTokenContext,
                                short rightContext) {
         final KorapCollection everything = new KorapCollection(this);
         return search(
             everything, query, startIndex, count,
             leftTokenContext, leftContext,
             rightTokenContext, rightContext
         );
     }

     /**
      * Search in the index using a newly created collection.
      *
      * @param ks The search object containing the query and its settings.
      * @return The search result.
      */
     public KorapResult search (KorapSearch ks) {
         // TODO: This might leak
         final KorapCollection everything = new KorapCollection(this);
         return search(everything, ks);
     }

     /**
      * Search in a collection with explicit paging and context settings.
      * The settings are wrapped in a new search object.
      *
      * @param collection The collection restricting the documents.
      * @param query The span query to search for.
      * @param startIndex The index of the first match to retrieve.
      * @param count The number of matches to retrieve.
      * @param leftTokenContext Passed to <tt>setToken</tt> of the left context.
      * @param leftContext The length of the left context.
      * @param rightTokenContext Passed to <tt>setToken</tt> of the right context.
      * @param rightContext The length of the right context.
      * @return The search result.
      */
     public KorapResult search (KorapCollection collection,
                                SpanQuery query,
                                int startIndex,
                                short count,
                                boolean leftTokenContext,
                                short leftContext,
                                boolean rightTokenContext,
                                short rightContext) {
         final KorapSearch request = new KorapSearch(query);

         // Paging
         request.setStartIndex(startIndex).setCount(count);

         // Context to the left and to the right of the match
         request.leftContext.setToken(leftTokenContext).setLength(leftContext);
         request.rightContext.setToken(rightTokenContext).setLength(rightContext);

         return search(collection, request);
     }


     /**
      * Search in a collection.
      *
      * The query is rewritten and evaluated on every atomic reader.
      * Matches from the start index up to the end of the page (or the
      * limit, if it is lower) are added to the result together with
      * their highlights and the stored document fields. Unless cut off
      * is requested, the remaining spans are counted - up to the limit,
      * if one is set - to determine the total number of results;
      * with cut off the total is reported as -1.
      * An IO error is logged and set as the error of the result.
      *
      * @param collection The collection restricting the documents.
      * @param ks The search object containing the query and its settings.
      * @return The search result.
      */
     public KorapResult search (KorapCollection collection, KorapSearch ks) {
         log.trace("Start search");

         this.termContexts = new HashMap<Term, TermContext>();

         SpanQuery query = ks.getQuery();

         // Get the field of textual data and annotations
         String field = query.getField();

         // Todo: Make kr subclassing ks - so ks has a method for a new KorapResult!
         KorapResult kr = new KorapResult(
             query.toString(),
             ks.getStartIndex(),
             ks.getCount(),
             ks.leftContext.isToken(),
             ks.leftContext.getLength(),
             ks.rightContext.isToken(),
             ks.rightContext.getLength()
         );

         HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
         fieldsToLoadLocal.add(field);

         // i counts the spans seen so far across all atomic readers
         int i = 0;
         long t1 = 0, t2 = 0;
         int startIndex = kr.getStartIndex();
         int count = kr.getItemsPerPage();
         int hits = kr.itemsPerPage() + startIndex;
         int limit = ks.getLimit();
         boolean cutoff = ks.doCutOff();

         if (limit > 0) {
             // Never retrieve more spans than the limit allows
             if (hits > limit)
                 hits = limit;

             // The requested page starts beyond the limit
             if (limit < startIndex)
                 return kr;
         }

         try {

             // Rewrite query (for regex and wildcard queries)
             for (Query rewrittenQuery = query.rewrite(this.reader());
                  rewrittenQuery != (Query) query;
                  rewrittenQuery = query.rewrite(this.reader())) {
                 query = (SpanQuery) rewrittenQuery;
             }

             for (AtomicReaderContext atomic : this.reader().leaves()) {

                 // Use OpenBitSet;
                 Bits bitset = collection.bits(atomic);

                 PositionsToOffset pto = new PositionsToOffset(atomic, field);

                 Spans spans = query.getSpans(atomic, bitset, termContexts);

                 IndexReader lreader = atomic.reader();

                 // TODO: Get document information from Cache!

                 // See: http://www.ibm.com/developerworks/java/library/j-benchmark1/index.html
                 t1 = System.nanoTime();

                 for (; i < hits; i++) {

                     log.trace("Match Nr {}/{}", i, count);

                     // There are no more spans to find
                     if (!spans.next())
                         break;

                     // The next matches are not yet part of the result
                     if (startIndex > i)
                         continue;

                     int localDocID = spans.doc();
                     int docID = atomic.docBase + localDocID;

                     // Do not load all of this, in case the doc is the same!
                     Document doc = lreader.document(localDocID, fieldsToLoadLocal);
                     KorapMatch match = kr.addMatch(
                         pto,
                         localDocID,
                         spans.start(),
                         spans.end()
                     );

                     if (spans.isPayloadAvailable()) {

                         // TODO: Here are offsets and highlight offsets!
                         // <> payloads have 12 bytes (iii) or 8!?
                         // highlightoffsets have 11 bytes (iis)!

                         try {
                             ByteBuffer bb = ByteBuffer.allocate(10);
                             for (byte[] b : spans.getPayload()) {

                                 log.trace("Found a payload!!! with length {}", b.length);

                                 // Todo element searches!

                                 // Highlights!
                                 if (b.length == 9) {
                                     bb.put(b);
                                     bb.rewind();

                                     int start = bb.getInt();
                                     int end = bb.getInt() -1;
                                     byte number = bb.get();

                                     log.trace("Have a payload: {}-{}", start, end);
                                     match.addHighlight(start, end, number);
                                 }

                                 // Element payload for match!
                                 // This MAY BE the correct match
                                 else if (b.length == 8) {
                                     bb.put(b);
                                     bb.rewind();

                                     // Keep the smallest start ...
                                     if (match.potentialStartPosChar == -1) {
                                         match.potentialStartPosChar = bb.getInt(0);
                                     }
                                     else if (bb.getInt(0) < match.potentialStartPosChar) {
                                         match.potentialStartPosChar = bb.getInt(0);
                                     }

                                     // ... and the largest end
                                     if (bb.getInt(4) > match.potentialEndPosChar)
                                         match.potentialEndPosChar = bb.getInt(4);

                                     log.trace("Element payload from {} to {}",
                                               match.potentialStartPosChar,
                                               match.potentialEndPosChar);
                                 }

                                 else if (b.length == 4) {
                                     bb.put(b);
                                     bb.rewind();
                                     log.debug("Unknown[4]: {}", bb.getInt());
                                 }

                                 bb.clear();
                             }
                         }

                         catch (Exception e) {
                             log.error(e.getMessage());
                         }
                     }

                     match.internalDocID = docID;
                     match.populateDocument(doc, field, fieldsToLoadLocal);

                     log.trace("I've got a match in {} of {}", match.getDocID(), count);
                 }

                 // Benchmark till now
                 if (i >= kr.itemsPerPage() &&
                     kr.getBenchmarkSearchResults().length() == 0) {
                     t2 = System.nanoTime();
                     kr.setBenchmarkSearchResults(t1, t2);
                 }

                 // Count the remaining spans for the total number of results.
                 // Can be disabled TEMPORARILY
                 while (!cutoff && spans.next()) {
                     // Stop counting as soon as the limit is reached
                     if (limit > 0 && i >= limit)
                         break;
                     i++;
                 }
             }

             t1 = System.nanoTime();
             kr.setBenchmarkHitCounter(t2, t1);
             if (kr.getBenchmarkSearchResults().length() == 0) {
                 kr.setBenchmarkSearchResults(t2, t1);
             }

             kr.setTotalResults(cutoff ? -1 : i);
         }
         catch (IOException e) {
             kr.setError("There was an IO error");
             log.warn( e.getLocalizedMessage() );
         }

         return kr;
     }
 };