Further improvements to the match data collector
diff --git a/pom.xml b/pom.xml
index e07c7c6..50707e7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,7 @@
<artifactId>lucene-core</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- Lucene queryparser dependency -->
@@ -67,7 +67,7 @@
<artifactId>lucene-queryparser</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- Lucene analyzers dependency -->
@@ -75,7 +75,7 @@
<artifactId>lucene-analyzers-common</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- JCache -->
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 9679d31..2384f12 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1,12 +1,13 @@
package de.ids_mannheim.korap;
+import java.util.*;
+
import java.io.File;
import java.io.IOException;
-import java.util.*;
-import java.net.URL;
+
+// import java.net.URL;
import java.nio.ByteBuffer;
-import java.util.HashMap;
import java.util.zip.GZIPInputStream;
import java.io.FileInputStream;
@@ -16,7 +17,6 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause;
-
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
@@ -73,13 +73,10 @@
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.PositionsToOffset;
import de.ids_mannheim.korap.index.TermInfo;
-import de.ids_mannheim.korap.document.KorapPrimaryData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.nio.ByteBuffer;
-
/*
Todo: Use FieldCache!
TODO: Reuse the indexreader everywhere - it should be threadsafe!
@@ -142,7 +139,7 @@
public KorapIndex (Directory directory) throws IOException {
this.directory = directory;
- fieldsToLoad = new HashSet<String>();
+ fieldsToLoad = new HashSet<String>(16);
fieldsToLoad.add("author");
fieldsToLoad.add("ID");
fieldsToLoad.add("title");
@@ -306,6 +303,7 @@
);
// Iterator is empty
+ // TODO: Maybe this is an error ...
if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS) {
return 0;
};
@@ -317,9 +315,7 @@
// Init nextDoc()
while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
- // Go to first term (initialization phase)
-// TODO: THIS MAY BE WRONG!
-// TODO:: DELETEEEEE AND TESTT!
+ // Initialize (go to first term)
docs.nextPosition();
// Copy payload with the offset of the BytesRef
@@ -458,125 +454,6 @@
* Get a match.
* BE AWARE - THIS IS STILL A PLAYGROUND!
*/
- public KorapMatch getMatch (String id) {
-
- String corpusID = "WPD";
- String docID = "WPD_AAA.00003";
- String field = "tokens"; // text field
- String foundry = "mate";
- String layer = "l";
- int startPos = 20;
- int endPos = 30;
- Boolean includeSpans = true;
-
- KorapMatch km = (KorapMatch) null;
- LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
-
- StringBuffer regex = new StringBuffer();
-
- // Todo: Ignore -: stuff!
-
- if (includeSpans)
- regex.append("((<>|<|>):)?");
- else
- regex.append("[^<>]");
- if (foundry != null)
- regex.append(foundry).append('/');
- if (layer != null)
- regex.append(layer).append(":");
- regex.append(".+?");
-
- BooleanQuery bool = new BooleanQuery();
- bool.add(new TermQuery(new Term("ID", docID)), BooleanClause.Occur.MUST);
- bool.add(new TermQuery(new Term("corpusID", corpusID)), BooleanClause.Occur.MUST);
-
- Filter filter = (Filter) new QueryWrapperFilter(bool);
-
- // Create an automaton for prefixed terms of interest:
- CompiledAutomaton fst = new CompiledAutomaton(
- new RegExp(regex.toString()).toAutomaton()
- );
-
- try {
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- DocIdSetIterator filterIter = filter.getDocIdSet(
- atomic,
- atomic.reader().getLiveDocs()
- ).iterator();
-
- // Go to the matching doc
- int localDocID = filterIter.nextDoc();
- if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
- continue;
-
- // We've found the correct document!
- HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
- fieldsToLoadLocal.add(field);
-
- // Load the necessary fields of the document
- Document doc = atomic.reader().document(localDocID, fieldsToLoadLocal);
- // Get terms from the document
- Terms docTerms = atomic.reader().getTermVector(localDocID, field);
-
- km = new KorapMatch(
- new PositionsToOffset(atomic, field),
- localDocID,
- startPos,
- endPos
- );
-
- // A termsEnum object could be reused here
- final TermsEnum termsEnum = docTerms.intersect(fst, null);
-
- // Create a bitset for the correct document
- // Yeah ... I know ... it could've been easier probably
- FixedBitSet bitset = new FixedBitSet(atomic.reader().numDocs());
- bitset.or(filterIter);
-
- DocsAndPositionsEnum docs = (DocsAndPositionsEnum) null;
-
- // Iterate over all terms in the document
- while (termsEnum.next() != null) {
- docs = termsEnum.docsAndPositions(
- bitset,
- docs,
- DocsAndPositionsEnum.FLAG_PAYLOADS
- );
-
- // Init docs
- docs.nextDoc();
-
- // How often does this term occur in the document?
- int termOccurrences = docs.freq();
-
- // Iterate over all occurrences
- for (int i = 0; i < termOccurrences; i++) {
-
- // Init positions and get the current
- int pos = docs.nextPosition();
-
- // Check, if the position of the term is in the interesting area
- if (pos >= startPos && pos <= endPos) {
- termList.add(new TermInfo(
- termsEnum.term().utf8ToString(),
- pos,
- docs.getPayload()
- ));
- };
- };
- };
-
- break;
- };
- }
- catch (IOException e) {
- // ...
- };
-
- return km;
- };
-
-
// TODO: collect all information based on a prefix (like cnx/p etc.)
// TODO: Generate a meaningful structure (e.g. a tree)
/*
@@ -585,6 +462,170 @@
public KorapInfo infoOf (KorapMatch km, String prefix);
*/
+ public KorapMatch getMatch (String id) {
+
+ // List of terms to populate
+ LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
+
+ KorapMatch match = new KorapMatch();
+
+ // That's purely temporary
+ String corpusID = "WPD";
+ String docID = "WPD_AAA.00003";
+ String field = "tokens"; // text field
+ String foundry = "mate";
+ String layer = "l";
+ int startPos = 25;
+ int endPos = 30;
+ Boolean includeSpans = true;
+
+ // Create a filter based on the corpusID and the docID
+ BooleanQuery bool = new BooleanQuery();
+ bool.add(new TermQuery(new Term("ID", docID)), BooleanClause.Occur.MUST);
+ bool.add(new TermQuery(new Term("corpusID", corpusID)), BooleanClause.Occur.MUST);
+ Filter filter = (Filter) new QueryWrapperFilter(bool);
+
+ // Create an automaton for prefixed terms of interest based on a Regex
+ // Todo: Ignore -: stuff!
+ StringBuffer regex = new StringBuffer();
+ if (includeSpans)
+ regex.append("(((\"<>\"|\"<\"|\">\")\":\")?");
+ else
+ regex.append("[^<>]");
+ if (foundry != null)
+ regex.append(foundry).append('/');
+ if (layer != null)
+ regex.append(layer).append(":");
+ regex.append("(.){1,})|_[0-9]+");
+
+ RegExp regexObj = new RegExp(regex.toString());
+ CompiledAutomaton fst = new CompiledAutomaton(regexObj.toAutomaton());
+ log.trace("The final regex is {}", regex.toString());
+
+ try {
+
+ // Iterate over all atomic indices and find the matching document
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ /*
+ DocIdSetIterator filterIter = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs()
+ ).iterator();
+ */
+
+ DocIdSet filterSet = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs()
+ );
+
+ // Create a bitset for the correct document
+ // Yeah ... I know ... it could've been easier probably
+ /*
+ FixedBitSet bitset = new FixedBitSet(atomic.reader().numDocs());
+ bitset.or(filterIter);
+ */
+ Bits bitset = filterSet.bits();
+
+ // Go to the matching doc
+ // int localDocID = bitset.iterator().nextDoc();
+ int localDocID = filterSet.iterator().nextDoc();
+
+ // log.trace("Found documents {} with the docID {}", bitset.cardinality(), localDocID);
+
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
+
+ // We've found the correct document!
+ HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
+ fieldsToLoadLocal.add(field);
+
+ // Get terms from the document
+ Terms docTerms = atomic.reader().getTermVector(localDocID, field);
+
+ /* ---
+ *
+ */
+ log.trace("docTerms has Payloads: {}", docTerms.hasPayloads());
+ log.trace("docTerms has Positions: {}", docTerms.hasPositions());
+
+ // Load the necessary fields of the document
+ Document doc = atomic.reader().document(localDocID, fieldsToLoadLocal);
+
+ // Put some more information to the match
+ match.setPositionsToOffset(new PositionsToOffset(atomic, field));
+ match.setLocalDocID(localDocID);
+
+ log.trace("pto and localDocID defined");
+
+ match.setStartPos(startPos);
+ match.setEndPos(endPos);
+ match.populateDocument(doc, field, fieldsToLoadLocal);
+
+ log.trace("We have found the correct document: {}", match.getTitle());
+ // log.trace("The match is: {}", doc.get("tokens"));
+
+ // A termsEnum object could be reused here
+ TermsEnum termsEnum = docTerms.intersect(fst, null);
+
+ DocsAndPositionsEnum docs = (DocsAndPositionsEnum) null;
+ // DocsAndPositionsEnum docs;
+
+ // Iterate over all terms in the document
+ while (termsEnum.next() != null) {
+
+ log.trace("> {}", termsEnum.term().utf8ToString());
+
+ docs = termsEnum.docsAndPositions(
+ null, //bitset.bits(),
+ null, //docs,
+ DocsAndPositionsEnum.FLAG_PAYLOADS
+ );
+
+ docs.nextDoc();
+ // log.trace("Check for '{}'({}) in document {}({}) from {}", termsEnum.term().utf8ToString(), termsEnum.totalTermFreq(), docs.docID(), localDocID, bitset.cardinality());
+ docs.nextPosition();
+
+ if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS ||
+ (docs.docID() != localDocID && docs.advance(localDocID) != localDocID))
+ continue;
+
+ log.trace("Frequencies: {}!", docs.getPayload());
+
+
+ // Init docs
+ /*
+ if (docs.advance(localDocID) == DocIdSetIterator.NO_MORE_DOCS || docs.docID() != localDocID)
+ continue;
+ */
+
+ // How often does this term occur in the document?
+ int termOccurrences = docs.freq();
+
+ // Iterate over all occurrences
+ for (int i = 0; i < termOccurrences; i++) {
+
+ // Init positions and get the current
+ int pos = docs.nextPosition();
+
+ // Check, if the position of the term is in the interesting area
+ if (pos >= startPos && pos <= endPos) {
+ termList.add(new TermInfo(
+ termsEnum.term().utf8ToString(),
+ pos,
+ docs.getPayload()
+ ));
+ };
+ };
+ };
+ break;
+ };
+ }
+ catch (IOException e) {
+ // ...
+ };
+
+ return match;
+ };
/**
@@ -810,27 +851,10 @@
match.internalDocID = docID;
- match.setField(field);
- match.setAuthor(doc.get("author"));
- match.setTextClass(doc.get("textClass"));
- match.setDocID(doc.get("ID"));
- match.setTitle(doc.get("title"));
- match.setSubTitle(doc.get("subTitle"));
- match.setPubPlace(doc.get("pubPlace"));
- match.setCorpusID(doc.get("corpusID"));
- match.setPubDate(doc.get("pubDate"));
+ match.populateDocument(doc, field, fieldsToLoadLocal);
log.trace("I've got a match in {} of {}", match.getDocID(), count);
- // Temporary (later meta fields in term vector)
- match.setFoundries(doc.get("foundries"));
- match.setTokenization(doc.get("tokenization"));
-
- match.setLayerInfo(doc.get("layerInfo"));
-
- match.setPrimaryData(
- new KorapPrimaryData(doc.get(field))
- );
atomicMatches.add(match);
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 8f54f5e..cc4fa60 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -5,8 +5,12 @@
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.*;
import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.document.KorapPrimaryData;
+
import static de.ids_mannheim.korap.util.KorapHTML.*;
// import org.apache.commons.codec.binary.Base64;
@@ -15,6 +19,7 @@
import org.slf4j.LoggerFactory;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.document.Document;
/*
Todo: The implemented classes and private names are horrible!
@@ -32,9 +37,10 @@
// Snippet information
@JsonIgnore
- public short leftContext,
- rightContext;
+ public short leftContextOffset,
+ rightContextOffset;
+ // Should be deprecated, but used wildly in tests!
@JsonIgnore
public int startPos,
endPos;
@@ -45,6 +51,8 @@
private int startOffsetChar = 0;
+ private int localDocID = -1;
+
@JsonIgnore
public boolean leftTokenContext,
rightTokenContext;
@@ -90,6 +98,11 @@
};
/**
+ * Constructs a new KorapMatch object.
+ */
+ public KorapMatch () {};
+
+ /**
* Insert a highlight for the snippet view by means of positional
* offsets and an optional class number.
*
@@ -123,6 +136,40 @@
this.highlight.add(new int[]{ start, end, number});
};
+ public void populateDocument (Document doc, String field, HashSet<String> fields) {
+
+ this.setField(field);
+
+ this.setPrimaryData(
+ new KorapPrimaryData(doc.get(field))
+ );
+
+ if (fields.contains("corpusID"))
+ this.setCorpusID(doc.get("corpusID"));
+ if (fields.contains("ID"))
+ this.setDocID(doc.get("ID"));
+ if (fields.contains("author"))
+ this.setAuthor(doc.get("author"));
+ if (fields.contains("textClass"))
+ this.setTextClass(doc.get("textClass"));
+ if (fields.contains("title"))
+ this.setTitle(doc.get("title"));
+ if (fields.contains("subTitle"))
+ this.setSubTitle(doc.get("subTitle"));
+ if (fields.contains("pubDate"))
+ this.setPubDate(doc.get("pubDate"));
+ if (fields.contains("pubPlace"))
+ this.setPubPlace(doc.get("pubPlace"));
+
+ // Temporary (later meta fields in term vector)
+ if (fields.contains("foundries"))
+ this.setFoundries(doc.get("foundries"));
+ if (fields.contains("tokenization"))
+ this.setTokenization(doc.get("tokenization"));
+ if (fields.contains("layerInfo"))
+ this.setLayerInfo(doc.get("layerInfo"));
+ };
+
@JsonProperty("docID")
public String getDocID () {
return super.getID();
@@ -132,6 +179,66 @@
super.setID(id);
};
+ @JsonIgnore
+ public int getStartPos() {
+ return this.startPos;
+ };
+
+ @JsonIgnore
+ public void setStartPos(int pos) {
+ this.startPos = pos;
+
+ if (this.positionsToOffset == null || this.localDocID == -1) {
+ log.warn("You have to define " +
+ "positionsToOffset and localDocID first " +
+ "before adding position information");
+ return;
+ };
+
+ // Preprocess matching
+ this.positionsToOffset.add(this.localDocID, pos);
+ };
+
+ @JsonIgnore
+ public int getEndPos() {
+ return this.endPos;
+ };
+
+ @JsonIgnore
+ public void setEndPos(int pos) {
+ this.endPos = pos;
+
+ if (this.positionsToOffset == null || this.localDocID == -1) {
+ log.warn("You have to define " +
+ "positionsToOffset and localDocID first " +
+ "before adding position information");
+ return;
+ };
+
+ // Preprocess matching
+ this.positionsToOffset.add(this.localDocID, pos - 1);
+ };
+
+ @JsonIgnore
+ public int getLocalDocID () {
+ return this.localDocID;
+ };
+
+ @JsonIgnore
+ public void setLocalDocID (int id) {
+ this.localDocID = id;
+ };
+
+ @JsonIgnore
+ public void setPositionsToOffset (PositionsToOffset pto) {
+ this.positionsToOffset = pto;
+ };
+
+ @JsonIgnore
+ public PositionsToOffset getPositionsToOffset () {
+ return this.positionsToOffset;
+ };
+
@Override
@JsonProperty("ID")
public String getID () {
@@ -560,7 +667,6 @@
@JsonProperty("snippet")
public String getSnippetHTML () {
-
this._processHighlight();
if (this.processed && this.snippetHTML != null)
@@ -708,26 +814,26 @@
// left context
if (leftTokenContext) {
- startOffsetChar = this.positionsToOffset.start(ldid, startPos - this.leftContext);
+ startOffsetChar = this.positionsToOffset.start(ldid, startPos - this.leftContextOffset);
}
else {
- startOffsetChar = startPosChar - this.leftContext;
+ startOffsetChar = startPosChar - this.leftContextOffset;
};
// right context
if (rightTokenContext) {
endOffsetChar = this.positionsToOffset.end(
ldid,
- this.endPos + this.rightContext - 1
+ this.endPos + this.rightContextOffset - 1
);
- log.trace("For endOffset {} ({}+{}-1) pto returns {}", (this.endPos + this.rightContext - 1), this.endPos, this.rightContext, endOffsetChar);
+ log.trace("For endOffset {} ({}+{}-1) pto returns {}", (this.endPos + this.rightContextOffset - 1), this.endPos, this.rightContextOffset, endOffsetChar);
}
else {
if (endPosChar == -1) {
endOffsetChar = -1;
}
else {
- endOffsetChar = endPosChar + this.rightContext;
+ endOffsetChar = endPosChar + this.rightContextOffset;
};
};
@@ -750,7 +856,7 @@
this.startOffsetChar = startOffsetChar;
- log.trace("Offsetposition {} till {} with contexts {} and {}", startOffsetChar, endOffsetChar, leftContext, rightContext);
+ log.trace("Offsetposition {} till {} with contexts {} and {}", startOffsetChar, endOffsetChar, leftContextOffset, rightContextOffset);
if (endOffsetChar > -1 && endOffsetChar < this.getPrimaryDataLength()) {
this.tempSnippet = this.getPrimaryData(startOffsetChar, endOffsetChar);
@@ -760,7 +866,7 @@
endMore = false;
};
- log.trace("Temporary snippet is \"{}\"", this.tempSnippet);
+ // log.trace("Temporary snippet is \"{}\"", this.tempSnippet);
if (this.span == null)
this.span = new LinkedList<int[]>();
@@ -797,4 +903,32 @@
};
};
};
+
+
+ // Identical to KorapResult!
+ public String toJSON () {
+ ObjectNode json = (ObjectNode) mapper.valueToTree(this);
+
+ ArrayNode leftContext = mapper.createArrayNode();
+ leftContext.add(this.leftTokenContext ? "token" : "char");
+ leftContext.add(this.leftContextOffset);
+
+ ArrayNode rightContext = mapper.createArrayNode();
+ rightContext.add(this.rightTokenContext ? "token" : "char");
+ rightContext.add(this.rightContextOffset);
+
+ ObjectNode context = mapper.createObjectNode();
+ context.put("left", leftContext);
+ context.put("right", rightContext);
+ json.put("context", context);
+
+ try {
+ return mapper.writeValueAsString(json);
+ }
+ catch (Exception e) {
+ log.warn(e.getLocalizedMessage());
+ };
+
+ return "{}";
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 2723424..6b05e36 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -29,10 +29,13 @@
private int startIndex = 0;
private short itemsPerPage = ITEMS_PER_PAGE;
- private short leftContextOffset = 6, rightContextOffset = 6;
- private boolean leftTokenContext, rightTokenContext;
+ private short leftContextOffset = 6,
+ rightContextOffset = 6;
+ private boolean leftTokenContext,
+ rightTokenContext;
- private String benchmarkSearchResults = "", benchmarkHitCounter = "0";
+ private String benchmarkSearchResults = "",
+ benchmarkHitCounter = "0";
private String error = null;
// Logger
@@ -72,15 +75,19 @@
public KorapMatch addMatch (PositionsToOffset pto, int localDocID, int startPos, int endPos) {
KorapMatch km = new KorapMatch(pto, localDocID, startPos, endPos);
- // Temporary - should use the same interface like results in the future:
- km.leftContext = this.leftContextOffset;
- km.leftTokenContext = this.leftTokenContext;
- km.rightContext = this.rightContextOffset;
- km.rightTokenContext = this.rightTokenContext;
+
+ // Temporary - should use the same interface like results
+ // in the future:
+ km.leftContextOffset = this.leftContextOffset;
+ km.leftTokenContext = this.leftTokenContext;
+ km.rightContextOffset = this.rightContextOffset;
+ km.rightTokenContext = this.rightTokenContext;
// Add pos for context
- // That's not really a good position for it, to be honest ...
- // But maybe it will make the offset information in the match be obsolete!
+ // That's not really a good position for it,
+ // to be honest ...
+ // But maybe it will make the offset
+ // information in the match be obsolete!
if (km.leftTokenContext) {
pto.add(localDocID, startPos - this.leftContextOffset);
};
@@ -159,11 +166,8 @@
return startIndex;
};
+ // Identical to KorapMatch!
public String toJSON () {
-
- // ObjectNode json = (ObjectNode) mapper.createObjectNode();
- // ObjectNode json = (ObjectNode) mapper.treeAsTokens(this);
-
ObjectNode json = (ObjectNode) mapper.valueToTree(this);
ArrayNode leftContext = mapper.createArrayNode();
@@ -180,7 +184,7 @@
json.put("context", context);
try {
- return mapper.writeValueAsString(json); // mapper.writeValueAsString(treeMapper);
+ return mapper.writeValueAsString(json);
}
catch (Exception e) {
log.warn(e.getLocalizedMessage());
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 121ef3c..a6e96e5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -51,4 +51,20 @@
assertEquals("ID (0)", "match-0p7-9(0)8-8(2)7-8c7-9(0)8-9(2)7-9", kr.match(0).getID());
};
+
+
+ @Test
+ public void indexExample2 () throws IOException {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+ System.err.println(ki.getMatch("test").toJSON());
+ };
+
};