New feature and some bugfixes concerning span based context extension

commit: 1e5d5944a1737f779e517fa805e91d4a10e318b8 [log] [tgz]
author: Nils Diewald <nils@diewald-online.de> Tue May 20 13:29:53 2014 +0000
committer: Nils Diewald <nils@diewald-online.de> Tue May 20 13:29:53 2014 +0000
tree: f37fb8cbc2ee9644b1edc5cc47d8a2327a691189
parent: d216a03b070e391301de5fbc68d6dbe2b25dac87 [diff]
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index e2d957c..d2df845 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java

@@ -73,6 +73,7 @@
 import de.ids_mannheim.korap.index.PositionsToOffset;
 import de.ids_mannheim.korap.index.TermInfo;
 import de.ids_mannheim.korap.index.SpanInfo;
+import de.ids_mannheim.korap.index.SearchContext;
 import de.ids_mannheim.korap.index.MatchIdentifier;
 import de.ids_mannheim.korap.query.SpanElementQuery;
 
@@ -138,7 +139,7 @@
     private final static Logger log = LoggerFactory.getLogger(KorapIndex.class);
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
     {
 	Properties prop = new Properties();
@@ -502,7 +503,16 @@
 
 
     public KorapMatch getMatch (String id) {
-	return this.getMatchInfo(id, "tokens", false, null, null, false, true, false);
+	return this.getMatchInfo(
+            id,       // MatchID
+	    "tokens", // field
+	    false,    // info
+	    null,     // foundry
+	    null,     // layer
+	    false,    // includeSpans
+	    true,     // includeHighlights
+	    false     // extendToSentence
+	);
     };
 
     public KorapMatch getMatchInfo (String id,
@@ -524,6 +534,7 @@
 	return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, extendToSentence);
     };
 
+
     /**
      * Get a match.
      * BE AWARE - THIS IS STILL A PLAYGROUND!
@@ -630,57 +641,44 @@
 		match.setPositionsToOffset(pto);
 		match.setLocalDocID(localDocID);
 		match.populateDocument(doc, field, fieldsToLoadLocal);
-
 		if (DEBUG)
 		    log.trace("The document has the id '{}'", match.getDocID());
 
-		if (!info) break;
+		SearchContext context = match.getContext();
 
 		// Search for minimal surrounding sentences
 		if (extendToSentence) {
-
-		    SpanElementQuery squery = new SpanElementQuery(field, "s");
-		    Spans sentence = squery.getSpans(atomic,
-						     (Bits) bitset,
-						     new HashMap<Term, TermContext>());
+		    /*
+		    int[] newPos = match.expandContextToSpan(
+		      atomic,
+		      bitset,
+		      field,
+		      "s"
+		    );
+		    if (newPos[0] > 0)
+			match.setStartPos(newPos[0]);
+		    if (newPos[1] > 0)
+			match.setEndPos(newPos[1]);
 
 		    if (DEBUG)
-			log.trace("Now search for {}", sentence.toString());
-
-		    int newStart = -1, newEnd = -1;
-
-		    while (true) {
-
-			// Game over
-			if (sentence.next() != true)
-			    break;
-
-			// There's an s found, that starts before the match
-			if (sentence.start() <= match.getStartPos()) {
-			    newStart = sentence.start() > newStart ? sentence.start() : newStart;
-
-			}
-			else if (newStart == -1)
-			    break;
-
-			// There's an s found, that ends after the match
-			if (sentence.end() >= match.getEndPos()) {
-			    newEnd = sentence.end();
-			    break;
-			};
-		    };
-
-		    // We have a new match surrounding
-		    if (newStart > -1 && newEnd > -1) {
-			if (DEBUG)
-			    log.trace("New match spans from {}-{}",
-				      newStart,
-				      newEnd);
-			match.setStartPos(newStart);
-			match.setEndPos(newEnd);
-		    };
+			log.trace("Expand context to {}-{}", newPos[0], newPos[1]);
+		    */
+		    int [] spanContext = match.expandContextToSpan("s");
+		    match.setStartPos(spanContext[0]);
+		    match.setEndPos(spanContext[1]);
+		    match.startMore = false;
+		    match.endMore = false;
+		}
+		else {
+		    if (DEBUG)
+			log.trace("Don't expand context");
 		};
+		
+		context.left.setToken(true).setLength(0);
+		context.right.setToken(true).setLength(0);
 
+		if (!info)
+		    break;
 
 		// Limit the terms to all the terms of interest
 		TermsEnum termsEnum = docTerms.intersect(fst, null);
@@ -901,6 +899,8 @@
         );
     };
 
+    // This should probably be deprecated
+    @Deprecated
     public KorapResult search (SpanQuery query,
 			       int startIndex,
 			       short count,
@@ -908,16 +908,11 @@
 			       short leftContext,
 			       boolean rightTokenContext,
 			       short rightContext) {
-	return this.search(
-	    new KorapCollection(this),
-	    query,
-	    startIndex,
-	    count,
-	    leftTokenContext,
-	    leftContext,
-	    rightTokenContext,
-	    rightContext
-        );
+
+	KorapSearch ks = new KorapSearch(query);
+	ks.setStartIndex(startIndex).setCount(count);
+	ks.setContext(new SearchContext(leftTokenContext, leftContext, rightTokenContext, rightContext));	
+	return this.search(new KorapCollection(this), ks);
     };
 
     public KorapResult search (KorapSearch ks) {
@@ -925,6 +920,7 @@
 	return this.search(new KorapCollection(this), ks);
     };
 
+    @Deprecated
     public KorapResult search (KorapCollection collection,
 			       SpanQuery query,
 			       int startIndex,
@@ -934,9 +930,7 @@
 			       boolean rightTokenContext,
 			       short rightContext) {
 	KorapSearch ks = new KorapSearch(query);
-	ks.setStartIndex(startIndex).setCount(count);
-	ks.leftContext.setToken(leftTokenContext).setLength(leftContext);
-	ks.rightContext.setToken(rightTokenContext).setLength(rightContext);
+	ks.setContext(new SearchContext(leftTokenContext, leftContext, rightTokenContext, rightContext));	
 	return this.search(collection, ks);
     };
 
@@ -957,10 +951,7 @@
 	    query.toString(),
 	    ks.getStartIndex(),
 	    ks.getCount(),
-	    ks.leftContext.isToken(),
-	    ks.leftContext.getLength(),
-	    ks.rightContext.isToken(),
-	    ks.rightContext.getLength()
+	    ks.getContext()
 	);
 
 	if (this.getVersion() != null)

diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 2072699..d44c1d5 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java

@@ -1,5 +1,7 @@
 package de.ids_mannheim.korap;
 import java.util.*;
+import java.io.*;
+
 import java.lang.StringBuffer;
 import java.nio.ByteBuffer;
 
@@ -10,17 +12,24 @@
 import com.fasterxml.jackson.databind.node.*;
 
 import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.index.SearchContext;
 import de.ids_mannheim.korap.document.KorapPrimaryData;
 
 import static de.ids_mannheim.korap.util.KorapHTML.*;
 import de.ids_mannheim.korap.index.MatchIdentifier;
 import de.ids_mannheim.korap.index.PosIdentifier;
+import de.ids_mannheim.korap.query.SpanElementQuery;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.search.spans.Spans;
 
 /*
   Todo: The implemented classes and private names are horrible!
@@ -42,15 +51,14 @@
     private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = false;
+    public static final boolean DEBUG = true;
 
     // Mapper for JSON serialization
     ObjectMapper mapper = new ObjectMapper();
 
     // Snippet information
     @JsonIgnore
-    public short leftContextOffset,
-  	         rightContextOffset;
+    public SearchContext context;
 
     // Should be deprecated, but used wildly in tests!
     @JsonIgnore
@@ -63,7 +71,7 @@
     private String error = null;
     private String version;
 
-    // TEMPRARILY
+    // TEMPORARILY
     @JsonIgnore
     public int localDocID = -1;
 
@@ -76,10 +84,6 @@
     int relationNumberCounter   = 2048;
     int identifierNumberCounter = -2;
 
-    @JsonIgnore
-    public boolean leftTokenContext,
-	           rightTokenContext;
-
     private String tempSnippet,
 	           snippetHTML,
 	           snippetBrackets,
@@ -87,8 +91,8 @@
 
     private HighlightCombinator snippetStack;
 
-    private boolean startMore = true,
-	            endMore = true;
+    public boolean startMore = true,
+	           endMore = true;
 
     private Collection<byte[]> payload;
     private ArrayList<Highlight> highlight;
@@ -99,7 +103,7 @@
 
     /**
      * Constructs a new KorapMatch object.
-     * TODo: Maybe that's not necessary!
+     * Todo: Maybe that's not necessary!
      *
      * @param pto The PositionsToOffset object, containing relevant
      *            positional information for highlighting
@@ -113,9 +117,9 @@
      */
     public KorapMatch (PositionsToOffset pto, int localDocID, int startPos, int endPos) {
 	this.positionsToOffset = pto;
-	this.localDocID = localDocID;
-	this.startPos = startPos;
-	this.endPos = endPos;
+	this.localDocID     = localDocID;
+	this.startPos       = startPos;
+	this.endPos         = endPos;
     };
 
     
@@ -520,6 +524,153 @@
     };
 
 
+    /**
+     * Set the search context of this match.
+     *
+     * @param context The context definition to store.
+     * @return This match object, to allow for method chaining.
+     */
+    public KorapMatch setContext (SearchContext context) {
+	this.context = context;
+	return this;
+    };
+
+    /**
+     * Get the search context of this match.
+     * If no context was set before, a default context
+     * is created, stored and returned.
+     *
+     * @return The search context of this match.
+     */
+    @JsonIgnore
+    public SearchContext getContext () {
+
+	// Already initialized
+	if (this.context != null)
+	    return this.context;
+
+	// Lazily fall back to a default context
+	this.context = new SearchContext();
+	return this.context;
+    };
+    
+
+    /**
+     * Expand the context to a surrounding span, looked up in the
+     * "tokens" field using the atomic reader context of the
+     * PositionsToOffset object of this match.
+     *
+     * @param element The name of the element span to expand to.
+     * @return The result of the four parameter variant of this
+     *         method, or {0,0,0,0} if no PositionsToOffset object
+     *         is available.
+     */
+    public int[] expandContextToSpan (String element) {
+
+	// Without positional information there is nothing to expand
+	if (this.positionsToOffset == null)
+	    return new int[]{0,0,0,0};
+
+	// TODO: THE BITS HAVE TO BE SET!
+	AtomicReaderContext atomic =
+	    this.positionsToOffset.getAtomicReader();
+
+	return this.expandContextToSpan(atomic, (Bits) null, "tokens", element);
+    };
+
+    /**
+     * Expand the context of the match to the boundaries of
+     * surrounding element spans in the document of the match.
+     *
+     * All spans of the given element are iterated. The new start is
+     * the greatest start position of all spans covering the start
+     * position of the match, the new end is the end position of the
+     * first span ending at or behind the end position of the match.
+     * Character offsets are read from payloads of 8 bytes, holding the
+     * start offset followed by the end offset as integers.
+     *
+     * TODO: THIS IS PLAIN DUMB MAKE IT MOAR CLEVER! MOAR!!!
+     *
+     * @param atomic  The reader context to retrieve the spans from.
+     * @param bitset  The bits passed on to the span query (may be null).
+     * @param field   The field to search in.
+     * @param element The name of the element span to expand to.
+     * @return An array of the form
+     *         {startPos, endPos, startChar, endChar}.
+     *         Values that were not found are -1 (the end character
+     *         offset is 0, if it was reset and no payload could be read).
+     *         In case of an I/O error all values are -1.
+     */
+    public int[] expandContextToSpan (AtomicReaderContext atomic,
+				      Bits bitset,
+				      String field,
+				      String element) {
+
+	try {
+	    // Store character offsets in ByteBuffer
+	    ByteBuffer bb = ByteBuffer.allocate(8);
+
+	    SpanElementQuery cquery =
+		new SpanElementQuery(field, element);
+
+	    Spans contextSpans = cquery.getSpans(
+	        atomic,
+		bitset,
+		new HashMap<Term, TermContext>()
+	    );
+
+	    int newStart = -1,
+		newEnd = -1;
+	    int newStartChar = -1,
+		newEndChar = -1;
+
+	    if (DEBUG)
+		log.trace("Extend match to context boundary with {} in {}",
+			  cquery.toString(),
+			  this.localDocID);
+
+	    // Game over, if there are no more spans
+	    while (contextSpans.next()) {
+
+		// Move on to the document of the match - stop, if the
+		// spans are exhausted or the document has no such span
+		if (contextSpans.doc() != this.localDocID) {
+		    if (!contextSpans.skipTo(this.localDocID) ||
+			contextSpans.doc() != this.localDocID)
+			break;
+		};
+
+		// There's a <context> found -- I'm curious,
+		// if it's closer to the match than everything before
+		if (contextSpans.start() <= this.getStartPos() &&
+		    contextSpans.end() >= this.getStartPos()) {
+
+		    // Set as newStart
+		    newStart = Math.max(contextSpans.start(), newStart);
+
+		    // Get character offsets (start and end)
+		    if (contextSpans.isPayloadAvailable()) {
+			try {
+			    bb.rewind();
+			    for (byte[] b : contextSpans.getPayload()) {
+
+				// Not an element span
+				if (b.length != 8)
+				    continue;
+
+				bb.put(b);
+				bb.rewind();
+				newStartChar = bb.getInt();
+				newEndChar = bb.getInt();
+				break;
+			    };
+			}
+			catch (Exception e) {
+			    log.warn(e.getMessage());
+			};
+		    };
+		}
+		else {
+		    // Has to be reset, so the payload of this span
+		    // is read below - and is not read more than once
+		    newEndChar = 0;
+		};
+
+		// There's a span found, that ends at or after the match
+		if (contextSpans.end() >= this.getEndPos()) {
+		    newEnd = contextSpans.end();
+
+		    // Get character offset (end)
+		    if (newEndChar == 0 && contextSpans.isPayloadAvailable()) {
+			try {
+			    bb.rewind();
+			    for (byte[] b : contextSpans.getPayload()) {
+
+				// Not an element span
+				if (b.length != 8)
+				    continue;
+
+				bb.put(b);
+				bb.rewind();
+
+				// The end offset is the second integer of
+				// the payload, i.e. it starts at byte index 4
+				newEndChar = bb.getInt(4);
+				break;
+			    };
+			}
+			catch (Exception e) {
+			    log.warn(e.getMessage());
+			};
+		    };
+		    break;
+		};
+	    };
+
+	    // We have a new match surrounding
+	    if (DEBUG)
+		log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, newStartChar, newEndChar);
+
+	    return new int[]{newStart, newEnd, newStartChar, newEndChar};
+	}
+	catch (IOException e) {
+	    log.error(e.getMessage());
+	};
+
+	return new int[]{-1,-1,-1,-1};
+    };
+
     
     // Reset all internal data
     private void _reset () {
@@ -574,10 +725,7 @@
 	
 	// Get the list of spans for matches and highlighting
 	if (this.span == null || this.span.size() == 0) {
-	    if (!this._processHighlightSpans(
-	            leftTokenContext,
-		    rightTokenContext
-	       ))
+	    if (!this._processHighlightSpans())
 		return false;
 	};
 
@@ -585,6 +733,10 @@
 	// (opening and closing elements)
 	ArrayList<int[]> stack = this._processHighlightStack();
 
+	if (DEBUG)
+	    log.trace("The snippet is {}", this.tempSnippet);
+
+
 	// The temporary snippet is empty, nothing to do
 	if (this.tempSnippet == null) {
 	    processed = true;
@@ -1127,20 +1279,16 @@
     /**
      * This will retrieve character offsets for all spans.
      */
-    private boolean _processHighlightSpans (boolean leftTokenContext,
-					    boolean rightTokenContext) {
+    private boolean _processHighlightSpans () {
 
 	if (DEBUG)
 	    log.trace("--- Process Highlight spans");
 
-	int startOffsetChar,
-	    endOffsetChar,
-	    startPosChar,
-	    endPosChar;
-
 	// Local document ID
 	int ldid = this.localDocID;
 
+	int startPosChar = -1, endPosChar = -1;
+
 	// No positionsToOffset object found
 	if (this.positionsToOffset == null)
 	    return false;
@@ -1154,8 +1302,8 @@
 	// Check potential differing start characters
 	// e.g. from element spans
 	if (potentialStartPosChar != -1 &&
-	    (startPosChar > potentialStartPosChar))
-	    startPosChar = potentialStartPosChar;
+	    (startPosChar > this.potentialStartPosChar))
+	    startPosChar = this.potentialStartPosChar;
 
 	endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1);
 
@@ -1174,98 +1322,17 @@
 		      startPosChar,
 		      endPosChar);
 
-	// left context
-	if (leftTokenContext) {
-	    if (DEBUG)
-		log.trace("PTO will retrieve {} (Left context)",
-			  this.startPos - this.leftContextOffset);
-
-	    startOffsetChar = this.positionsToOffset.start(
-	      ldid,
-	      this.startPos - this.leftContextOffset
-	    );
-	}
-	else {
-	    startOffsetChar = startPosChar - this.leftContextOffset;
-	};
-
-	// right context
-	if (rightTokenContext) {
-	    if (DEBUG)
-		log.trace("PTO will retrieve {} (Right context)",
-			  this.endPos + this.rightContextOffset - 1);
-
-	    endOffsetChar = this.positionsToOffset.end(
-	        ldid,
-		this.endPos + this.rightContextOffset - 1
-	    );
-	}
-	else {
-	    if (endPosChar == -1) {
-		endOffsetChar = -1;
-	    }
-	    else {
-		endOffsetChar = endPosChar + this.rightContextOffset;
-	    };
-	};
-
-	// This can happen in case of non-token characters
-	// in the match and null offsets
-	if (startOffsetChar > startPosChar) {
-	    startOffsetChar = startPosChar;
-	}
-	else if (startOffsetChar < 0) {
-	    startOffsetChar = 0;
-	};
-
-	// No ... at the beginning
-	if (startOffsetChar == 0) {
-	    startMore = false;
-	};
-
-	if (endOffsetChar != -1 && endOffsetChar < endPosChar)
-	    endOffsetChar = endPosChar;
-
-	if (DEBUG)
-	    log.trace("The context spans from chars {}-{}",
-		      startOffsetChar, endOffsetChar);
-
-	if (endOffsetChar > -1 &&
-	    (endOffsetChar < this.getPrimaryDataLength())) {
-	    this.tempSnippet = this.getPrimaryData(
-	        startOffsetChar,
-		endOffsetChar
-	    );
-	}
-	else {
-	    this.tempSnippet = this.getPrimaryData(startOffsetChar);
-	    // endPosChar = this.tempSnippet.length() - 1 + startOffsetChar;
-	    endMore = false;
-	};
-
-	if (DEBUG)
-	    log.trace("Snippet: '" + this.tempSnippet + "'");
+	this.identifier = null;
 
 	// No spans yet
 	if (this.span == null)
 	    this.span = new LinkedList<int[]>();
 
-	this.identifier = null;
+	// Process offset char findings
+	int[] intArray = this._processOffsetChars(ldid, startPosChar, endPosChar);
 
-	// TODO: Simplify
-	int[] intArray = new int[]{
-	    startPosChar - startOffsetChar,
-	    endPosChar - startOffsetChar,
-	    -1,
-	    0};
-
-	if (DEBUG)
-	    log.trace("The match entry is {}-{} ({}-{}) with startOffsetChar {}",
-		      startPosChar - startOffsetChar,
-		      endPosChar - startOffsetChar,
-		      startPosChar,
-		      endPosChar,
-		      startOffsetChar);
+	// Recalculate startOffsetChar
+	int startOffsetChar = startPosChar - intArray[0];
 
 	// Add match span
 	this.span.add(intArray);
@@ -1313,6 +1380,135 @@
     };
 
 
+    /**
+     * Calculate the character offsets of the context surrounding the
+     * match, load the corresponding part of the primary data as the
+     * temporary snippet and adjust the startMore and endMore flags.
+     *
+     * The context is either defined by a surrounding span or,
+     * for each side, by a number of tokens or characters.
+     *
+     * @param ldid         The local document id used to retrieve
+     *                     character positions for the offset.
+     * @param startPosChar The character offset of the start of the match.
+     * @param endPosChar   The character offset of the end of the match.
+     * @return An array of the form {start, end, -1, 0}, with start and
+     *         end being the character offsets of the match relative
+     *         to the start of the snippet.
+     */
+    private int[] _processOffsetChars (int ldid, int startPosChar, int endPosChar) {
+
+	// Context boundaries as character offsets and as
+	// token positions (-1 means not known)
+	int startOffsetChar = -1, endOffsetChar = -1;
+	int startOffset = -1, endOffset = -1;
+
+	// The offset is defined by a span
+	if (this.getContext().isSpanDefined()) {
+
+	    if (DEBUG)
+		log.trace("Try to expand to <{}>",
+			  this.context.getSpanContext());
+
+	    // The flags are reset before the expansion is tried
+	    this.startMore = false;
+	    this.endMore = false;
+
+	    int [] spanContext = this.expandContextToSpan(
+	        this.positionsToOffset.getAtomicReader(),
+	        (Bits) null,
+	        "tokens",
+	        this.context.getSpanContext()
+	    );
+	    startOffset = spanContext[0];
+	    endOffset = spanContext[1];
+	    startOffsetChar = spanContext[2];
+	    endOffsetChar = spanContext[3];
+	    if (DEBUG)
+		log.trace("Got context is based from span {}-{}/{}-{}",
+			  startOffset, endOffset, startOffsetChar, endOffsetChar);
+	};
+
+	// The offset is defined by tokens or characters
+	// (this is also the fallback, if no end of a span was found)
+	if (endOffset == -1) {
+
+	    PositionsToOffset pto = this.positionsToOffset;
+	    
+	    // The left offset is defined by tokens
+	    if (this.context.left.isToken()) {
+		startOffset = this.startPos - this.context.left.getLength();
+		if (DEBUG)
+		    log.trace("PTO will retrieve {} (Left context)", startOffset);
+		// presumably registers the position, so its character
+		// offset can be resolved below - TODO confirm
+		pto.add(ldid, startOffset);
+	    }
+
+	    // The left offset is defined by characters
+	    else {
+		startOffsetChar = startPosChar - this.context.left.getLength();
+	    };
+
+	    // The right context is defined by tokens
+	    if (this.context.right.isToken()) {
+		endOffset = this.endPos + this.context.right.getLength() -1;
+		if (DEBUG)
+		    log.trace("PTO will retrieve {} (Right context)", endOffset);
+		pto.add(ldid, endOffset);
+
+	    }
+
+	    // The right context is defined by characters
+	    else {
+		endOffsetChar = (endPosChar == -1) ? -1 :
+		    endPosChar + this.context.right.getLength();
+	    };
+
+	    // NOTE(review): for a character based left context,
+	    // startOffset may still hold the start of a span expansion
+	    // that found no end - confirm that this is intended
+	    if (startOffset != -1)
+		startOffsetChar = pto.start(ldid, startOffset);
+
+	    if (endOffset != -1)
+		endOffsetChar = pto.end(ldid, endOffset);
+	};
+
+	if (DEBUG)
+	    log.trace("Premature found offsets at {}-{}",
+		      startOffsetChar,
+		      endOffsetChar);
+	
+
+	// This can happen in case of non-token characters
+	// in the match and null offsets
+	if (startOffsetChar > startPosChar)
+	    startOffsetChar = startPosChar;
+	else if (startOffsetChar < 0)
+	    startOffsetChar = 0;
+
+	// No "..." at the beginning
+	if (startOffsetChar == 0)
+	    this.startMore = false;
+
+	// The context must not end before the match ends
+	if (endOffsetChar != -1 && endOffsetChar < endPosChar)
+	    endOffsetChar = endPosChar;
+
+	if (DEBUG)
+	    log.trace("The context spans from chars {}-{}",
+		      startOffsetChar, endOffsetChar);
+
+	// Get snippet information from the primary data
+	if (endOffsetChar > -1 &&
+	    (endOffsetChar < this.getPrimaryDataLength())) {
+	    this.tempSnippet = this.getPrimaryData(
+		startOffsetChar,
+		endOffsetChar
+	    );
+	}
+	// The end is unknown or not inside the primary data:
+	// request the primary data from the start offset only
+	else {
+	    this.tempSnippet = this.getPrimaryData(startOffsetChar);
+	    this.endMore = false;
+	};
+
+	if (DEBUG)
+	    log.trace("Snippet: '" + this.tempSnippet + "'");
+
+	if (DEBUG)
+	    log.trace("The match entry is {}-{} ({}-{}) with absolute offsetChars {}-{}",
+		      startPosChar - startOffsetChar,
+		      endPosChar - startOffsetChar,
+		      startPosChar,
+		      endPosChar,
+		      startOffsetChar,
+		      endOffsetChar);
+
+	// TODO: Simplify
+	return new int[]{
+	    startPosChar - startOffsetChar,
+	    endPosChar - startOffsetChar,
+	    -1,
+	    0};
+    };
+    
+
     // Identical to KorapResult!
     public String toJSON () {
 	ObjectNode json =  (ObjectNode) mapper.valueToTree(this);
@@ -1321,18 +1517,7 @@
 	if (json.size() == 0)
 	    return "{}";
 
-	ArrayNode leftContext = mapper.createArrayNode();
-	leftContext.add(this.leftTokenContext ? "token" : "char");
-	leftContext.add(this.leftContextOffset);
-
-	ArrayNode rightContext = mapper.createArrayNode();
-	rightContext.add(this.rightTokenContext ? "token" : "char");
-	rightContext.add(this.rightContextOffset);
-
-	ObjectNode context = mapper.createObjectNode();
-	context.put("left", leftContext);
-	context.put("right", rightContext);
-	json.put("context", context);
+	json.put("context", this.getContext().toJSON());
 
 	if (this.version != null)
 	    json.put("version", this.getVersion());

diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 68ddac6..e53b7f4 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java

@@ -3,6 +3,7 @@
 import java.util.*;
 import de.ids_mannheim.korap.KorapMatch;
 import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.index.SearchContext;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -28,11 +29,9 @@
     private int totalResults = 0;
     private int startIndex = 0;
 
+    private SearchContext context;
+
     private short itemsPerPage = ITEMS_PER_PAGE;
-    private short leftContextOffset = 6,
-	          rightContextOffset = 6;
-    private boolean leftTokenContext,
-	            rightTokenContext;
 
     private String benchmarkSearchResults,
 	           benchmarkHitCounter;
@@ -46,16 +45,13 @@
     private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
 
     // Empty result
-    public KorapResult () {
-    };
+    public KorapResult () {};
+
 
     public KorapResult (String query,
 			int startIndex,
 			short itemsPerPage,
-			boolean leftTokenContext,
-			short leftContextOffset,
-			boolean rightTokenContext,
-			short rightContextOffset) {
+			SearchContext context) {
 
 	mapper.enable(SerializationFeature.INDENT_OUTPUT);
 	// mapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
@@ -65,11 +61,7 @@
 	this.query = query;
 	this.startIndex = startIndex;
 	this.itemsPerPage = (itemsPerPage > 50 || itemsPerPage < 1) ? ITEMS_PER_PAGE : itemsPerPage;
-	this.leftContextOffset = leftContextOffset;
-	this.rightContextOffset = rightContextOffset;
-
-	this.leftTokenContext = leftTokenContext;
-	this.rightTokenContext = rightTokenContext;
+	this.context = context;
     };
 
     public void add (KorapMatch km) {
@@ -81,22 +73,23 @@
 
 	// Temporary - should use the same interface like results
 	// in the future:
-	km.leftContextOffset  = this.leftContextOffset;
-	km.leftTokenContext   = this.leftTokenContext;
-	km.rightContextOffset = this.rightContextOffset;
-	km.rightTokenContext  = this.rightTokenContext;
+	km.setContext(this.context);
 
 	// Add pos for context
 	// That's not really a good position for it,
 	// to be honest ...
 	// But maybe it will make the offset
 	// information in the match be obsolete!
+
+	// TODO:
+	/*
 	if (km.leftTokenContext) {
 	    pto.add(localDocID, startPos - this.leftContextOffset);
 	};
 	if (km.rightTokenContext) {
 	    pto.add(localDocID, endPos + this.rightContextOffset - 1);
 	};
+	*/
 
 	this.add(km);
 	return km;
@@ -194,22 +187,22 @@
 	return startIndex;
     };
 
+
+    /**
+     * Set the search context of this result.
+     *
+     * @param context The context definition to store.
+     * @return This result object, to allow for method chaining.
+     */
+    public KorapResult setContext (SearchContext context) {
+	this.context = context;
+	return this;
+    };
+
+    /**
+     * Get the search context of this result.
+     * If no context was set before, a default context
+     * is created, stored and returned.
+     *
+     * @return The search context of this result (never null).
+     */
+    @JsonIgnore
+    public SearchContext getContext () {
+
+	// Results created by the empty constructor have no context,
+	// and toJSON() calls toJSON() on the value returned here -
+	// so fall back to a default context instead of returning null
+	if (this.context == null)
+	    this.context = new SearchContext();
+
+	return this.context;
+    };
+
     // Identical to KorapMatch!
     public String toJSON () {
 	ObjectNode json =  (ObjectNode) mapper.valueToTree(this);
 
-	ArrayNode leftContext = mapper.createArrayNode();
-	leftContext.add(this.leftTokenContext ? "token" : "char");
-	leftContext.add(this.leftContextOffset);
-
-	ArrayNode rightContext = mapper.createArrayNode();
-	rightContext.add(this.rightTokenContext ? "token" : "char");
-	rightContext.add(this.rightContextOffset);
-
-	ObjectNode context = mapper.createObjectNode();
-	context.put("left", leftContext);
-	context.put("right", rightContext);
-	json.put("context", context);
+	json.put("context", this.getContext().toJSON());
 
 	if (this.version != null)
 	    json.put("version", this.version);

diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 8843e31..0737415 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java

@@ -8,6 +8,7 @@
 import de.ids_mannheim.korap.KorapIndex;
 import de.ids_mannheim.korap.KorapResult;
 import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.index.SearchContext;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -36,68 +37,13 @@
 
     private JsonNode request;
 
-    public KorapSearchContext leftContext, rightContext;
+    public SearchContext context;
+    private String spanContext;
 
     {
-	leftContext  = new KorapSearchContext();
-	rightContext = new KorapSearchContext();
+	context  = new SearchContext();
     };
 
-    public class KorapSearchContext {
-	private boolean type = true;
-	private short length = 6, maxLength = 300;
-
-	public boolean isToken () {
-	    return this.type;
-	};
-
-	public boolean isCharacter () {
-	    return !(this.type);
-	};
-
-	public KorapSearchContext setToken (boolean value) {
-	    this.type = value;
-	    return this;
-	};
-
-	public KorapSearchContext setCharacter (boolean value) {
-	    this.type = !(value);
-	    return this;
-	};
-
-	public short getLength() {
-	    return this.length;
-	};
-
-	public KorapSearchContext setLength (short value) {
-	    if (value >= 0) {
-		if (value <= maxLength) {
-		    this.length = value;
-		}
-		else {
-		    this.length = this.maxLength;
-		};
-	    };
-	    return this;
-	};
-
-	public KorapSearchContext setLength (int value) {
-	    return this.setLength((short) value);
-	};
-
-	public void fromJSON (JsonNode json) {
-	    String type = json.get(0).asText();
-	    if (type.equals("token")) {
-		this.setToken(true);
-	    }
-	    else if (type.equals("char")) {
-		this.setCharacter(true);
-	    };
-	    this.setLength(json.get(1).asInt());
-	};
-    };
-
-
     public KorapSearch (String jsonString) {
 	ObjectMapper mapper = new ObjectMapper();
 	try {
@@ -141,14 +87,8 @@
 			this.setCutOff(meta.get("cutOff").asBoolean());
 
 		    // Defined contexts
-		    if (meta.has("context")) {
-			JsonNode context = meta.get("context");
-			if (context.has("left"))
-			    this.leftContext.fromJSON(context.get("left"));
-
-			if (context.has("right"))
-			    this.rightContext.fromJSON(context.get("right"));
-		    };
+		    if (meta.has("context"))
+			this.context.fromJSON(meta.get("context"));
 		};
 	    };
 	}
@@ -194,6 +134,16 @@
 	return this;
     };
 
+    /**
+     * Get the search context of this search.
+     *
+     * @return The context definition stored in this search object.
+     */
+    public SearchContext getContext () {
+	return this.context;
+    };
+
+
+    /**
+     * Set the search context of this search.
+     *
+     * @param context The context definition to store.
+     * @return This search object, to allow for method chaining.
+     */
+    public KorapSearch setContext (SearchContext context) {
+	this.context = context;
+	return this;
+    };
+
     public int getStartIndex () {
 	return this.startIndex;
     };

diff --git a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
index eae79c1..57375d0 100644
--- a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
+++ b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java

@@ -20,17 +20,18 @@
     private AtomicReaderContext atomic;
     private boolean processed = false;
     private Integer[] pair;
-    private static ByteBuffer bbOffset = ByteBuffer.allocate(8);
+    private static ByteBuffer bbOffset =
+	ByteBuffer.allocate(8);
 
     HashSet<PositionsToOffsetArray> positions;
     HashMap<PositionsToOffsetArray, Integer[]> offsets;
 
-    private final static Logger log = LoggerFactory.getLogger(PositionsToOffset.class);
+    private final static Logger log =
+	LoggerFactory.getLogger(PositionsToOffset.class);
 
     // This advices the java compiler to ignore all loggings
     public static final boolean DEBUG = false;
 
-
     private class PositionsToOffsetArray {
 	public int docID;
 	public int pos;
@@ -245,4 +246,8 @@
 	positions.clear();
 	return offsets;
     };
+
+    /**
+     * Get the atomic reader context stored in this object.
+     *
+     * @return The AtomicReaderContext held in the atomic field.
+     */
+    public AtomicReaderContext getAtomicReader () {
+	return this.atomic;
+    };
 };

diff --git a/src/main/java/de/ids_mannheim/korap/index/SearchContext.java b/src/main/java/de/ids_mannheim/korap/index/SearchContext.java
new file mode 100644
index 0000000..3c72456
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/index/SearchContext.java

@@ -0,0 +1,151 @@
+package de.ids_mannheim.korap.index;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.*;
+import com.fasterxml.jackson.annotation.*;
+
+
+/**
+ * Definition of the context surrounding a match.
+ *
+ * The context is either defined by a surrounding span
+ * (like "s" or "p"), or separately for the left and the right
+ * side by a number of tokens or characters.
+ */
+public class SearchContext {
+    ObjectMapper mapper = new ObjectMapper();
+
+    // True, if the context is defined by a span
+    private boolean spanType = false;
+
+    // Token or character based definitions of both sides
+    @JsonIgnore
+    public SearchContextSide left, right;
+
+    // Name of the span defining the context
+    @JsonIgnore
+    public String spanContext;
+
+    {
+	left  = new SearchContextSide();
+	right = new SearchContextSide();
+    };
+
+    /**
+     * Construct a context with default values for both sides.
+     */
+    public SearchContext () {};
+
+    /**
+     * Construct a context defined by a span.
+     * The name is stored as given - the aliases "sentence" and
+     * "paragraph" are only resolved by setSpanContext().
+     *
+     * @param spanContext The name of the span.
+     */
+    public SearchContext (String spanContext) {
+	this.spanType = true;
+	this.spanContext = spanContext;
+    };
+
+    /**
+     * Construct a context defined by tokens or characters.
+     *
+     * @param leftTokenContext  True, if the left context is given
+     *                          in tokens, false for characters.
+     * @param leftContext       The length of the left context.
+     * @param rightTokenContext True, if the right context is given
+     *                          in tokens, false for characters.
+     * @param rightContext      The length of the right context.
+     */
+    public SearchContext (boolean leftTokenContext,
+			  short leftContext,
+			  boolean rightTokenContext,
+			  short rightContext) {
+	this.spanType = false;
+	this.left.setToken(leftTokenContext);
+	this.left.setLength(leftContext);
+
+	// The right side has a type flag of its own
+	this.right.setToken(rightTokenContext);
+	this.right.setLength(rightContext);
+    };
+
+    /**
+     * Check, if the context is defined by a span.
+     */
+    public boolean isSpanDefined () {
+	return this.spanType;
+    };
+
+    /**
+     * Get the name of the span defining the context.
+     */
+    public String getSpanContext () {
+	return this.spanContext;
+    };
+
+    /**
+     * Define the context by a span.
+     * The aliases "sentence" and "paragraph" are
+     * stored as "s" and "p".
+     *
+     * @param spanContext The name of the span.
+     * @return This context object, to allow for method chaining.
+     */
+    public SearchContext setSpanContext (String spanContext) {
+	this.spanType = true;
+
+	if (spanContext.equals("sentence")) {
+	    spanContext = "s";
+	}
+	else if (spanContext.equals("paragraph")) {
+	    spanContext = "p";
+	};
+
+	this.spanContext = spanContext;
+	return this;
+    };
+
+    /**
+     * Definition of one side of the context, given as
+     * a number of tokens (the default) or characters.
+     * The length defaults to 6 and is limited to 500.
+     */
+    public class SearchContextSide {
+	private boolean type = true;
+	private short length = 6;
+	private short maxLength = 500;
+
+	public boolean isToken () {
+	    return this.type;
+	};
+
+	public boolean isCharacter () {
+	    return !(this.type);
+	};
+
+	public SearchContextSide setToken (boolean value) {
+	    this.type = value;
+	    return this;
+	};
+
+	public SearchContextSide setCharacter (boolean value) {
+	    this.type = !(value);
+	    return this;
+	};
+
+	public short getLength() {
+	    return this.length;
+	};
+
+	/**
+	 * Set the length of the side. Negative values are ignored,
+	 * values above the maximum length are limited to the maximum.
+	 */
+	public SearchContextSide setLength (short value) {
+	    if (value >= 0) {
+		if (value <= maxLength) {
+		    this.length = value;
+		}
+		else {
+		    this.length = this.maxLength;
+		};
+	    };
+	    return this;
+	};
+
+	/**
+	 * Set the length of the side, following the same rules
+	 * as the short variant of this method.
+	 */
+	public SearchContextSide setLength (int value) {
+
+	    // Check the range before narrowing to short - otherwise
+	    // values outside of the short range would wrap around
+	    // (e.g. -40000 would end up as the maximum length)
+	    if (value < 0)
+		return this;
+
+	    if (value > this.maxLength)
+		value = this.maxLength;
+
+	    return this.setLength((short) value);
+	};
+
+	/**
+	 * Read the definition of the side from an array of
+	 * the form ["token"|"char", length].
+	 * Missing or unknown entries keep the current values.
+	 */
+	public void fromJSON (JsonNode json) {
+
+	    // path() never returns null, so incomplete
+	    // definitions can't raise a NullPointerException
+	    String type = json.path(0).asText();
+	    if (type.equals("token")) {
+		this.setToken(true);
+	    }
+	    else if (type.equals("char")) {
+		this.setCharacter(true);
+	    };
+	    this.setLength(json.path(1).asInt(this.length));
+	};
+    };
+
+
+    /**
+     * Read the context definition from JSON: Either a container with
+     * the optional members "left" and "right", or a value naming
+     * the span defining the context.
+     */
+    public void fromJSON (JsonNode context) {
+	if (context.isContainerNode()) {
+	    if (context.has("left"))
+		this.left.fromJSON(context.get("left"));
+
+	    if (context.has("right"))
+		this.right.fromJSON(context.get("right"));
+	}
+	else if (context.isValueNode()) {
+	    this.setSpanContext(context.asText());
+	};
+    };
+
+    /**
+     * Serialize the definitions of both sides as an object of the form
+     * {"left":[type,length],"right":[type,length]}, with the type
+     * being "token" or "char".
+     * The span context is not part of the serialization.
+     */
+    public ObjectNode toJSON () {
+	ArrayNode leftContext = mapper.createArrayNode();
+	leftContext.add(this.left.isToken() ? "token" : "char");
+	leftContext.add(this.left.getLength());
+
+	ArrayNode rightContext = mapper.createArrayNode();
+	rightContext.add(this.right.isToken() ? "token" : "char");
+	rightContext.add(this.right.getLength());
+
+	ObjectNode context = mapper.createObjectNode();
+	context.put("left", leftContext);
+	context.put("right", rightContext);
+
+	return context;
+    };
+
+};

diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 14ff683..6de3fb7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java

@@ -31,7 +31,7 @@
 
     private final static Logger log = LoggerFactory.getLogger(ClassSpans.class);
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
     public ClassSpans (SpanQuery highlight,
 		       AtomicReaderContext context,

diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 02dfdcf..d1a8759 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java

@@ -30,6 +30,10 @@
 	private boolean hasMoreFirstSpan;
 	
 	private Logger log = LoggerFactory.getLogger(NextSpans.class);
+
+    // This advises the Java compiler to ignore all logging
+    public static final boolean DEBUG = false;
+
 	
     public NextSpans (SpanNextQuery spanNextQuery,
 		      AtomicReaderContext context,
@@ -59,12 +63,13 @@
 			    matchEndPosition = matchList.get(0).getEnd();
 			    if (collectPayloads)
 			    	matchPayload.addAll( matchList.get(0).getPayloads() );
-			    
-			    log.trace("Match doc#: {}",matchDocNumber);
-				log.trace("Match positions: {}-{}", matchStartPosition, 
-						matchEndPosition);
-				matchList.remove(0);
-				return true;
+			    if (DEBUG) {
+				log.trace("Match doc#: {}",matchDocNumber);
+				log.trace("Match positions: {}-{}", matchStartPosition,
+					  matchEndPosition);
+			    };
+			    matchList.remove(0);
+			    return true;
 			}
 			// Forward firstspan
 			hasMoreFirstSpan = firstSpans.next();

diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index af72c2d..5050e55 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties

@@ -9,8 +9,8 @@
 #log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
 #log4j.logger.de.ids_mannheim.korap.query.spans.NextSpans = TRACE, stdout
 #log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
 
 # Collections
 #log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
@@ -18,12 +18,11 @@
 
 
 # Results:
-# log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
 #log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
 #log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
 
 #log4j.logger.de.ids_mannheim.korap.index.TestSegmentIndex = TRACE, stdout
-
 #log4j.logger.de.ids_mannheim.korap.analysis.MultiTermTokenStream = TRACE, stdout
 
 log4j.appender.stdout=org.apache.log4j.ConsoleAppender
commit	1e5d5944a1737f779e517fa805e91d4a10e318b8	[log] [tgz]
author	Nils Diewald <nils@diewald-online.de>	Tue May 20 13:29:53 2014 +0000
committer	Nils Diewald <nils@diewald-online.de>	Tue May 20 13:29:53 2014 +0000
tree	f37fb8cbc2ee9644b1edc5cc47d8a2327a691189
parent	d216a03b070e391301de5fbc68d6dbe2b25dac87 [diff]