Added sentence expansion for match info
diff --git a/CHANGES b/CHANGES
index 0d8e3fc..8d4f839 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+0.30.1 2014-02-13
+        - Added sentence extension for match info (diewald)
+
 0.30 2014-02-13
         - This is a major version (prepared for the IDS meeting on the 17th of february)
 	- Improved stringification for distance queries (margaretha)
diff --git a/pom.xml b/pom.xml
index cb77ece..03d4809 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,7 +11,7 @@
 -->
   <groupId>KorAP-modules</groupId>
   <artifactId>KorAP-lucene-index</artifactId>
-  <version>0.30</version>
+  <version>0.30.1</version>
   <packaging>jar</packaging>
 
   <name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 34f4962..d2825dc 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -75,6 +75,7 @@
 import de.ids_mannheim.korap.index.TermInfo;
 import de.ids_mannheim.korap.index.SpanInfo;
 import de.ids_mannheim.korap.index.MatchIdentifier;
+import de.ids_mannheim.korap.query.SpanElementQuery;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -461,7 +462,7 @@
 
 
     public KorapMatch getMatch (String id) {
-	return this.getMatchInfo(id, "tokens", false, null, null, false, true);
+	return this.getMatchInfo(id, "tokens", false, null, null, false, true, false);
     };
 
     public KorapMatch getMatchInfo (String id,
@@ -470,7 +471,17 @@
 				    String layer,
 				    boolean includeSpans,
 				    boolean includeHighlights) {
-	return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights);
+	return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, false);
+    };
+
+    public KorapMatch getMatchInfo (String id,
+				    String field,
+				    String foundry,
+				    String layer,
+				    boolean includeSpans,
+				    boolean includeHighlights,
+				    boolean extendToSentence) {
+	return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, extendToSentence);
     };
 
     /**
@@ -487,7 +498,8 @@
 				    String foundry,
 				    String layer,
 				    boolean includeSpans,
-				    boolean includeHighlights) {
+				    boolean includeHighlights,
+				    boolean extendToSentence) {
 
 	KorapMatch match = new KorapMatch(idString, includeHighlights);
 
@@ -577,6 +589,47 @@
 
 		if (!info) break;
 
+		// Search for minimal surrounding sentences
+		if (extendToSentence) {
+
+		    SpanElementQuery squery = new SpanElementQuery(field, "s");
+		    Spans sentence = squery.getSpans(atomic,
+						     (Bits) bitset,
+						     new HashMap<Term, TermContext>());
+
+		    log.trace("Now search for {}", sentence.toString());
+
+		    int newStart = -1, newEnd = -1;
+
+		    while (true) {
+
+			// No further sentence spans available
+			if (sentence.next() != true)
+			    break;
+
+			// Found an s that starts at or before the match start
+			if (sentence.start() <= match.getStartPos()) {
+			    newStart = sentence.start() > newStart ? sentence.start() : newStart;
+			}
+			else if (newStart == -1)
+			    break;
+			
+			// Found an s that ends at or after the match end
+			if (sentence.end() >= match.getEndPos()) {
+			    newEnd = sentence.end();
+			    break;
+			};
+		    };
+
+		    // Surrounding sentence boundaries were found - widen the match
+		    if (newStart > -1 && newEnd > -1) {
+			log.trace("New match spans from {}-{}", newStart, newEnd);
+			match.setStartPos(newStart);
+			match.setEndPos(newEnd);
+		    };
+		};
+
+
 		// Limit the terms to all the terms of interest
 		TermsEnum termsEnum = docTerms.intersect(fst, null);
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 51308d1..6c80e4c 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -208,6 +208,9 @@
 	assertEquals("... Orte in [Norwegen]: Å i ...", kr.match(2).getSnippetBrackets());
 	assertEquals("WPD_AAA.00005", kr.match(2).getDocID());
 
+	/*
+	System.err.println(ki.getMatchInfo(kr.match(2).getID(), "tokens", "xip", "l", true, false).getSnippetHTML());
+	*/
 
 	query = kq.seg("tt/l:Vokal").without("mate/m:number:sg").toQuery();
 	kr = ki.search(query, 0, (short) 5, true, (short) 2, false, (short) 5);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index ffbf04e..b556c6f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -479,6 +479,33 @@
     };
 
 
+    @Test
+    public void indexExample7SentenceExpansion () throws IOException {
+	KorapIndex ki = new KorapIndex();
+	ki.addDoc(createSimpleFieldDoc());
+	ki.commit();
+
+	KorapMatch km = ki.getMatchInfo("match-c1!d1-p3-4",
+			     "tokens",
+			     null,
+			     null,
+			     false,
+			     false);
+
+	assertEquals("... [{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}] ...",
+		     km.getSnippetBrackets());
+
+	km = ki.getMatchInfo("match-c1!d1-p3-4",
+			     "tokens",
+			     null,
+			     null,
+			     false,
+			     false,
+			     true);
+
+	assertEquals("... [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}] ...",
+		     km.getSnippetBrackets());
+    };
 
     private FieldDocument createSimpleFieldDoc(){
 	FieldDocument fd = new FieldDocument();
@@ -488,7 +515,7 @@
 		 "abcabcabac",
 		 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0#0-1|-:t$<i>10]" +
 		 "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1#1-2]" +
-		 "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#2-3]" +
+		 "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#2-3|<>:s#2-5$<i>5]" +
 		 "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3#3-4]" +
 		 "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4#4-5]" +
 		 "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5#5-6]" +
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
index 979dacd..780a397 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
@@ -200,11 +200,19 @@
     public void queryJSONcosmas4 () {
 	SpanQueryWrapperInterface sqwi = jsonQuery(getClass().getResource("/queries/cosmas4.json").getFile());
 
-	// "das /+w1:3,s1 Buch"
+	// "das /+w1:3,s1:1 Buch"
 	assertEquals(sqwi.toQuery().toString(), "spanMultipleDistance(tokens:s:das, tokens:s:Buch, [(w[1:3], ordered, notExcluded), (s[1:1], ordered, notExcluded)])");
     };
 
     @Test
+    public void queryJSONcosmas4b () {
+	SpanQueryWrapperInterface sqwi = jsonQuery(getClass().getResource("/queries/cosmas4b.json").getFile());
+
+	// "das /+w1:3,s1 Buch"
+	assertEquals(sqwi.toQuery().toString(), "spanMultipleDistance(tokens:s:das, tokens:s:Buch, [(w[1:3], ordered, notExcluded), (s[0:1], ordered, notExcluded)])");
+    };
+
+    @Test
     public void queryJSONcosmas10 () {
 	SpanQueryWrapperInterface sqwi = jsonQuery(getClass().getResource("/queries/cosmas10.json").getFile());
 
diff --git a/src/test/resources/queries/readme.txt b/src/test/resources/queries/readme.txt
index 2aa2f2e..c5b3d63 100644
--- a/src/test/resources/queries/readme.txt
+++ b/src/test/resources/queries/readme.txt
@@ -20,7 +20,7 @@
 
 // Based on KorAP-querySerialization/examples/
 cosmas3:  "das /+w1:3 Buch" # word-distance constraint
-cosmas4:  "das /+w1:3,s1 Buch" # combined word-distance and sent-distance constraint
+cosmas4:  "das /+w1:3,s1:1 Buch" # combined word-distance and sent-distance constraint
 cosmas10: "Institut für $deutsche Sprache" # finds both
 cosmas16: "$wegen #IN(L) <s>"  # finds 'wegen' at beginning of sentence, also when capitalised
 cosmas17: "#BED($wegen , +sa)" # equivalent to above