Make match and context size configurable (address #128)

Change-Id: Ieef96dd68adf4e3ce00f59fc21face545c2ce897
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
new file mode 100644
index 0000000..3c3d0ff
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
@@ -0,0 +1,121 @@
+package de.ids_mannheim.korap.index;
+
+import static de.ids_mannheim.korap.TestSimple.getJsonString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.junit.Test;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.response.Match;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.KrillProperties;
+import de.ids_mannheim.korap.util.QueryException;
+
+public class TestMaxMatchTokens {
+
+    private KrillIndex ki;
+    private String json;
+
+    public TestMaxMatchTokens () throws IOException {
+        ki = new KrillIndex();
+        // Index the single test document /wiki/00001.json.gz
+        for (String i : new String[] { "00001" }) {
+            ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
+                    true);
+        };
+        ki.commit();
+        // Load the query shared by the search tests (position query: span s contains an opennlp/p=ADJA token)
+        json = getJsonString(getClass()
+                .getResource("/queries/position/sentence-contain-token.json")
+                .getFile());
+    }
+
+    @Test
+    public void testLimitingMatchWithProperties () throws IOException {
+        // Without an override the limit is the one from the test krill.properties (krill.match.max.token=50)
+        assertEquals(50, ki.getMaxTokenMatchSize());
+        // The first returned match of the shared query must be shorter than that limit
+        Krill ks = new Krill(json);
+        Result kr = ks.apply(ki);
+        Match km = kr.getMatch(0);
+        assertTrue(km.getLength()<ki.getMaxTokenMatchSize());
+    };
+
+    @Test
+    public void testLimitingMatchInKrillIndex () throws IOException {
+        // Lower the default match token size on the KrillIndex itself to 2 tokens
+        ki.setMaxTokenMatchSize(2);
+
+        Krill ks = new Krill(json);
+        Result kr = ks.apply(ki);
+        assertEquals(78, kr.getTotalResults());
+        // Expected snippets: match cut to two tokens ("Mit Ausnahme"), shown with <!> / class="cutted"
+        assertEquals(
+                "... sechsthäufigste Buchstabe in deutschen Texten. [[Mit Ausnahme]<!>] von Fremdwörtern und Namen ist ...",
+                kr.getMatch(0).getSnippetBrackets());
+        assertEquals(
+                "<span class=\"context-left\"><span class=\"more\"></span>sechsthäufigste Buchstabe in deutschen Texten. </span><span class=\"match\"><mark>Mit Ausnahme</mark><span class=\"cutted\"></span></span><span class=\"context-right\"> von Fremdwörtern und Namen ist<span class=\"more\"></span></span>",
+                kr.getMatch(0).getSnippetHTML());
+    }
+
+    @Test
+    public void testLimitingMatchInKrill () throws IOException {
+        // Set the limit to 3 tokens on the Krill search object instead of on the index
+        Krill ks = new Krill(json);
+        ks.setMaxTokenMatchSize(3);
+
+        Result kr = ks.apply(ki);
+        // Expected snippets: match cut to three tokens ("Mit Ausnahme von"), shown with <!> / class="cutted"
+        assertEquals(
+                "... sechsthäufigste Buchstabe in deutschen Texten. [[Mit Ausnahme von]<!>] Fremdwörtern und Namen ist das ...",
+                kr.getMatch(0).getSnippetBrackets());
+        assertEquals(
+                "<span class=\"context-left\"><span class=\"more\"></span>sechsthäufigste Buchstabe in deutschen Texten. </span><span class=\"match\"><mark>Mit Ausnahme von</mark><span class=\"cutted\"></span></span><span class=\"context-right\"> Fremdwörtern und Namen ist das<span class=\"more\"></span></span>",
+                kr.getMatch(0).getSnippetHTML());
+    };
+
+    @Test
+    public void testMatchInfoWithKrillConfig ()
+            throws IOException, QueryException {
+        KrillIndex ki = new KrillIndex();
+        // Local index (shadows the field ki) holding only /wiki/WUD17-C94-39360.json.gz
+        ki.addDoc(
+                getClass().getResourceAsStream("/wiki/WUD17-C94-39360.json.gz"),
+                true);
+        ki.commit();
+        Match km;
+
+        ArrayList<String> foundry = new ArrayList<String>();
+        foundry.add("opennlp");
+        ArrayList<String> layer = new ArrayList<String>();
+        layer.add("opennlp");
+
+        // no explicit limit: maxMatchTokens from properties = 50, so the 6-token match p390-396 is not cut
+        km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", false,
+                foundry, layer, false, false, false, false, false);
+
+        assertEquals("... [[g. Artikel vornimmst, wäre es fein]] ...",
+                km.getSnippetBrackets());
+        
+        // explicit limit (2) lower than the properties limit: the match is cut and endCutted is set
+        int maxMatchTokens = 2;
+        km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens",
+                false, foundry, layer, false, false, false, false, true, // extendToSentence
+                maxMatchTokens);
+
+        assertTrue(km.endCutted);
+        assertEquals("... [[g. Artikel]<!>] ...", km.getSnippetBrackets());
+        // explicit limit (51) above the properties limit: the snippet for p380-431 is expected to hold
+        // KrillProperties.maxTokenMatchSize words; "- 2" drops the leading and trailing "..." pieces
+        maxMatchTokens = 51;
+        km = ki.getMatchInfo("match-WUD17/C94/39360-p380-431", "tokens",
+                false, foundry, layer, false, false, false, false, true, // extendToSentence
+                maxMatchTokens);
+        assertEquals(KrillProperties.maxTokenMatchSize, (km.getSnippetBrackets().split(" ").length -2));
+    }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
index 7f96e70..3b1d0df 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
@@ -1,26 +1,26 @@
 package de.ids_mannheim.korap.response;
 
-import de.ids_mannheim.korap.response.Match;
+import static org.junit.Assert.assertEquals;
 
-import static org.junit.Assert.*;
 import org.junit.Test;
-import org.junit.Ignore;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
 @RunWith(JUnit4.class)
 public class TestMatch {
 
+    int maxMatchTokens = 50; // limit passed to every Match below; testMatchLong expects p326-480 to end at 376 (326 + 50)
+            
     @Test
     public void testNoMatch () {
-        Match m = new Match("aaa", false);
+        Match m = new Match(maxMatchTokens,"aaa", false);
         assertEquals(null, m.getID());
     };
 
 
     @Test
     public void testMatchBug () {
-        Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01",
+        Match m = new Match(maxMatchTokens,"match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01",
                 false);
         assertEquals(null, m.getID());
     };
@@ -28,21 +28,21 @@
 
     @Test
     public void testMatchTextSigle1 () {
-        Match m = new Match("match-GOE!GOE_AGK.00000-p60348-60349", false);
+        Match m = new Match(maxMatchTokens,"match-GOE!GOE_AGK.00000-p60348-60349", false);
         assertEquals("GOE_AGK.00000", m.getTextSigle());
     };
 
 
     @Test
     public void testMatchTextSigle2 () {
-        Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-327",
+        Match m = new Match(maxMatchTokens,"match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-327",
                 false);
         assertEquals("PRO-DUD_KSTA-2013-01.3651", m.getTextSigle());
     };
 
     @Test
     public void testMatchLong () {
-        Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-480",
+        Match m = new Match(maxMatchTokens,"match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-480",
                 false);
         assertEquals(326, m.getStartPos());
         assertEquals(376, m.getEndPos());
diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties
index 0c26284..95f40ca 100644
--- a/src/test/resources/krill.properties
+++ b/src/test/resources/krill.properties
@@ -4,4 +4,7 @@
 krill.index.commit.count = 15
 
 krill.namedVC = queries/collections/named-vcs/
-krill.test = true
\ No newline at end of file
+krill.test = true
+
+krill.match.max.token=50
+krill.context.max.token=3
\ No newline at end of file
diff --git a/src/test/resources/queries/position/sentence-contain-token.json b/src/test/resources/queries/position/sentence-contain-token.json
new file mode 100644
index 0000000..1d67729
--- /dev/null
+++ b/src/test/resources/queries/position/sentence-contain-token.json
@@ -0,0 +1,37 @@
+{
+  "meta":{
+    "startPage" : 2,
+    "count" : 5,
+    "context" : {
+      "left" : [ "token", 5 ],
+      "right" : [ "token", 5 ]
+    }
+  },
+    "query": {
+        "operands": [
+            {
+                "@type": "koral:span",
+                "wrap": {
+                    "foundry": "base",
+                    "@type": "koral:term",
+                    "layer": "s",
+                    "key": "s"
+                }
+            },
+            {
+                "@type": "koral:token",
+                "wrap": {
+                    "foundry": "opennlp",
+                    "@type": "koral:term",
+                    "match": "match:eq",
+                    "key": "ADJA",
+                    "layer": "p"
+                }
+            }
+        ],
+        "frames": ["frames:contains"],
+        "@type": "koral:group",
+        "operation": "operation:position"
+    },
+    "@context": "http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld"
+}