Re-introduce krill.context.max.char support
Change-Id: I75c0654af1fddfe6d1c5ba1842b9f0ce5b6b0c8a
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
index 6a2485e..d88e26b 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
@@ -105,6 +105,7 @@
// EM: not implemented yet
// String maxCharContextSize = prop.getProperty("krill.context.max.char");
+ String maxCharContextSize = prop.getProperty("krill.context.max.char");
String defaultSearchContextLength = prop.getProperty("krill.search.context.default");
String maxTextSizeValue = prop.getProperty("krill.index.textSize.max");
@@ -117,10 +118,10 @@
KrillProperties.maxTokenContextSize = Integer
.parseInt(maxTokenContextSize);
}
-// if (maxCharContextSize != null) {
-// KrillProperties.maxCharContextSize = Integer
-// .parseInt(maxCharContextSize);
-// }
+ if (maxCharContextSize != null) {
+ KrillProperties.maxCharContextSize = Integer
+ .parseInt(maxCharContextSize);
+ }
if (defaultSearchContextLength != null) {
KrillProperties.defaultSearchContextLength = Integer
.parseInt(defaultSearchContextLength);
diff --git a/src/main/resources/krill.properties.info b/src/main/resources/krill.properties.info
index 081d04b..bbe38d5 100644
--- a/src/main/resources/krill.properties.info
+++ b/src/main/resources/krill.properties.info
@@ -20,7 +20,7 @@
#
# krill.match.max.token = 5
#
-## Maximum number (i.e. length) of tokens to be retrievable.
+## Maximum number of tokens (i.e. the length) retrievable in a match.
## Matches longer than that will be cut.
## Defaults to 50
@@ -69,5 +69,11 @@
## Note: Only token-based contexts are affected; character-based contexts
## are currently not adjusted by this feature.
-
-
+# krill.context.max.char = 500
+#
+## Maximum number of characters to be retrieved on each side (left and
+## right) of a match when the client requests character-based context.
+## Character-based contexts are independent of token-based contexts:
+## the client chooses one or the other per request. When character context
+## is used, token-based maxShrink adjustment does NOT apply.
+## Defaults to 500
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java b/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java
index 3c29a95..f9a4330 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java
@@ -7,8 +7,8 @@
import java.io.IOException;
import java.util.Properties;
-import org.junit.BeforeClass;
import org.junit.After;
+import org.junit.BeforeClass;
import org.junit.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
@@ -551,6 +551,42 @@
};
@Test
+ public void testMaxCharContextSizeProperty () {
+ Properties props = new Properties();
+ props.setProperty("krill.context.max.char", "100");
+ KrillProperties.updateConfigurations(props);
+ assertEquals(100, KrillProperties.maxCharContextSize);
+ };
+
+ @Test
+ public void testMaxCharContextClampsBracketsSnippet ()
+ throws JsonMappingException, JsonProcessingException {
+ KrillProperties.maxCharContextSize = 50;
+
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(0, "char");
+ rightNode.set(0, "char");
+ leftNode.set(1, "200");
+ rightNode.set(1, "200");
+
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(50, context.left.getLength());
+ assertEquals(50, context.right.getLength());
+
+ Match km = kr.getMatch(0);
+ assertEquals(50, km.getContext().left.getLength());
+ assertEquals(50, km.getContext().right.getLength());
+
+ String rightContext = km.getSnippetBrackets().split("]]")[1];
+ assertEquals(50, rightContext.length() - 4);
+ };
+
+ @Test
public void testKwicMaxTokenBasic () {
// kwic.max.token = matchMax + 2*contextMax - totalShrink
// Setting kwicMaxToken=60 with matchMax=50 and contextMax=25:
@@ -662,4 +698,105 @@
assertEquals(3, KrillProperties.leftContextMaxShrink);
assertEquals(7, KrillProperties.rightContextMaxShrink);
};
-};
+
+ @Test
+ public void testMaxCharContextClampsHTMLSnippet ()
+ throws JsonMappingException, JsonProcessingException {
+ KrillProperties.maxCharContextSize = 50;
+
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(0, "char");
+ rightNode.set(0, "char");
+ leftNode.set(1, "200");
+ rightNode.set(1, "200");
+
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+ Match km = kr.getMatch(0);
+
+ String html = km.getSnippetHTML();
+ assertTrue(html.contains("<span class=\"context-left\">"));
+ assertTrue(html.contains("<span class=\"match\">"));
+ assertTrue(html.contains("<span class=\"context-right\">"));
+ };
+
+ @Test
+ public void testCharContextBelowMaxIsNotClamped ()
+ throws JsonMappingException, JsonProcessingException {
+ assertEquals(500, KrillProperties.maxCharContextSize);
+
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(0, "char");
+ rightNode.set(0, "char");
+ leftNode.set(1, "30");
+ rightNode.set(1, "30");
+
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(30, context.left.getLength());
+ assertEquals(30, context.right.getLength());
+
+ Match km = kr.getMatch(0);
+ assertEquals(30, km.getContext().left.getLength());
+ assertEquals(30, km.getContext().right.getLength());
+
+ String rightContext = km.getSnippetBrackets().split("]]")[1];
+ assertEquals(30, rightContext.length() - 4);
+ };
+
+ @Test
+ public void testMaxCharContextViaPropertiesAffectsSearch ()
+ throws JsonMappingException, JsonProcessingException {
+ Properties props = new Properties();
+ props.setProperty("krill.context.max.char", "80");
+ KrillProperties.updateConfigurations(props);
+ assertEquals(80, KrillProperties.maxCharContextSize);
+
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(0, "char");
+ rightNode.set(0, "char");
+ leftNode.set(1, "200");
+ rightNode.set(1, "200");
+
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(80, context.left.getLength());
+ assertEquals(80, context.right.getLength());
+
+ Match km = kr.getMatch(0);
+ assertEquals(80, km.getContext().left.getLength());
+ assertEquals(80, km.getContext().right.getLength());
+
+ String rightContext = km.getSnippetBrackets().split("]]")[1];
+ assertEquals(80, rightContext.length() - 4);
+ };
+
+ @Test
+ public void testMaxCharContextDoesNotAffectTokenContext ()
+ throws JsonMappingException, JsonProcessingException {
+ KrillProperties.maxCharContextSize = 10;
+
+ Krill ks = new Krill(jsonQuery);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(5, context.left.getLength());
+ assertEquals(5, context.right.getLength());
+ assertTrue(context.left.isToken());
+ assertTrue(context.right.isToken());
+
+ Match km = kr.getMatch(0);
+ assertEquals(5, km.getContext().left.getLength());
+ assertEquals(5, km.getContext().right.getLength());
+ };
+}