Separate max length for token and char context.
Change-Id: I4ff9f3e454aa4dc2819a98a3c2f08acdcdec82b2
diff --git a/Changes b/Changes
index dcee624..601eb41 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,7 @@
-0.62.4 2024-05-17
+0.62.4 2024-05-22
- [feature] Make match and context size configurable (address #128,
diewald & margaretha)
+ - [enhancement] Separate max length for token and char context (margaretha)
0.62.3 2024-04-16
- [cleanup] Added getDocBitsSupplier to VirtualCorpusFilter (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/response/SearchContext.java b/src/main/java/de/ids_mannheim/korap/response/SearchContext.java
index ab38b41..7deb78a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/SearchContext.java
+++ b/src/main/java/de/ids_mannheim/korap/response/SearchContext.java
@@ -1,11 +1,13 @@
package de.ids_mannheim.korap.response;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.*;
-import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.TextNode;
+import de.ids_mannheim.korap.util.KrillProperties;
public class SearchContext {
ObjectMapper mapper = new ObjectMapper();
@@ -25,14 +27,14 @@
public SearchContext () {};
-
-
+
+ // EM: this constructor appears to be unused - TODO confirm before removing it
public SearchContext (String spanContext) {
this.spanType = true;
this.spanContext = spanContext;
};
-
+ // EM: seems to be deprecated; it is used in a deprecated search method
public SearchContext (boolean leftTokenContext, short leftContext,
boolean rightTokenContext, short rightContext) {
this.spanType = false;
@@ -70,65 +72,81 @@
};
public class SearchContextSide {
- private boolean type = true;
- private short length = 6;
- private short maxLength = 500;
+ private boolean isToken = true;
+ private int length = 6;
+ private int maxTokenLength = KrillProperties.maxTokenContextSize;
+ private int maxCharLength = KrillProperties.maxCharContextSize;
-
+ public SearchContextSide () {}
+ // Upper bounds used by setLength: one for token mode, one for character mode
+ public int getMaxTokenLength () {
+ return maxTokenLength;
+ }
+ public void setMaxTokenLength (int maxLength) {
+ this.maxTokenLength = maxLength;
+ }
+
+ public int getMaxCharLength () {
+ return maxCharLength;
+ }
+ public void setMaxCharLength (int maxCharLength) {
+ this.maxCharLength = maxCharLength;
+ }
+ // isToken == true means token context; false means character context
+ // NOTE(review): changing the mode does not re-clamp an already stored length
public boolean isToken () {
- return this.type;
+ return this.isToken;
};
public boolean isCharacter () {
- return !(this.type);
+ return !(this.isToken);
};
public SearchContextSide setToken (boolean value) {
- this.type = value;
+ this.isToken = value;
return this;
};
public SearchContextSide setCharacter (boolean value) {
- this.type = !(value);
+ this.isToken = !(value);
return this;
};
- public short getLength () {
+ public int getLength () {
return this.length;
};
- public SearchContextSide setLength (short value) {
+ public SearchContextSide setLength (int value) {
+ int maxLength = (isToken) ? maxTokenLength : maxCharLength;
+ // Negative values are ignored; values above the mode's maximum are clamped
if (value >= 0) {
if (value <= maxLength) {
this.length = value;
}
else {
- this.length = this.maxLength;
+ this.length = maxLength;
};
};
return this;
};
- public SearchContextSide setLength (int value) {
- return this.setLength((short) value);
- };
-
-
public void fromJson (JsonNode json) {
String type = json.get(0).asText();
+ int length = json.get(1).asInt(this.length);
if (type.equals("token")) {
this.setToken(true);
+
}
else if (type.equals("char")) {
this.setCharacter(true);
};
- this.setLength(json.get(1).asInt(this.length));
+ this.setLength(length);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java b/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java
new file mode 100644
index 0000000..239d9eb
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMaxContext.java
@@ -0,0 +1,126 @@
+package de.ids_mannheim.korap.index;
+
+import static de.ids_mannheim.korap.TestSimple.getJsonString;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.response.Match;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.response.SearchContext;
+import de.ids_mannheim.korap.util.KrillProperties;
+
+public class TestMaxContext {
+ private static KrillIndex ki;
+ private static String jsonQuery;
+ public static ObjectMapper mapper = new ObjectMapper();
+
+ @BeforeClass
+ public static void init () throws IOException {
+ ki = new KrillIndex();
+ for (String i : new String[] { "00001" }) {
+ ki.addDoc(TestMaxContext.class
+ .getResourceAsStream("/wiki/" + i + ".json.gz"), true);
+ };
+ ki.commit();
+
+ // Query shared by all tests; it asks for 5 tokens of left and right context
+ jsonQuery = getJsonString(TestMaxContext.class
+ .getResource("/queries/position/sentence-contain-token.json")
+ .getFile());
+ }
+
+ @Test
+ public void testTokenContextSize () throws IOException {
+ // 25 presumably comes from krill.context.max.token in the test krill.properties
+ assertEquals(25, KrillProperties.maxTokenContextSize);
+
+ Krill ks = new Krill(jsonQuery);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(KrillProperties.maxTokenContextSize,
+ context.left.getMaxTokenLength()); // default
+ assertEquals(KrillProperties.maxTokenContextSize,
+ context.right.getMaxTokenLength());
+ assertEquals(5, context.left.getLength());
+ assertEquals(5, context.right.getLength());
+ // The requested 5 tokens are below the maximum, so they are expected unchanged
+ Match km = kr.getMatch(0);
+ assertEquals(5, km.getContext().left.getLength());
+ assertEquals(5, km.getContext().right.getLength());
+ };
+
+ @Test
+ public void searchWithLargerContextTokenSize ()
+ throws JsonMappingException, JsonProcessingException {
+ // Request 70 tokens per side; the assertions expect clamping to the maximum
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(1, "70");
+ rightNode.set(1, "70");
+
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+
+ // Context reported on the result as a whole
+ SearchContext context = kr.getContext();
+ assertEquals(KrillProperties.maxTokenContextSize,
+ context.left.getLength());
+ assertEquals(KrillProperties.maxTokenContextSize,
+ context.right.getLength());
+
+ Match km = kr.getMatch(0);
+ assertEquals(KrillProperties.maxTokenContextSize,
+ km.getContext().left.getLength());
+ assertEquals(KrillProperties.maxTokenContextSize,
+ km.getContext().right.getLength());
+ // NOTE(review): the "- 2" below presumably discounts non-token split pieces - confirm
+ String rightContext = km.getSnippetBrackets().split("]]")[1];
+ assertEquals(KrillProperties.maxTokenContextSize,
+ rightContext.split(" ").length - 2);
+ }
+
+
+ @Test
+ public void searchWithLargerContextCharSize ()
+ throws JsonMappingException, JsonProcessingException {
+ JsonNode jsonNode = mapper.readTree(jsonQuery);
+ ArrayNode leftNode = (ArrayNode) jsonNode.at("/meta/context/left");
+ ArrayNode rightNode = (ArrayNode) jsonNode.at("/meta/context/right");
+ leftNode.set(0, "char");
+ rightNode.set(0, "char");
+ leftNode.set(1, "600");
+ rightNode.set(1, "600");
+ // Both sides now ask for 600 characters; the assertions expect clamping to the maximum
+ Krill ks = new Krill(jsonNode);
+ Result kr = ks.apply(ki);
+
+ SearchContext context = kr.getContext();
+ assertEquals(KrillProperties.maxCharContextSize,
+ context.left.getLength());
+ assertEquals(KrillProperties.maxCharContextSize,
+ context.right.getLength());
+
+ Match km = kr.getMatch(0);
+ assertEquals(KrillProperties.maxCharContextSize,
+ km.getContext().left.getLength());
+ assertEquals(KrillProperties.maxCharContextSize,
+ km.getContext().right.getLength());
+ // NOTE(review): the 4 subtracted below presumably covers snippet decoration - confirm
+ String rightContext = km.getSnippetBrackets().split("]]")[1];
+ assertEquals(KrillProperties.maxCharContextSize,rightContext.length() -4);
+ }
+}
diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties
index 95f40ca..3714c0c 100644
--- a/src/test/resources/krill.properties
+++ b/src/test/resources/krill.properties
@@ -7,4 +7,4 @@
krill.test = true
krill.match.max.token=50
-krill.context.max.token=3
\ No newline at end of file
+krill.context.max.token=25