Make match expansion configurable (#150)
Change-Id: Ie4eb9098f6e7352918e1fd0d3bf74615d3508e7e
diff --git a/Changes b/Changes
index 37d2ffc..e0b9400 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.62.6 2024-06-13
+ - [feature] Make match expansion configurable via krill.match.expansion.includeContextSize, default false (close #150, margaretha)
+
0.62.5 2024-06-11
- [bugfix] cut primary data according to max values (margaretha, #143)
- [enhancement] restrict match expansion by max token and context
diff --git a/pom.xml b/pom.xml
index dbfe782..77435b0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids-mannheim.korap.krill</groupId>
<artifactId>Krill</artifactId>
- <version>0.62.5</version>
+ <version>0.62.6</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index bf14ca2..fa18740 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -1117,9 +1117,11 @@
if (spanContext[0] >= 0
&& spanContext[0] < spanContext[1]) {
-
- int maxExpansionSize = KrillProperties.maxTokenMatchSize
- + KrillProperties.maxTokenContextSize;
+
+ int maxExpansionSize = KrillProperties.maxTokenMatchSize;
+ if (KrillProperties.matchExpansionIncludeContextSize) {
+ maxExpansionSize += KrillProperties.maxTokenContextSize;
+ }
// Match needs to be cutted!
boolean cutExpansion = false;
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
index 1ebc449..6c6d5b4 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
@@ -23,6 +23,8 @@
public static int maxTokenContextSize = 60;
public static int maxCharContextSize = 500;
+ public static boolean matchExpansionIncludeContextSize = false;
+
public static String namedVCPath = "";
public static boolean isTest = false;
@@ -100,6 +102,10 @@
isTest = Boolean.parseBoolean(p);
namedVCPath = prop.getProperty("krill.namedVC", "");
+
+ String matchExpansion = prop.getProperty(
+ "krill.match." + "expansion.includeContextSize", "false");
+ matchExpansionIncludeContextSize = Boolean.parseBoolean(matchExpansion);
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 1bf2677..97aa429 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -30,6 +30,7 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.response.match.MatchIdentifier;
import de.ids_mannheim.korap.response.match.PosIdentifier;
+import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;
@RunWith(JUnit4.class)
@@ -1219,6 +1220,8 @@
@Test
public void indexCorolaTokensBugReplicated () throws IOException, QueryException {
+ KrillProperties.matchExpansionIncludeContextSize=false;
+
KrillIndex ki = new KrillIndex();
ki.addDoc(getClass().getResourceAsStream("/others/corola-bug.json"), false);
@@ -1239,7 +1242,7 @@
String str = km.getSnippetBrackets();
assertTrue(str.contains("[<!>{drukola/l:au:a}"));
- assertFalse(str.contains("<!>]"));
+ assertTrue(str.contains("<!>]"));
km = ki.getMatchInfo("match-Corola-blog/BlogPost/370281_a_371610-p50-51", "tokens", null, null,false, false, true);
@@ -1247,6 +1250,8 @@
str = km.getSnippetBrackets();
assertTrue(str.contains("[<!>{d"));
assertTrue(str.contains("a}<!>]"));
+
+ KrillProperties.matchExpansionIncludeContextSize=true;
};
diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties
index 3714c0c..fc26a13 100644
--- a/src/test/resources/krill.properties
+++ b/src/test/resources/krill.properties
@@ -6,5 +6,6 @@
krill.namedVC = queries/collections/named-vcs/
krill.test = true
+krill.match.expansion.includeContextSize = true
krill.match.max.token=50
krill.context.max.token=25