Remove the maxTokenMatchSize field (and its getter/setter) from KrillIndex
and the maxMatchTokens parameter from getMatchInfo. A maxTokenMatchSize can
still be requested via Krill, capped at the limit set in KrillProperties.
Change-Id: I82a1ad2c3a81abf69168d7cc9f9a6972fb9ba49e
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 40d369e..d1e3cde 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -175,8 +175,6 @@
private HashMap termContexts;
private ObjectMapper mapper = new ObjectMapper();
- private int maxTokenMatchSize;
-
// private ByteBuffer bbTerm;
// Some initializations ...
@@ -193,7 +191,6 @@
String autoCommitStr = null;
if (prop != null) {
autoCommitStr = prop.getProperty("krill.index.commit.auto");
- this.maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
}
if (autoCommitStr != null) {
@@ -243,7 +240,6 @@
this.directory = new MMapDirectory(path);
};
-
/**
* Get the version number of the index.
*
@@ -435,14 +431,6 @@
this.autoCommit = value;
};
- public int getMaxTokenMatchSize () {
- return maxTokenMatchSize;
- }
-
- public void setMaxTokenMatchSize (int maxMatchTokens) {
- this.maxTokenMatchSize = maxMatchTokens;
- }
-
/**
* Update a document in the index as a {@link FieldDocument}
* if it already exists (based on the textSigle), otherwise
@@ -984,20 +972,12 @@
boolean includeSnippets, boolean includeTokens,
boolean includeHighlights, boolean extendToSentence)
throws QueryException {
- return getMatchInfo(idString, field, info, foundry, layer, includeSpans,
- includeSnippets, includeTokens, includeHighlights,
- extendToSentence, maxTokenMatchSize);
- };
-
- public Match getMatchInfo (String idString, String field, boolean info,
- List<String> foundry, List<String> layer, boolean includeSpans,
- boolean includeSnippets, boolean includeTokens,
- boolean includeHighlights, boolean extendToSentence,
- int maxMatchTokens) throws QueryException {
+
if (DEBUG)
log.trace("Get info on {}", idString);
- Match match = new Match(maxMatchTokens, idString, includeHighlights);
+ int maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
+ Match match = new Match(maxTokenMatchSize, idString, includeHighlights);
if (this.getVersion() != null)
match.setVersion(this.getVersion());
@@ -1223,8 +1203,8 @@
&& spanContext[0] < spanContext[1]) {
// Match needs to be cutted!
- if ((spanContext[1] - spanContext[0]) > maxMatchTokens) {
- int contextLength = maxMatchTokens - match.getLength();
+ if ((spanContext[1] - spanContext[0]) > maxTokenMatchSize) {
+ int contextLength = maxTokenMatchSize - match.getLength();
int halfContext = contextLength / 2;
// This is the extended context calculated
@@ -1237,8 +1217,8 @@
}
}
- match.setStartPos(maxMatchTokens,spanContext[0]);
- match.setEndPos(maxMatchTokens,spanContext[1]);
+ match.setStartPos(maxTokenMatchSize,spanContext[0]);
+ match.setEndPos(maxTokenMatchSize,spanContext[1]);
match.potentialStartPosChar = spanContext[2];
match.potentialEndPosChar = spanContext[3];
match.startMore = false;
@@ -1591,9 +1571,10 @@
? lreader.document(localDocID, fieldsSet)
: lreader.document(localDocID);
- int maxMatchSize = maxTokenMatchSize;
- if (ks.getMaxTokenMatchSize() > 0) {
- maxMatchSize = ks.getMaxTokenMatchSize();
+ int maxMatchSize = ks.getMaxTokenMatchSize();
+ if (maxMatchSize <= 0
+ || maxMatchSize > KrillProperties.maxTokenMatchSize) {
+ maxMatchSize = KrillProperties.maxTokenMatchSize;
};
// Create new Match
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
index 3c3d0ff..262a0f4 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
@@ -7,6 +7,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import org.junit.AfterClass;
import org.junit.Test;
import de.ids_mannheim.korap.Krill;
@@ -34,36 +35,23 @@
.getResource("/queries/position/sentence-contain-token.json")
.getFile());
}
+
+ @AfterClass
+ public static void resetMaxTokenMatchSize() {
+ KrillProperties.maxTokenMatchSize = 50;
+ }
@Test
public void testLimitingMatchWithProperties () throws IOException {
- // from properties
- assertEquals(50, ki.getMaxTokenMatchSize());
-
+ // default properties file
Krill ks = new Krill(json);
Result kr = ks.apply(ki);
Match km = kr.getMatch(0);
- assertTrue(km.getLength()<ki.getMaxTokenMatchSize());
+ assertEquals(40, KrillProperties.maxTokenMatchSize);
+ assertTrue(km.getLength() < 40);
};
@Test
- public void testLimitingMatchInKrillIndex () throws IOException {
- // Limiting default match token size in KrillIndex
- ki.setMaxTokenMatchSize(2);
-
- Krill ks = new Krill(json);
- Result kr = ks.apply(ki);
- assertEquals(78, kr.getTotalResults());
-
- assertEquals(
- "... sechsthäufigste Buchstabe in deutschen Texten. [[Mit Ausnahme]<!>] von Fremdwörtern und Namen ist ...",
- kr.getMatch(0).getSnippetBrackets());
- assertEquals(
- "<span class=\"context-left\"><span class=\"more\"></span>sechsthäufigste Buchstabe in deutschen Texten. </span><span class=\"match\"><mark>Mit Ausnahme</mark><span class=\"cutted\"></span></span><span class=\"context-right\"> von Fremdwörtern und Namen ist<span class=\"more\"></span></span>",
- kr.getMatch(0).getSnippetHTML());
- }
-
- @Test
public void testLimitingMatchInKrill () throws IOException {
// Change limit via Krill
Krill ks = new Krill(json);
@@ -80,7 +68,7 @@
};
@Test
- public void testMatchInfoWithKrillConfig ()
+ public void testMatchInfo ()
throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
// Indexing test files
@@ -95,27 +83,25 @@
ArrayList<String> layer = new ArrayList<String>();
layer.add("opennlp");
- // maxMatchTokens from properties = 5
+ // maxMatchTokens from properties = 40
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", false,
foundry, layer, false, false, false, false, false);
assertEquals("... [[g. Artikel vornimmst, wäre es fein]] ...",
km.getSnippetBrackets());
- // lower than limit
- int maxMatchTokens = 2;
- km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens",
- false, foundry, layer, false, false, false, false, true, // extendToSentence
- maxMatchTokens);
+ // request lower than limit
+ // int maxMatchTokens = 2;
+ km = ki.getMatchInfo("match-WUD17/C94/39360-p390-392", "tokens",
+ false, foundry, layer, false, false, false, false, true);
- assertTrue(km.endCutted);
- assertEquals("... [[g. Artikel]<!>] ...", km.getSnippetBrackets());
+ assertEquals("... [[g. Artikel]] ...", km.getSnippetBrackets());
- // more than limit
- maxMatchTokens = 51;
+ // request more than limit
+ // maxMatchTokens = 51;
km = ki.getMatchInfo("match-WUD17/C94/39360-p380-431", "tokens",
- false, foundry, layer, false, false, false, false, true, // extendToSentence
- maxMatchTokens);
- assertEquals(KrillProperties.maxTokenMatchSize, (km.getSnippetBrackets().split(" ").length -2));
+ false, foundry, layer, false, false, false, false, false);
+ assertTrue(km.endCutted);
+ assertEquals(420, km.getEndPos());
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
index 3b1d0df..fcc1db3 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
@@ -6,10 +6,17 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
+import de.ids_mannheim.korap.util.KrillProperties;
+
@RunWith(JUnit4.class)
public class TestMatch {
int maxMatchTokens = 50;
+
+ public TestMatch () {
+ KrillProperties.maxTokenMatchSize = 50;
+ }
+
@Test
public void testNoMatch () {
diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties
index 3714c0c..9cfe438 100644
--- a/src/test/resources/krill.properties
+++ b/src/test/resources/krill.properties
@@ -6,5 +6,5 @@
krill.namedVC = queries/collections/named-vcs/
krill.test = true
-krill.match.max.token=50
+krill.match.max.token=40
krill.context.max.token=25