Introduce prefix cutting with expandToContext to avoid missing matches in matchinfo view
Change-Id: I997439e3f621470d4d96e108cca25ae3692d6de9
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 738748b..0b32c8b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1273,6 +1273,7 @@
// Search for minimal surrounding sentences
if (extendToSentence) {
+
String element = "base/s:s";
int[] spanContext = match.expandContextToSpan(element);
@@ -1281,6 +1282,22 @@
if (spanContext[0] >= 0
&& spanContext[0] < spanContext[1]) {
+
+ // Match needs to be cut!
+ if ((spanContext[1] - spanContext[0]) > match.getMaxMatchTokens()) {
+ int contextLength = match.getMaxMatchTokens() - match.getLength();
+ int halfContext = contextLength / 2;
+
+ // Length of the extended left context (match start minus span start)
+ int realLeftLength = match.getStartPos() - spanContext[0];
+
+ // The length is too large - cut!
+ if (realLeftLength > halfContext) {
+ match.startCutted = true;
+ spanContext[0] = match.getStartPos() - halfContext;
+ }
+ }
+
match.setStartPos(spanContext[0]);
match.setEndPos(spanContext[1]);
match.potentialStartPosChar = spanContext[2];
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index f520865..bf18bb5 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -113,7 +113,8 @@
public int potentialStartPosChar = -1, potentialEndPosChar = -1;
@JsonIgnore
- public boolean cutted = false;
+ public boolean startCutted = false;
+ public boolean endCutted = false;
private String version;
@@ -219,7 +220,6 @@
};
};
-
/**
* Private class of highlights.
* TODO: This should probably be renamed, as it not only contains highlights
@@ -345,7 +345,7 @@
this.potentialStartPosChar = bb.getInt(1);
};
- if (bb.getInt(4) > this.potentialEndPosChar && !this.cutted)
+ if (bb.getInt(4) > this.potentialEndPosChar && !this.endCutted)
this.potentialEndPosChar = bb.getInt(5);
if (DEBUG)
@@ -500,6 +500,11 @@
this.addHighlight(new Highlight(start, pagenumber));
};
+ @JsonIgnore
+ public int getMaxMatchTokens () {
+ return MAX_MATCH_TOKENS;
+ }
+
/**
* Get document id.
*/
@@ -568,7 +573,7 @@
this.startPos = pos;
if (this.endPos != -1 && (this.endPos - pos) > MAX_MATCH_TOKENS) {
this.endPos = pos + MAX_MATCH_TOKENS;
- this.cutted = true;
+ this.endCutted = true;
};
};
@@ -615,7 +620,7 @@
public void setEndPos (int pos) {
if (this.startPos != -1 && (pos - this.startPos) > MAX_MATCH_TOKENS) {
pos = this.startPos + MAX_MATCH_TOKENS;
- this.cutted = true;
+ this.endCutted = true;
};
this.endPos = pos;
};
@@ -821,6 +826,10 @@
return this.context;
};
+ @JsonIgnore
+ public int getLength () {
+ return this.getEndPos() - this.getStartPos();
+ };
// Retrieve pagebreaks in a certain area
@@ -1413,6 +1422,11 @@
// Iterate through all remaining elements
sb.append("<span class=\"match\">");
+
+ if (this.startCutted) {
+ sb.append("<span class=\"cutted\"></span>");
+ };
+
for (short i = start; i <= end; i++) {
elem = this.snippetArray.get(i);
@@ -1427,7 +1441,7 @@
sb.append(elemString);
}
};
- if (this.cutted) {
+ if (this.endCutted) {
sb.append("<span class=\"cutted\"></span>");
};
sb.append("</span>");
@@ -1465,6 +1479,10 @@
sb.append("[");
+ if (this.startCutted) {
+ sb.append("<!>");
+ };
+
// Last element of sorted array
elem = this.snippetArray.getLast();
StringBuilder rightContext = new StringBuilder();
@@ -1480,7 +1498,7 @@
sb.append(this.snippetArray.get(i).toBrackets(this));
};
- if (this.cutted) {
+ if (this.endCutted) {
sb.append("<!>");
};
sb.append("]");