Make retrieval of snippets optional in search queries (fixes #58)
Change-Id: I8bb538633e9eff89bed6343f716c59305262c6d1
diff --git a/Changes b/Changes
index 94c2480..7ce2b5a 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,10 @@
-0.58.6 2019-05-28
+0.58.6 2019-06-04
- [bugfix] Updated cache loading (fixed #55) (diewald, margaretha)
- [bugfix] Introduce left match cutting so that
in matchinfo with expandToContext cutting won't
remove the actual match (diewald; reported by CoRoLa)
+ - [feature] Make retrieval of primarydata optional (fixes #58)
+ (diewald)
0.58.5 2019-03-18
- [bugfix] Fix bug where duplicate keys occured in
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 0b32c8b..f03d57d 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1496,12 +1496,13 @@
// The following fields should be lifted for matches
List<String> fields = (ArrayList<String>) meta.getFields().clone();
HashSet<String> fieldsSet = new HashSet<String>(fields);
+ boolean snippets = meta.hasSnippets();
// Lift all fields
if (fields.contains("@all")) {
fields = null;
}
- else {
+ else {
// Lift primary field
fieldsSet.add(field);
};
@@ -1575,8 +1576,7 @@
continue;
};
- final PositionsToOffset pto = new PositionsToOffset(atomic,
- field);
+ final PositionsToOffset pto = snippets ? new PositionsToOffset(atomic, field) : null;
// Spans spans = NearSpansOrdered();
final Spans spans = query.getSpans(atomic, (Bits) bitset,
@@ -1642,30 +1642,34 @@
// Create new Match
final Match match = new Match(pto, localDocID,
spans.start(), spans.end());
- match.setContext(kr.getContext());
- match.retrievePagebreaks("~:base/s:pb");
+ // Prepare snippet data (context, pagebreaks, payloads) only if snippets are requested
+ if (snippets) {
+ match.setContext(kr.getContext());
+ match.retrievePagebreaks("~:base/s:pb");
- if (DEBUG)
- log.trace("Retrieve pagebreaks from index");
+ if (DEBUG)
+ log.trace("Retrieve pagebreaks from index");
+
+ if (spans.isPayloadAvailable())
+ match.addPayload((List<byte[]>) spans.getPayload());
+ }
// Add match to Result
kr.add(match);
-
- if (spans.isPayloadAvailable())
- match.addPayload((List<byte[]>) spans.getPayload());
-
+
match.internalDocID = docID;
-
+
// Lift certain fields
if (fields != null) {
- match.populateDocument(doc, field, fields);
+ match.populateDocument(doc, snippets ? field : null, fields);
}
// Lift all fields
else {
- match.populateDocument(doc, field);
+ match.populateDocument(doc, snippets ? field : null);
};
+
if (DEBUG) {
if (match.getDocID() != null)
log.trace(
diff --git a/src/main/java/de/ids_mannheim/korap/KrillMeta.java b/src/main/java/de/ids_mannheim/korap/KrillMeta.java
index 6d715e5..8ce45c0 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillMeta.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillMeta.java
@@ -26,6 +26,9 @@
private short itemsPerResource = 0;
private SearchContext context;
+ // By default, snippets are requested
+ private boolean snippets = true;
+
private ArrayList<String> fields;
HashSet<Integer> highlights;
@@ -142,6 +145,11 @@
this.setItemsPerResource(json.get("itemsPerResource").asInt());
+ // Defined snippets
+ if (json.has("snippets")) {
+ this.snippets = json.get("snippets").asBoolean();
+ };
+
// Defined context
if (json.has("context"))
this.context.fromJson(json.get("context"));
@@ -261,6 +269,23 @@
};
+ /**
+ * Check whether snippets should be retrieved; returns the current snippets flag (true by default).
+ */
+ public boolean hasSnippets () {
+ return this.snippets;
+ };
+
+
+ /**
+ * Set whether snippets should be retrieved; returns this KrillMeta object so calls can be chained.
+ */
+ public KrillMeta setSnippets (boolean snippets) {
+ this.snippets = snippets;
+ return this;
+ };
+
+
// Get set of fields
/**
* Get the fields as a set
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 4695cef..9edd50c 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -137,7 +137,8 @@
*/
public void populateDocument (Document doc, String field,
List<String> fields) {
- this.setPrimaryData(doc.get(field));
+ if (field != null)
+ this.setPrimaryData(doc.get(field));
this.populateFields(doc, fields);
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index bf18bb5..e1d3931 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -1137,8 +1137,10 @@
// Relevant details are missing
if (this.positionsToOffset == null || this.localDocID == -1) {
- log.warn("You have to define "
- + "positionsToOffset and localDocID first before");
+ if (DEBUG) {
+ log.warn("You have to define "
+ + "positionsToOffset and localDocID first before");
+ }
return false;
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 282ee80..66fffc7 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -189,6 +189,16 @@
assertEquals(
"... okal. [[Der Buchstabe A hat in {1:deutschen Texten} eine durchschnittliche Häufigkeit von 6,51 %.]] Er ist damit der sechsthäufigste Buchstabe ...",
ks.apply(ki).getMatch(0).getSnippetBrackets());
+
+
+ // Do not retrieve snippets
+ meta.setSnippets(false);
+
+ Match km = ks.apply(ki).getMatch(0);
+
+ assertEquals("Ruru,Jens.Ol,Aglarech", km.toJsonNode().get("author").asText());
+ assertTrue(!km.toJsonNode().has("snippet"));
+ assertEquals("", km.getPrimaryData());
};