Added '@all' fields feature
Change-Id: Ib00c204676d65daa6a976104c3292923db5ece3e
diff --git a/Changes b/Changes
index 0f0ddee..9a7ca91 100644
--- a/Changes
+++ b/Changes
@@ -23,6 +23,8 @@
- [bugfix] Deserialization of wrapped spans (diewald)
- [bugfix] Fix KoralQuery mirroring in case of
parsing failures (diewald)
+ - [feature] Support '@all' as a 'fields' value to retrieve
+ all metadata fields (diewald)
0.51 2015-03-17
- This is a major version (prepared for the GitHub release)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index ed3b06e..7d27c17 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -944,15 +944,15 @@
if (DEBUG)
log.trace("We've found a matching document");
- HashSet<String> fields = (HashSet<String>) new Krill()
- .getMeta().getFields().clone();
-
- fields.add(field);
-
// Get terms from the document
Terms docTerms = atomic.reader().getTermVector(localDocID,
field);
+ HashSet<String> fields = (HashSet<String>) new Krill()
+ .getMeta().getFields().clone();
+
+ fields.add(field);
+
// Load the necessary fields of the document
Document doc = atomic.reader().document(localDocID, fields);
@@ -1251,6 +1251,11 @@
// Lift primary field
fields.add(field);
+ // Lift all fields
+ if (fields.contains("@all")) {
+ fields = null;
+ };
+
// Some initializations ...
int i = 0;
int startIndex = kr.getStartIndex();
@@ -1363,7 +1368,8 @@
int docID = atomic.docBase + localDocID;
// Do not load all of this, in case the doc is the same!
- Document doc = lreader.document(localDocID, fields);
+ Document doc = (fields != null) ? lreader.document(localDocID, fields) :
+ lreader.document(localDocID);
// Create new Match
Match match = new Match(pto, localDocID, spans.start(),
@@ -1377,7 +1383,15 @@
match.addPayload((List<byte[]>) spans.getPayload());
match.internalDocID = docID;
- match.populateDocument(doc, field, fields);
+
+ // Lift certain fields
+ if (fields != null) {
+ match.populateDocument(doc, field, fields);
+ }
+ // Lift all fields
+ else {
+ match.populateDocument(doc, field);
+ };
if (DEBUG) {
if (match.getDocID() != null)
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 48afadc..b324ffc 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -2,14 +2,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
@@ -18,6 +11,7 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.index.IndexableField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -380,6 +374,25 @@
this.addHighlight(new Highlight(target, target, id));
};
+
+ /**
+ * Populate document meta information from the index, requesting
+ * every field that is present in the given document.
+ *
+ * @param doc
+ * Document object.
+ * @param field
+ * Primary data field.
+ */
+ public void populateDocument (Document doc, String field) {
+ HashSet<String> fieldList = new HashSet<>(32);
+ Iterator<IndexableField> fieldIterator = doc.getFields().iterator();
+ while (fieldIterator.hasNext())
+ fieldList.add(fieldIterator.next().name());
+
+ this.populateDocument(doc, field, fieldList);
+ };
+
/**
* Populate document meta information with information coming from
@@ -393,10 +406,12 @@
* Hash object with all supported fields.
*/
public void populateDocument (Document doc, String field,
- HashSet<String> fields) {
+ Collection<String> fields) {
this.setField(field);
this.setPrimaryData(doc.get(field));
+ // Remember - never serialize "tokens"
+
// LEGACY
if (fields.contains("corpusID"))
this.setCorpusID(doc.get("corpusID"));
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 3b774ac..618218f 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -176,6 +176,38 @@
assertEquals("372-377", res.at("/matches/0/pages").asText());
assertEquals("match-GOE_AGX.00002-p7-8", res.at("/matches/0/matchID")
.asText());
+
+
+ // Request all fields using the "@all" value
+ jsonString = getString(getClass().getResource(
+ "/queries/metas/fields_at_all.jsonld").getFile());
+
+ ks = new Krill(jsonString);
+ kr = ks.apply(ki);
+ mapper = new ObjectMapper();
+ res = mapper.readTree(kr.toJsonString());
+
+ assertEquals("Verlag C. H. Beck", res.at("/matches/0/publisher")
+ .asText());
+ assertEquals("Aphorismus", res.at("/matches/0/textType").asText());
+ assertEquals("Aphorismen", res.at("/matches/0/textTypeRef").asText());
+ assertEquals(
+ "Goethe, Johann Wolfgang von: Maximen und Reflexionen. Religion und Christentum, [Aphorismen], (Erstveröffentlichung: Stuttgart ; Tübingen, 1827-1842), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 12, Schriften zur Kunst. Schriften zur Literatur. Maximen und Reflexionen, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 372-377",
+ res.at("/matches/0/reference").asText());
+ assertEquals("de", res.at("/matches/0/language").asText());
+ assertEquals("opennlp#tokens", res.at("/matches/0/tokenSource")
+ .asText());
+ assertEquals(
+ "base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences",
+ res.at("/matches/0/foundries").asText());
+ assertEquals("Goethe-Korpus", res.at("/matches/0/corpusTitle").asText());
+ assertEquals("QAO-NC", res.at("/matches/0/license").asText());
+ assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
+ res.at("/matches/0/docTitle").asText());
+ assertEquals("1827", res.at("/matches/0/creationDate").asText());
+ assertEquals("372-377", res.at("/matches/0/pages").asText());
+ assertEquals("match-GOE_AGX.00002-p7-8", res.at("/matches/0/matchID")
+ .asText());
};
diff --git a/src/test/resources/queries/metas/fields_at_all.jsonld b/src/test/resources/queries/metas/fields_at_all.jsonld
new file mode 100644
index 0000000..36d4046
--- /dev/null
+++ b/src/test/resources/queries/metas/fields_at_all.jsonld
@@ -0,0 +1,20 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "announcements" : [],
+ "errors" : [],
+ "meta" : {
+ "count":9,
+ "fields": ["@all"]
+ },
+ "query" : {
+ "@type" : "koral:token",
+ "wrap" : {
+ "@type" : "koral:term",
+ "key" : "VERB",
+ "foundry" : "xip",
+ "layer" : "pos",
+ "match" : "match:eq"
+ }
+ },
+ "warnings" : []
+}