Changed ID to docID in matches, stored layerInfo in the index, deserialized cutOff
diff --git a/src/main/java/de/ids_mannheim/korap/KorapDocument.java b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
index e65a6f2..2a86bcf 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
@@ -18,6 +18,7 @@
*
* @author ndiewald
*/
+@JsonIgnoreProperties(ignoreUnknown = true)
public abstract class KorapDocument {
private KorapPrimaryData primaryData;
@@ -26,7 +27,8 @@
private String author, textClass, corpusID,
pubPlace, ID, title, subTitle,
- foundries, tokenization;
+ foundries, tokenization,
+ layerInfo;
private KorapDate pubDate;
@@ -168,4 +170,12 @@
public String getTokenization () {
return this.tokenization;
};
+
+ public void setLayerInfo (String layerInfo) { // Stores the given layerInfo string in this document's layerInfo field.
+ this.layerInfo = layerInfo;
+ };
+
+ public String getLayerInfo () { // Returns the current value of this document's layerInfo field.
+ return this.layerInfo;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 614302f..674dee8 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -137,6 +137,7 @@
fieldsToLoad.add("pubDate");
fieldsToLoad.add("corpusID");
fieldsToLoad.add("foundries");
+ fieldsToLoad.add("layerInfo");
fieldsToLoad.add("tokenization");
// don't load tokenization
@@ -671,6 +672,8 @@
match.setFoundries(doc.get("foundries"));
match.setTokenization(doc.get("tokenization"));
+ match.setLayerInfo(doc.get("layerInfo"));
+
match.setPrimaryData(
new KorapPrimaryData(doc.get(foundry))
);
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index fad0867..38e32d2 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -115,6 +115,17 @@
this.highlight.add(new int[]{ start, end, number});
};
+ @JsonProperty("docID") // Property name "docID" is used for this getter in JSON mapping.
+ public String getDocID () { // Returns the inherited ID unchanged, i.e. the value of super.getID().
+ return super.getID();
+ };
+
+ @Override
+ @JsonProperty("ID") // Property name "ID" is used for this getter in JSON mapping.
+ public String getID () { // Overrides the inherited getter: the returned ID is built from getDocID() plus a suffix.
+ return this.getDocID() + "#match..."; // NOTE(review): "#match..." is a fixed literal and looks like a placeholder - confirm the intended match-ID format.
+ };
+
private void _reset () {
this.processed = false;
this.snippetHTML = null;
@@ -368,6 +379,10 @@
// Retrieve last combinator on stack
lastComb = this.combine.peekLast();
+ /*
+ System.err.println("+" + lastComb.type + "|" + lastComb.number + "|" + number + "|" + eold);
+ */
+
// combinator is opening and the number is not equal to the last
// element on the balanceStack
if (lastComb.type == 1 && lastComb.number != eold) {
@@ -392,6 +407,7 @@
// Get last combinator on the stack
lastComb = this.combine.peekLast();
+ /*
// The last combinator is opening and identical to the current one
if (lastComb.type == 1 && lastComb.number == number) {
// Remove the damn thing - It's empty and uninteresting!
@@ -401,7 +417,25 @@
// Add a closer
this.combine.add(new HighlightCombinatorElement((byte) 2, number));
};
-
+ */
+
+ /*
+ System.err.println(":" + lastComb.type + "|" + lastComb.number + "|" + number);
+ */
+
+ if (lastComb.type == 1 && lastComb.number == number) {
+ while (lastComb != null && lastComb.type == 1 && lastComb.number == number) {
+ // Remove the empty opener; the null guard stops the loop once removeLast() has emptied the stack and peekLast() has nothing left to return
+ this.combine.removeLast();
+ lastComb = this.combine.peekLast();
+ };
+ }
+ else {
+ // No matching empty opener on top of the stack - add a closer (type 2) for this number
+ this.combine.add(new HighlightCombinatorElement((byte) 2, number));
+ };
+
+
// Fetch everything from the tempstack and reopen it
for (int e : tempStack) {
combine.add(new HighlightCombinatorElement((byte) 1, e));
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index d74ae0d..2723424 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -13,6 +13,10 @@
import com.fasterxml.jackson.databind.node.*;
import com.fasterxml.jackson.annotation.*;
+/*
+TODO: Reuse the KorapSearch code for data serialization!
+*/
+
public class KorapResult {
ObjectMapper mapper = new ObjectMapper();
diff --git a/src/main/java/de/ids_mannheim/korap/KorapSearch.java b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
index 3d36344..c25a34e 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapSearch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapSearch.java
@@ -130,6 +130,10 @@
if (json.has("startPage"))
this.setStartPage(json.get("startPage").asInt());
+ // Defined cutOff
+ if (json.has("cutOff"))
+ this.setCutOff(json.get("cutOff").asBoolean());
+
// Defined contexts
if (json.has("context")) {
JsonNode context = json.get("context");
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 879caf5..8711c0c 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -11,6 +11,7 @@
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -97,6 +98,15 @@
doc.add(new StringField(key, value, Field.Store.YES));
};
+ public void addStored (String key, String value) { // Adds a StoredField with the given key and string value to doc.
+ doc.add(new StoredField(key, value));
+ };
+
+ public void addStored (String key, int value) { // Adds a StoredField with the given key and int value to doc.
+ doc.add(new StoredField(key, value));
+ };
+
+
public void addTV (String key, String value, String tsString) {
this.addTV(key, value, new MultiTermTokenStream(tsString));
};
@@ -216,4 +226,10 @@
super.setID(ID);
this.addString("ID", ID);
};
+
+ @Override
+ public void setLayerInfo (String layerInfo) { // Sets layerInfo on the superclass and additionally records it under the key "layerInfo" via addStored.
+ super.setLayerInfo(layerInfo);
+ this.addStored("layerInfo", layerInfo); // NOTE(review): Lucene's StoredField is documented to reject a null value - confirm layerInfo is never null here.
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index b5cabbb..2094b56 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -131,7 +131,7 @@
assertEquals(km.getPrimaryData(),"abc");
assertEquals(km.getCorpusID(),"WPD");
- assertEquals(km.getID(),"WPD-AAA-00001");
+ assertEquals(km.getDocID(),"WPD-AAA-00001");
assertEquals(km.getTextClass(),"music entertainment");
assertEquals(km.getAuthor(),"Peter Frankenfeld");
assertEquals(km.getTitle(),"Wikipedia");
@@ -194,11 +194,11 @@
assertEquals(3, kr.totalResults());
assertEquals("... Lofoten in [Norwegen], unt ...", kr.match(0).getSnippetBrackets());
- assertEquals("WPD_AAA.00002", kr.match(0).getID());
+ assertEquals("WPD_AAA.00002", kr.match(0).getDocID());
assertEquals("... es in [Norwegen] noch ...", kr.match(1).getSnippetBrackets());
- assertEquals("WPD_AAA.00002", kr.match(1).getID());
+ assertEquals("WPD_AAA.00002", kr.match(1).getDocID());
assertEquals("... Orte in [Norwegen]: Å i ...", kr.match(2).getSnippetBrackets());
- assertEquals("WPD_AAA.00005", kr.match(2).getID());
+ assertEquals("WPD_AAA.00005", kr.match(2).getDocID());
query = kq.seg("tt/l:Vokal").without("mate/m:number:sg").toQuery();