Deserialization of new index files with new metadata (hluengen)
diff --git a/CHANGES b/CHANGES
index 7d7cc59..54e8f46 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+0.47 2014-11-01
+ - [feature] Support new index format with more metadata (diewald)
+
0.46 2014-10-21
- [feature] Support for relation queries (margaretha)
- [feature] Wrapping of extension queries
diff --git a/pom.xml b/pom.xml
index e94e090..721e4ea 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.46</version>
+ <version>0.47</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapDocument.java b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
index e8792b4..fb99f3a 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
@@ -21,14 +21,62 @@
private KorapPrimaryData primaryData;
@JsonIgnore
- public int internalDocID, localDocID, UID;
+ public int
+ internalDocID,
+ localDocID,
+ UID;
- private String author, textClass, corpusID,
- pubPlace, ID, title, subTitle,
- foundries, tokenization,
- layerInfo, field;
+ private KorapDate
+ pubDate,
+ // newly added
+ creationDate
+ ;
- private KorapDate pubDate;
+
+ private String
+ // No longer supported
+ ID,
+ corpusID,
+ field,
+ layerInfo,
+ tokenization,
+
+ // Still supported
+ foundries,
+ title,
+ subTitle,
+ author,
+ textClass,
+ pubPlace,
+
+ // newly added
+ textSigle,
+ docSigle,
+ corpusSigle,
+ publisher,
+ editor,
+ textType,
+ textTypeArt,
+ textTypeRef,
+ textColumn,
+ textDomain,
+ license,
+ pages,
+ fileEditionStatement,
+ biblEditionStatement,
+ reference,
+ language,
+ collTitle,
+ collSubTitle,
+ collAuthor,
+ collEditor,
+ corpusTitle,
+ corpusSubTitle,
+ keywords,
+ tokenSource,
+ layerInfos
+ ;
+
/**
* Set the publication date of the document the match occurs in.
@@ -38,12 +86,24 @@
* @see KorapDate#Constructor(String)
*/
public KorapDate setPubDate (String date) {
- // ObjectMapper mapper = new ObjectMapper();
this.pubDate = new KorapDate(date);
return this.pubDate;
};
/**
+ * Set the creation date of the document the match occurs in.
+ *
+ * @param date The date as a KorapDate compatible string representation.
+ * @return A KorapDate object for chaining.
+ * @see KorapDate#KorapDate(String)
+ */
+ public KorapDate setCreationDate (String date) {
+ this.creationDate = new KorapDate(date);
+ return this.creationDate;
+ };
+
+
+ /**
* Set the publication date of the document the match occurs in.
*
* @param date The date as a KorapDate object.
@@ -54,6 +114,19 @@
return (this.pubDate = date);
};
+
+ /**
+ * Set the creation date of the document the match occurs in.
+ *
+ * @param date The date as a KorapDate object.
+ * @return A KorapDate object for chaining.
+ * @see KorapDate
+ */
+ public KorapDate setCreationDate (KorapDate date) {
+ return (this.creationDate = date);
+ };
+
+
/**
* Get the publication date of the document the match occurs in as a KorapDate object.
*/
@@ -62,6 +135,15 @@
return this.pubDate;
};
+
+ /**
+ * Get the creation date of the document the match occurs in as a KorapDate object.
+ */
+ @JsonIgnore
+ public KorapDate getCreationDate () {
+ return this.creationDate;
+ };
+
@JsonProperty("pubDate")
public String getPubDateString () {
if (this.pubDate != null)
@@ -69,6 +151,13 @@
return null;
};
+ @JsonProperty("creationDate")
+ public String getCreationDateString () {
+ if (this.creationDate != null)
+ return this.creationDate.toDisplay();
+ return null;
+ };
+
public void setAuthor (String author) {
this.author = author;
};
@@ -93,19 +182,23 @@
return this.pubPlace;
};
+ // No longer supported
public void setCorpusID (String corpusID) {
this.corpusID = corpusID;
};
+ // No longer supported
@JsonProperty("corpusID")
public String getCorpusID () {
return this.corpusID;
};
+ // No longer supported
public void setID (String ID) {
this.ID = ID;
};
+ // No longer supported
@JsonProperty("ID")
public String getID () {
return this.ID;
@@ -178,27 +271,237 @@
return this.foundries;
};
+ // No longer supported
public void setTokenization (String tokenization) {
this.tokenization = tokenization;
};
+ // No longer supported
public String getTokenization () {
return this.tokenization;
};
+ // No longer supported
public void setLayerInfo (String layerInfo) {
this.layerInfo = layerInfo;
};
+ // No longer supported
public String getLayerInfo () {
return this.layerInfo;
};
+ public void setLayerInfos (String layerInfos) {
+ this.layerInfos = layerInfos;
+ };
+
+ public String getLayerInfos () {
+ return this.layerInfos;
+ };
+
+ // No longer necessary
public void setField (String field) {
this.field = field;
};
+ // No longer necessary
public String getField () {
return this.field;
};
+
+ // This is the new text id
+ public String getTextSigle () {
+ return this.textSigle;
+ };
+
+ // This is the new text id
+ public void setTextSigle (String textSigle) {
+ this.textSigle = textSigle;
+ };
+
+ // This is the new corpus id
+ public String getCorpusSigle () {
+ return this.corpusSigle;
+ };
+
+ // This is the new corpus id
+ public void setCorpusSigle (String corpusSigle) {
+ this.corpusSigle = corpusSigle;
+ };
+
+ public String getDocSigle () {
+ return this.docSigle;
+ };
+
+ public void setDocSigle (String docSigle) {
+ this.docSigle = docSigle;
+ };
+
+ public String getPublisher () {
+ return this.publisher;
+ };
+
+ public void setPublisher (String publisher) {
+ this.publisher = publisher;
+ };
+
+ public String getEditor () {
+ return this.editor;
+ };
+
+ public void setEditor (String editor) {
+ this.editor = editor;
+ };
+
+ public String getTextType () {
+ return this.textType;
+ };
+
+ public void setTextType (String textType) {
+ this.textType = textType;
+ };
+
+ public String getTextTypeArt () {
+ return this.textTypeArt;
+ };
+
+ public void setTextTypeArt (String textTypeArt) {
+ this.textTypeArt = textTypeArt;
+ };
+
+ public String getTextTypeRef () {
+ return this.textTypeRef;
+ };
+
+ public void setTextTypeRef (String textTypeRef) {
+ this.textTypeRef = textTypeRef;
+ };
+
+ public String getTextColumn () {
+ return this.textColumn;
+ };
+
+ public void setTextColumn (String textColumn) {
+ this.textColumn = textColumn;
+ };
+
+ public String getTextDomain () {
+ return this.textDomain;
+ };
+
+ public void setTextDomain (String textDomain) {
+ this.textDomain = textDomain;
+ };
+
+ public String getLicense () {
+ return this.license;
+ };
+
+ public void setLicense (String license) {
+ this.license = license;
+ };
+
+ public String getPages () {
+ return this.pages;
+ };
+
+ public void setPages (String pages) {
+ this.pages = pages;
+ };
+
+ public String getFileEditionStatement () {
+ return this.fileEditionStatement;
+ };
+
+ public void setFileEditionStatement (String fileEditionStatement) {
+ this.fileEditionStatement = fileEditionStatement;
+ };
+
+ public String getBiblEditionStatement () {
+ return this.biblEditionStatement;
+ };
+
+ public void setBiblEditionStatement (String biblEditionStatement) {
+ this.biblEditionStatement = biblEditionStatement;
+ };
+
+ public String getReference () {
+ return this.reference;
+ };
+
+ public void setReference (String reference) {
+ this.reference = reference;
+ };
+
+ public String getLanguage () {
+ return this.language;
+ };
+
+ public void setLanguage (String language) {
+ this.language = language;
+ };
+
+ public String getCollTitle () {
+ return this.collTitle;
+ };
+
+ public void setCollTitle (String collTitle) {
+ this.collTitle = collTitle;
+ };
+
+ public String getCollSubTitle () {
+ return this.collSubTitle;
+ };
+
+ public void setCollSubTitle (String collSubTitle) {
+ this.collSubTitle = collSubTitle;
+ };
+
+ public String getCollAuthor () {
+ return this.collAuthor;
+ };
+
+ public void setCollAuthor (String collAuthor) {
+ this.collAuthor = collAuthor;
+ };
+
+ public String getCollEditor () {
+ return this.collEditor;
+ };
+
+ public void setCollEditor (String collEditor) {
+ this.collEditor = collEditor;
+ };
+
+ public String getCorpusTitle () {
+ return this.corpusTitle;
+ };
+
+ public void setCorpusTitle (String corpusTitle) {
+ this.corpusTitle = corpusTitle;
+ };
+
+ public String getCorpusSubTitle () {
+ return this.corpusSubTitle;
+ };
+
+ public void setCorpusSubTitle (String corpusSubTitle) {
+ this.corpusSubTitle = corpusSubTitle;
+ };
+
+ public String getKeywords () {
+ return this.keywords;
+ };
+
+ public void setKeywords (String keywords) {
+ this.keywords = keywords;
+ };
+
+ public String getTokenSource () {
+ return this.tokenSource;
+ };
+
+ public void setTokenSource (String tokenSource) {
+ this.tokenSource = tokenSource;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index ab624fb..2f104a0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -310,6 +310,7 @@
new GZIPInputStream(new FileInputStream(json)),
FieldDocument.class
);
+
return fd;
};
return this.mapper.readValue(json, FieldDocument.class);
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index 075233c..38e2038 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -43,9 +43,6 @@
private String field;
private ObjectMapper json;
- // The default foundry for lemmata and pos
- private String defaultFoundry = "mate/";
-
// Logger
private final static Logger log = LoggerFactory.getLogger(KorapQuery.class);
@@ -568,7 +565,7 @@
if (json.has("foundry") && json.get("foundry").asText().length() > 0)
value.append(json.get("foundry").asText()).append('/');
- // value.append(defaultFoundry).append('/');
+ // No default foundry defined
if (json.has("layer") && json.get("layer").asText().length() > 0) {
String layer = json.get("layer").asText();
@@ -590,11 +587,6 @@
if (isCaseInsensitive && isTerm && layer.equals("s"))
layer = "i";
-
- // TEMPORARY
- if (value.length() == 0 && (layer.equals("l") || layer.equals("p")))
- value.append(defaultFoundry);
-
value.append(layer).append(':');
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 7a845be..96d8e31 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -8,6 +8,7 @@
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.StringField;
@@ -59,31 +60,6 @@
// see http://www.cowtowncoder.com/blog/archives/2011/07/entry_457.html
- /*
- @JsonCreator
- public FieldDocument(Map<String,Object> props) {
- this.id = (String) props.get("id");
- this.title = (String) props.get("title");
- };
-
- public FieldDocument (String json) {
-
-
- my $primary = ->{primary}
- corpus_id, pub_date, id, text_class (Array), author (Array), title, sub_title, pub_place
-
- foreach (->{fields}) {
- foreach (data) {
- foreach () {
- }
- }
- };
-created timestamp
-last_modified timestamp or KorapDate
-
- };
-*/
-
public void addInt (String key, int value) {
doc.add(new IntField(key, value, Field.Store.YES));
};
@@ -143,12 +119,55 @@
return new MultiTermTokenStream();
};
+ /**
+ * Deserialize token stream data.
+ */
+ public void setData (Map<String,Object> node) {
+ this.setPrimaryData((String) node.get("text"));
+
+ String fieldName = (String) node.get("name");
+ MultiTermTokenStream mtts = this.newMultiTermTokenStream();
+
+ // Iterate over all tokens in stream
+ for (ArrayList<String> token : (ArrayList<ArrayList<String>>) node.get("stream")) {
+
+ // Initialize MultiTermToken
+ MultiTermToken mtt = new MultiTermToken(token.remove(0));
+
+ // Add rest of the list
+ for (String term : token)
+ mtt.add(term);
+
+ // Add MultiTermToken to stream
+ mtts.addMultiTermToken(mtt);
+ };
+
+ // Add tokenstream to fielddocument
+ this.addTV(fieldName, this.getPrimaryData(), mtts);
+
+ // Get foundry info
+ if (node.containsKey("foundries"))
+ this.setFoundries((String) node.get("foundries"));
+
+ // Get layer info
+ if (node.containsKey("layerInfos"))
+ this.setLayerInfos((String) node.get("layerInfos"));
+
+ // Get tokenSource info
+ if (node.containsKey("tokenSource"))
+ this.setTokenSource((String) node.get("tokenSource"));
+ };
+
+ /**
+ * Deserialize token stream data (LEGACY).
+ */
public void setFields (ArrayList<Map<String,Object>> fields) {
Map<String,Object> primary = fields.remove(0);
this.setPrimaryData((String) primary.get("primaryData"));
for (Map<String,Object> field : fields) {
+
String fieldName = (String) field.get("name");
MultiTermTokenStream mtts = this.newMultiTermTokenStream();
@@ -186,7 +205,7 @@
@Override
public void setTextClass (String textClass) {
super.setTextClass(textClass);
- this.addText("textClass", textClass);
+ this.addKeyword("textClass", textClass);
};
@Override
@@ -221,12 +240,22 @@
return date;
};
+ @JsonProperty("creationDate")
+ @Override
+ public KorapDate setCreationDate (String creationDate) {
+ KorapDate date = super.setCreationDate(creationDate);
+ this.addInt("creationDate", date.toString());
+ return date;
+ };
+
+ // No longer supported
@Override
public void setCorpusID (String corpusID) {
super.setCorpusID(corpusID);
this.addString("corpusID", corpusID);
};
+ // No longer supported
@Override
public void setID (String ID) {
super.setID(ID);
@@ -239,10 +268,160 @@
this.addString("UID", new Integer(ID).toString());
};
+ // No longer supported
@Override
public void setLayerInfo (String layerInfo) {
- // System.err.println(layerInfo);
super.setLayerInfo(layerInfo);
this.addStored("layerInfo", layerInfo);
};
+
+ @Override
+ public void setTextSigle (String textSigle) {
+ super.setTextSigle(textSigle);
+ this.addString("textSigle", textSigle);
+ };
+
+ @Override
+ public void setDocSigle (String docSigle) {
+ super.setDocSigle(docSigle);
+ this.addString("docSigle", docSigle);
+ };
+
+ @Override
+ public void setCorpusSigle (String corpusSigle) {
+ super.setCorpusSigle(corpusSigle);
+ this.addString("corpusSigle", corpusSigle);
+ };
+
+ @Override
+ public void setPublisher (String publisher) {
+ super.setPublisher(publisher);
+ this.addStored("publisher", publisher);
+ };
+
+ @Override
+ public void setEditor (String editor) {
+ super.setEditor(editor);
+ this.addStored("editor", editor);
+ };
+
+ @Override
+ public void setTextType (String textType) {
+ super.setTextType(textType);
+ this.addString("textType", textType);
+ };
+
+ @Override
+ public void setTextTypeArt (String textTypeArt) {
+ super.setTextTypeArt(textTypeArt);
+ this.addString("textTypeArt", textTypeArt);
+ };
+
+ @Override
+ public void setTextTypeRef (String textTypeRef) {
+ super.setTextTypeRef(textTypeRef);
+ this.addString("textTypeRef", textTypeRef);
+ };
+
+ @Override
+ public void setTextColumn (String textColumn) {
+ super.setTextColumn(textColumn);
+ this.addStored("textColumn", textColumn);
+ };
+
+ @Override
+ public void setTextDomain (String textDomain) {
+ super.setTextDomain(textDomain);
+ this.addString("textDomain", textDomain);
+ };
+
+ @Override
+ public void setLicense (String license) {
+ super.setLicense(license);
+ this.addString("license", license);
+ };
+
+ @Override
+ public void setPages (String pages) {
+ super.setPages(pages);
+ this.addStored("pages", pages);
+ };
+
+ @Override
+ public void setFileEditionStatement (String fileEditionStatement) {
+ super.setFileEditionStatement(fileEditionStatement);
+ this.addStored("fileEditionStatement", fileEditionStatement);
+ };
+
+ @Override
+ public void setBiblEditionStatement (String biblEditionStatement) {
+ super.setBiblEditionStatement(biblEditionStatement);
+ this.addStored("biblEditionStatement", biblEditionStatement);
+ };
+
+ @Override
+ public void setReference (String reference) {
+ super.setReference(reference);
+ this.addStored("reference", reference);
+ };
+
+ @Override
+ public void setLanguage (String language) {
+ super.setLanguage(language);
+ this.addString("language", language);
+ };
+
+ @Override
+ public void setCollTitle (String collTitle) {
+ super.setCollTitle(collTitle);
+ this.addText("collTitle", collTitle);
+ };
+
+ @Override
+ public void setCollSubTitle (String collSubTitle) {
+ super.setCollSubTitle(collSubTitle);
+ this.addText("collSubTitle", collSubTitle);
+ };
+
+ @Override
+ public void setCollAuthor (String collAuthor) {
+ super.setCollAuthor(collAuthor);
+ this.addText("collAuthor", collAuthor);
+ };
+
+ @Override
+ public void setCollEditor (String collEditor) {
+ super.setCollEditor(collEditor);
+ this.addText("collEditor", collEditor);
+ };
+
+ @Override
+ public void setCorpusTitle (String corpusTitle) {
+ super.setCorpusTitle(corpusTitle);
+ this.addText("corpusTitle", corpusTitle);
+ };
+
+ @Override
+ public void setCorpusSubTitle (String corpusSubTitle) {
+ super.setCorpusSubTitle(corpusSubTitle);
+ this.addText("corpusSubTitle", corpusSubTitle);
+ };
+
+ @Override
+ public void setKeywords (String keywords) {
+ super.setKeywords(keywords);
+ this.addKeyword("keywords", keywords);
+ };
+
+ @Override
+ public void setTokenSource (String tokenSource) {
+ super.setTokenSource(tokenSource);
+ this.addStored("tokenSource", tokenSource);
+ };
+
+ @Override
+ public void setFoundries (String foundries) {
+ super.setFoundries(foundries);
+ this.addKeyword("foundries", foundries);
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/util/KorapDate.java b/src/main/java/de/ids_mannheim/korap/util/KorapDate.java
index bc3d717..8eafeb4 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KorapDate.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KorapDate.java
@@ -106,7 +106,7 @@
if (sb.length() < 8) {
sb.append("00");
- if (sb.length() < 6) {
+ if (sb.length() < 8) {
sb.append("00");
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
index 241d122..4d5256b 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKorapQueryJSON.java
@@ -18,8 +18,6 @@
@RunWith(JUnit4.class)
public class TestKorapQueryJSON {
- private String defaultFoundry = "mate/";
-
@Test
public void queryJSONBsp1 () throws QueryException {
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp1.jsonld").getFile());
@@ -37,7 +35,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp1b.jsonld").getFile());
// [base=foo]|([base=foo][base=bar]) meta author=Goethe&year=1815
- assertEquals(sqwi.toQuery().toString(), "spanOr([tokens:"+defaultFoundry+"l:foo, spanNext(tokens:"+defaultFoundry+"l:foo, tokens:"+defaultFoundry+"l:bar)])");
+ assertEquals(sqwi.toQuery().toString(), "spanOr([tokens:mate/l:foo, spanNext(tokens:mate/l:foo, tokens:mate/l:bar)])");
};
@@ -46,7 +44,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp2.jsonld").getFile());
// ([base=foo]|[base=bar])[base=foobar]
- assertEquals(sqwi.toQuery().toString(), "spanNext(spanOr([tokens:"+defaultFoundry+"l:foo, tokens:"+defaultFoundry+"l:bar]), tokens:"+defaultFoundry+"l:foobar)");
+ assertEquals(sqwi.toQuery().toString(), "spanNext(spanOr([tokens:mate/l:foo, tokens:mate/l:bar]), tokens:mate/l:foobar)");
};
@Test
@@ -54,7 +52,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp3.jsonld").getFile());
// shrink({[base=Mann]})
- assertEquals(sqwi.toQuery().toString(), "shrink(0: {0: tokens:"+defaultFoundry+"l:Mann})");
+ assertEquals(sqwi.toQuery().toString(), "shrink(0: {0: tokens:mate/l:Mann})");
};
@Test
@@ -62,7 +60,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp4.jsonld").getFile());
// shrink({[base=foo]}[orth=bar])
- assertEquals(sqwi.toQuery().toString(), "shrink(0: spanNext({0: tokens:"+defaultFoundry+"l:foo}, tokens:s:bar))");
+ assertEquals(sqwi.toQuery().toString(), "shrink(0: spanNext({0: tokens:mate/l:foo}, tokens:s:bar))");
};
@Test
@@ -70,7 +68,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp5.jsonld").getFile());
// shrink(1:[base=Der]{1:[base=Mann]})
- assertEquals(sqwi.toQuery().toString(), "shrink(1: spanNext(tokens:"+defaultFoundry+"l:Der, {1: tokens:"+defaultFoundry+"l:Mann}))");
+ assertEquals(sqwi.toQuery().toString(), "shrink(1: spanNext(tokens:mate/l:Der, {1: tokens:mate/l:Mann}))");
};
@Test
@@ -78,7 +76,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp6.jsonld").getFile());
// [base=katze]
- assertEquals(sqwi.toQuery().toString(), "tokens:"+defaultFoundry+"l:Katze");
+ assertEquals(sqwi.toQuery().toString(), "tokens:mate/l:Katze");
};
@Test
@@ -95,7 +93,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp9.jsonld").getFile());
// [base=Katze&orth=Katzen]
- assertEquals(sqwi.toQuery().toString(), "spanSegment(tokens:"+defaultFoundry+"l:Katze, tokens:s:Katzen)");
+ assertEquals(sqwi.toQuery().toString(), "spanSegment(tokens:mate/l:Katze, tokens:s:Katzen)");
};
@Test
@@ -112,7 +110,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp10.jsonld").getFile());
// [base=Katze][orth=und][orth=Hunde]
- assertEquals(sqwi.toQuery().toString(), "spanNext(spanNext(tokens:"+defaultFoundry+"l:Katze, tokens:s:und), tokens:s:Hunde)");
+ assertEquals(sqwi.toQuery().toString(), "spanNext(spanNext(tokens:mate/l:Katze, tokens:s:und), tokens:s:Hunde)");
};
@Test
@@ -134,14 +132,14 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp12.jsonld").getFile());
// contains(<np>,[base=Mann])
- assertEquals(sqwi.toQuery().toString(), "spanContain(<tokens:np />, tokens:"+defaultFoundry+"l:Mann)");
+ assertEquals(sqwi.toQuery().toString(), "spanContain(<tokens:np />, tokens:mate/l:Mann)");
};
@Test
public void queryJSONBsp13 () throws QueryException {
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp13.jsonld").getFile());
- assertEquals(sqwi.toQuery().toString(), "spanStartsWith(<tokens:np />, tokens:mate/p:Det)");
+ assertEquals(sqwi.toQuery().toString(), "spanStartsWith(<tokens:np />, tokens:p:Det)");
};
@Test
@@ -173,7 +171,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp16.jsonld").getFile());
// [(base=bar|base=foo)&orth=foobar]
- assertEquals(sqwi.toQuery().toString(), "spanSegment(spanOr([tokens:"+defaultFoundry+"l:bar, tokens:"+defaultFoundry+"l:foo]), tokens:s:foobar)");
+ assertEquals(sqwi.toQuery().toString(), "spanSegment(spanOr([tokens:mate/l:bar, tokens:mate/l:foo]), tokens:s:foobar)");
};
@Test
@@ -181,7 +179,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/bsp17.jsonld").getFile());
// within(<np>,[base=Mann])
- assertEquals(sqwi.toQuery().toString(), "spanContain(<tokens:np />, tokens:"+defaultFoundry+"l:Mann)");
+ assertEquals(sqwi.toQuery().toString(), "spanContain(<tokens:np />, tokens:mate/l:Mann)");
};
@Test
@@ -261,8 +259,7 @@
SpanQueryWrapper sqwi = jsonQuery(getClass().getResource("/queries/cosmas20.json").getFile());
// "MORPH(V) #IN(R) #ELEM(S)"
- // TODO: Uses defaultfoundry!
- assertEquals(sqwi.toQuery().toString(), "shrink(1: spanEndsWith(<tokens:s />, {1: tokens:mate/p:V}))");
+ assertEquals(sqwi.toQuery().toString(), "shrink(1: spanEndsWith(<tokens:s />, {1: tokens:p:V}))");
};
@Test
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 596d096..50fe017 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -476,6 +476,63 @@
assertEquals(0, kr.getStartIndex());
assertEquals(25, kr.getItemsPerPage());
};
+
+ @Test
+ public void searchJSONnewJSON () throws IOException {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ FieldDocument fd = ki.addDocFile(
+ 1,getClass().getResource("/goe/AGA-03828.json.gz").getFile(), true
+ );
+ ki.commit();
+
+ assertEquals(fd.getUID(), 1);
+ assertEquals(fd.getTextSigle(), "GOE_AGA.03828");
+ assertEquals(fd.getDocSigle(), "GOE_AGA");
+ assertEquals(fd.getCorpusSigle(), "GOE");
+ assertEquals(fd.getTitle() , "Autobiographische Einzelheiten");
+ assertNull(fd.getSubTitle());
+ assertEquals(fd.getTextType(), "Autobiographie");
+ assertNull(fd.getTextTypeArt());
+ assertNull(fd.getTextTypeRef());
+ assertNull(fd.getTextColumn());
+ assertNull(fd.getTextDomain());
+ assertEquals(fd.getPages(), "529-547");
+ assertEquals(fd.getLicense(), "QAO-NC");
+ assertEquals(fd.getCreationDate().toString(), "18200000");
+ assertEquals(fd.getPubDate().toString(), "19820000");
+ assertEquals(fd.getAuthor(), "Goethe, Johann Wolfgang von");
+ assertNull(fd.getTextClass());
+ assertEquals(fd.getLanguage(), "de");
+ assertEquals(fd.getPubPlace(), "München");
+ assertEquals(fd.getCorpusTitle(), "Goethes Werke");
+ assertEquals(fd.getReference(), "Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547");
+ assertEquals(fd.getPublisher(), "Verlag C. H. Beck");
+ assertEquals(fd.getCollEditor(), "Trunz, Erich");
+ assertEquals(fd.getCollEditor(), "Trunz, Erich");
+ assertNull(fd.getEditor());
+ assertNull(fd.getFileEditionStatement());
+ assertNull(fd.getBiblEditionStatement());
+ assertNull(fd.getCollTitle());
+ assertNull(fd.getCollSubTitle());
+ assertNull(fd.getCollAuthor());
+ assertNull(fd.getCorpusSubTitle());
+ assertNull(fd.getKeywords());
+
+ assertEquals(fd.getTokenSource(), "opennlp#tokens");
+ assertEquals(fd.getFoundries(), "base base/paragraphs base/sentences corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences");
+ assertEquals(fd.getLayerInfos(), "base/s=spans corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans");
+
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("tokens").seg("mate/m:case:nom").with("mate/m:number:pl")
+ );
+ KorapResult kr = ks.run(ki);
+
+ assertEquals(148, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ };
@Test
diff --git a/src/test/resources/goe/AGA-03828.json.gz b/src/test/resources/goe/AGA-03828.json.gz
new file mode 100644
index 0000000..1271de6
--- /dev/null
+++ b/src/test/resources/goe/AGA-03828.json.gz
Binary files differ