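Updated to the new metadata scheme: replaced the coll* fields with corpusAuthor/corpusEditor and docTitle/docSubTitle/docAuthor/docEditor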
diff --git a/src/main/java/de/ids_mannheim/korap/KorapDocument.java b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
index fb99f3a..e3dc9a0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapDocument.java
@@ -66,12 +66,14 @@
biblEditionStatement,
reference,
language,
- collTitle,
- collSubTitle,
- collAuthor,
- collEditor,
corpusTitle,
corpusSubTitle,
+ corpusAuthor,
+ corpusEditor,
+ docTitle,
+ docSubTitle,
+ docAuthor,
+ docEditor,
keywords,
tokenSource,
layerInfos
@@ -441,38 +443,6 @@
this.language = language;
};
- public String getCollTitle () {
- return this.collTitle;
- };
-
- public void setCollTitle (String collTitle) {
- this.collTitle = collTitle;
- };
-
- public String getCollSubTitle () {
- return this.collSubTitle;
- };
-
- public void setCollSubTitle (String collSubTitle) {
- this.collSubTitle = collSubTitle;
- };
-
- public String getCollAuthor () {
- return this.collAuthor;
- };
-
- public void setCollAuthor (String collAuthor) {
- this.collAuthor = collAuthor;
- };
-
- public String getCollEditor () {
- return this.collEditor;
- };
-
- public void setCollEditor (String collEditor) {
- this.collEditor = collEditor;
- };
-
public String getCorpusTitle () {
return this.corpusTitle;
};
@@ -489,6 +459,54 @@
this.corpusSubTitle = corpusSubTitle;
};
+ public String getCorpusAuthor () {
+ return this.corpusAuthor;
+ };
+
+ public void setCorpusAuthor (String corpusAuthor) {
+ this.corpusAuthor = corpusAuthor;
+ };
+
+ public String getCorpusEditor () {
+ return this.corpusEditor;
+ };
+
+ public void setCorpusEditor (String corpusEditor) {
+ this.corpusEditor = corpusEditor;
+ };
+
+ public String getDocTitle () {
+ return this.docTitle;
+ };
+
+ public void setDocTitle (String docTitle) {
+ this.docTitle = docTitle;
+ };
+
+ public String getDocSubTitle () {
+ return this.docSubTitle;
+ };
+
+ public void setDocSubTitle (String docSubTitle) {
+ this.docSubTitle = docSubTitle;
+ };
+
+ public String getDocAuthor () {
+ return this.docAuthor;
+ };
+
+ public void setDocAuthor (String docAuthor) {
+ this.docAuthor = docAuthor;
+ };
+
+ public String getDocEditor () {
+ return this.docEditor;
+ };
+
+ public void setDocEditor (String docEditor) {
+ this.docEditor = docEditor;
+ };
+
public String getKeywords () {
return this.keywords;
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 96d8e31..8c07720 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -372,27 +372,27 @@
};
@Override
- public void setCollTitle (String collTitle) {
- super.setCollTitle(collTitle);
- this.addText("collTitle", collTitle);
+ public void setDocTitle (String docTitle) {
+ super.setDocTitle(docTitle);
+ this.addText("docTitle", docTitle);
};
@Override
- public void setCollSubTitle (String collSubTitle) {
- super.setCollSubTitle(collSubTitle);
- this.addText("collSubTitle", collSubTitle);
+ public void setDocSubTitle (String docSubTitle) {
+ super.setDocSubTitle(docSubTitle);
+ this.addText("docSubTitle", docSubTitle);
};
@Override
- public void setCollAuthor (String collAuthor) {
- super.setCollAuthor(collAuthor);
- this.addText("collAuthor", collAuthor);
+ public void setDocAuthor (String docAuthor) {
+ super.setDocAuthor(docAuthor);
+ this.addText("docAuthor", docAuthor);
};
@Override
- public void setCollEditor (String collEditor) {
- super.setCollEditor(collEditor);
- this.addText("collEditor", collEditor);
+ public void setDocEditor (String docEditor) {
+ super.setDocEditor(docEditor);
+ this.addStored("docEditor", docEditor);
};
@Override
@@ -408,6 +408,18 @@
};
@Override
+ public void setCorpusAuthor (String corpusAuthor) {
+ super.setCorpusAuthor(corpusAuthor);
+ this.addText("corpusAuthor", corpusAuthor);
+ };
+
+ @Override
+ public void setCorpusEditor (String corpusEditor) {
+ super.setCorpusEditor(corpusEditor);
+ this.addStored("corpusEditor", corpusEditor);
+ };
+
+ @Override
public void setKeywords (String keywords) {
super.setKeywords(keywords);
this.addKeyword("keywords", keywords);
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 50fe017..bcb2343 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -506,24 +506,28 @@
assertNull(fd.getTextClass());
assertEquals(fd.getLanguage(), "de");
assertEquals(fd.getPubPlace(), "München");
- assertEquals(fd.getCorpusTitle(), "Goethes Werke");
assertEquals(fd.getReference(), "Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547");
assertEquals(fd.getPublisher(), "Verlag C. H. Beck");
- assertEquals(fd.getCollEditor(), "Trunz, Erich");
- assertEquals(fd.getCollEditor(), "Trunz, Erich");
assertNull(fd.getEditor());
assertNull(fd.getFileEditionStatement());
assertNull(fd.getBiblEditionStatement());
- assertNull(fd.getCollTitle());
- assertNull(fd.getCollSubTitle());
- assertNull(fd.getCollAuthor());
- assertNull(fd.getCorpusSubTitle());
assertNull(fd.getKeywords());
assertEquals(fd.getTokenSource(), "opennlp#tokens");
assertEquals(fd.getFoundries(), "base base/paragraphs base/sentences corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences");
assertEquals(fd.getLayerInfos(), "base/s=spans corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans");
+
+ assertEquals(fd.getCorpusTitle(), "Goethes Werke");
+ assertNull(fd.getCorpusSubTitle());
+ assertEquals(fd.getCorpusAuthor(), "Goethe, Johann Wolfgang von");
+ assertEquals(fd.getCorpusEditor(), "Trunz, Erich");
+
+ assertEquals(fd.getDocTitle(), "Goethe: Autobiographische Schriften II, (1817-1825, 1832)");
+ assertNull(fd.getDocSubTitle());
+ assertNull(fd.getDocEditor());
+ assertNull(fd.getDocAuthor());
+
KorapSearch ks = new KorapSearch(
new KorapQuery("tokens").seg("mate/m:case:nom").with("mate/m:number:pl")
);
@@ -533,7 +537,71 @@
assertEquals(0, kr.getStartIndex());
assertEquals(25, kr.getItemsPerPage());
};
-
+
+ @Test
+ public void searchJSONnewJSON2 () throws IOException {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ FieldDocument fd = ki.addDocFile(
+ 1,getClass().getResource("/bzk/D59-00089.json.gz").getFile(), true
+ );
+ ki.commit();
+
+ assertEquals(fd.getUID(), 1);
+ assertEquals(fd.getTextSigle(), "BZK_D59.00089");
+ assertEquals(fd.getDocSigle(), "BZK_D59");
+ assertEquals(fd.getCorpusSigle(), "BZK");
+ assertEquals(fd.getTitle() , "Saragat-Partei zerfällt");
+ assertEquals(fd.getPubDate().toString(), "19590219");
+
+ assertNull(fd.getSubTitle());
+ assertNull(fd.getAuthor());
+ assertNull(fd.getEditor());
+ assertEquals(fd.getPubPlace(), "Berlin");
+ assertNull(fd.getPublisher());
+ assertEquals(fd.getTextType(), "Zeitung: Tageszeitung");
+ assertNull(fd.getTextTypeArt());
+ assertEquals(fd.getTextTypeRef(), "Tageszeitung");
+ assertEquals(fd.getTextDomain(), "Politik");
+ assertEquals(fd.getCreationDate().toString(), "19590219");
+ assertEquals(fd.getLicense(), "ACA-NC-LC");
+ assertEquals(fd.getTextColumn(), "POLITIK");
+ assertNull(fd.getPages());
+ assertEquals(fd.getTextClass(), "politik ausland");
+ assertNull(fd.getFileEditionStatement());
+ assertNull(fd.getBiblEditionStatement());
+
+ assertEquals(fd.getLanguage(), "de");
+ assertEquals(fd.getReference(), "Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14, Berliner Ausgabe, S. 7. - Sachgebiet: Politik, Originalressort: POLITIK; Saragat-Partei zerfällt");
+ assertNull(fd.getPublisher());
+ assertNull(fd.getKeywords());
+
+ assertEquals(fd.getTokenSource(), "opennlp#tokens");
+
+ assertEquals(fd.getFoundries(), "base base/paragraphs base/sentences corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences");
+
+ assertEquals(fd.getLayerInfos(), "base/s=spans corenlp/c=spans corenlp/ne=tokens corenlp/p=tokens corenlp/s=spans glemm/l=tokens mate/l=tokens mate/m=tokens mate/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens tt/s=spans");
+
+ assertEquals(fd.getCorpusTitle(), "Bonner Zeitungskorpus");
+ assertNull(fd.getCorpusSubTitle());
+ assertNull(fd.getCorpusAuthor());
+ assertNull(fd.getCorpusEditor());
+
+ assertEquals(fd.getDocTitle(), "Neues Deutschland");
+ assertEquals(fd.getDocSubTitle(), "Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands");
+ assertNull(fd.getDocEditor());
+ assertNull(fd.getDocAuthor());
+
+ KorapSearch ks = new KorapSearch(
+ new KorapQuery("tokens").seg("mate/m:case:nom").with("mate/m:number:sg")
+ );
+ KorapResult kr = ks.run(ki);
+
+ assertEquals(6, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ };
@Test
public void searchJSONCollection () throws IOException {
diff --git a/src/test/resources/bzk/D59-00089.json.gz b/src/test/resources/bzk/D59-00089.json.gz
new file mode 100644
index 0000000..5fb74a3
--- /dev/null
+++ b/src/test/resources/bzk/D59-00089.json.gz
Binary files differ
diff --git a/src/test/resources/goe/AGA-03828.json.gz b/src/test/resources/goe/AGA-03828.json.gz
index 1271de6..e8ffb7a 100644
--- a/src/test/resources/goe/AGA-03828.json.gz
+++ b/src/test/resources/goe/AGA-03828.json.gz
Binary files differ