Test JSON serialization of pagebreak retrieval
Change-Id: Id38f3b3a5287f7be81fbce59d47d1d7c34f6c1c6
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index d367382..d89c8cc 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -63,7 +63,8 @@
corpusSubTitle, corpusAuthor, corpusEditor, textType, textTypeArt,
textTypeRef, textColumn, textDomain, fileEditionStatement,
biblEditionStatement, publisher, reference, language, license,
- pages, keywords,
+ // pages,
+ keywords,
// Meta information regarding annotations
tokenSource, layerInfos;
@@ -188,9 +189,10 @@
this.setLanguage(doc.get("language"));
if (fields.contains("license"))
this.setLicense(doc.get("license"));
+ /*
if (fields.contains("pages"))
this.setPages(doc.get("pages"));
-
+ */
if (fields.contains("biblEditionStatement"))
this.setBiblEditionStatement(doc.get("biblEditionStatement"));
if (fields.contains("fileEditionStatement"))
@@ -821,26 +823,26 @@
};
- /**
+ /*
* Get the page numbers of the text as a string.
*
* @return The page numbers of the text as a string.
- */
public String getPages () {
return this.pages;
};
+ */
- /**
+ /*
* Set the page numbers of the text as a string.
*
* @param pages
* The page numbers of the text as a string.
- */
+ *
public void setPages (String pages) {
this.pages = pages;
};
-
+ */
/**
* Get the file edition statement of the text as a string.
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 19f1ec5..89e839a 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -31,6 +31,8 @@
The character offset may need a special encoding in Lucene
To store the character offsets directly (not in the payloads),
to make this less messy and speed things up.
+
+ TODO: license is now called "availability"!
*/
/**
@@ -422,12 +424,13 @@
this.addString("license", license);
};
-
+ /*
@Override
public void setPages (String pages) {
super.setPages(pages);
this.addStored("pages", pages);
};
+ */
@Override
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
index 162e5eb..26f2202 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
@@ -102,8 +102,10 @@
krd.setLicense("cc");
assertEquals("cc", krd.getLicense());
- krd.setPages("56-78");
- assertEquals("56-78", krd.getPages());
+ /*
+ krd.setPages("56-78");
+ assertEquals("56-78", krd.getPages());
+ */
krd.setFileEditionStatement("no problemo 1");
assertEquals("no problemo 1", krd.getFileEditionStatement());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index de69724..79fe59b 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -25,6 +25,7 @@
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.SearchContext;
@RunWith(JUnit4.class)
@@ -514,7 +515,7 @@
assertNull(fd.getTextTypeRef());
assertNull(fd.getTextColumn());
assertNull(fd.getTextDomain());
- assertEquals(fd.getPages(), "529-547");
+ // assertEquals(fd.getPages(), "529-547");
assertEquals(fd.getLicense(), "QAO-NC");
assertEquals(fd.getCreationDate().toString(), "18200000");
assertEquals(fd.getPubDate().toString(), "19820000");
@@ -571,6 +572,80 @@
assertEquals(25, kr.getItemsPerPage());
};
+
+    @Test
+    public void searchJSONwithPagebreaks () throws IOException {
+        // Index the GOE/AGA/03828 "-pb" fixture, check its metadata, and verify
+        // that a match exposes its start page both via the API and in its JSON.
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = ki.addDoc(1,
+                getClass().getResourceAsStream("/goe/AGA-03828-pb.json.gz"), true);
+        ki.commit();
+
+        assertEquals(1, fd.getUID());
+        assertEquals("GOE/AGA/03828", fd.getTextSigle());
+        assertEquals("GOE/AGA", fd.getDocSigle());
+        assertEquals("GOE", fd.getCorpusSigle());
+        assertEquals("Autobiographische Einzelheiten", fd.getTitle());
+        assertNull(fd.getSubTitle());
+        assertEquals("Autobiographie", fd.getTextType());
+        assertNull(fd.getTextTypeArt());
+        assertNull(fd.getTextTypeRef());
+        assertNull(fd.getTextColumn());
+        assertNull(fd.getTextDomain());
+        // assertEquals("529-547", fd.getPages());
+        // assertEquals("QAO-NC", fd.getAvailability());
+        assertEquals("18200000", fd.getCreationDate().toString());
+        assertEquals("19820000", fd.getPubDate().toString());
+        assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
+        assertNull(fd.getTextClass());
+        assertEquals("de", fd.getLanguage());
+        assertEquals("München", fd.getPubPlace());
+        assertEquals("Goethe, Johann Wolfgang von:"
+                + " Autobiographische Einzelheiten,"
+                + " (Geschrieben bis 1832), In: Goethe,"
+                + " Johann Wolfgang von: Goethes Werke,"
+                + " Bd. 10, Autobiographische Schriften"
+                + " II, Hrsg.: Trunz, Erich. München: "
+                + "Verlag C. H. Beck, 1982, S. 529-547",
+                fd.getReference());
+        assertEquals("Verlag C. H. Beck", fd.getPublisher());
+        assertNull(fd.getEditor());
+        assertNull(fd.getFileEditionStatement());
+        assertNull(fd.getBiblEditionStatement());
+        assertNull(fd.getKeywords());
+
+        assertEquals("base#tokens_aggr", fd.getTokenSource());
+        assertEquals("dereko dereko/structure "
+                + "dereko/structure/base-sentences-paragraphs-pagebreaks",
+                fd.getFoundries());
+        assertEquals("dereko/s=spans", fd.getLayerInfos());
+
+        assertEquals("Goethes Werke", fd.getCorpusTitle());
+        assertNull(fd.getCorpusSubTitle());
+        assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
+        assertEquals("Trunz, Erich", fd.getCorpusEditor());
+        assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)",
+                fd.getDocTitle());
+        assertNull(fd.getDocSubTitle());
+        assertNull(fd.getDocEditor());
+        assertNull(fd.getDocAuthor());
+
+        // Search the "tokens" field for the segment "s:der".
+        Result kr = new Krill(new QueryBuilder("tokens").seg("s:der")).apply(ki);
+
+        assertEquals(97, kr.getTotalResults());
+        assertEquals(0, kr.getStartIndex());
+        assertEquals(25, kr.getItemsPerPage());
+
+        Match m = kr.getMatch(5);
+        assertEquals("Start page", 529, m.getStartPage());
+
+        // The serialized match must report the same page at "/pages/0".
+        JsonNode res = new ObjectMapper().readTree(m.toJsonString());
+        assertEquals(529, res.at("/pages/0").asInt());
+    };
+
@Test
public void searchJSONnewJSON2 () throws IOException {
@@ -600,7 +675,7 @@
assertEquals(fd.getCreationDate().toString(), "19590219");
assertEquals(fd.getLicense(), "ACA-NC-LC");
assertEquals(fd.getTextColumn(), "POLITIK");
- assertNull(fd.getPages());
+ // assertNull(fd.getPages());
assertEquals(fd.getTextClass(), "politik ausland");
assertNull(fd.getFileEditionStatement());
assertNull(fd.getBiblEditionStatement());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 7a70f13..dfc7030 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -178,7 +178,7 @@
assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
res.at("/matches/0/docTitle").asText());
assertEquals("1827", res.at("/matches/0/creationDate").asText());
- assertEquals("372-377", res.at("/matches/0/pages").asText());
+ // assertEquals("372-377", res.at("/matches/0/pages").asText());
assertEquals("match-GOE_AGX.00002-p7-8",
res.at("/matches/0/matchID").asText());
@@ -211,7 +211,7 @@
assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
res.at("/matches/0/docTitle").asText());
assertEquals("1827", res.at("/matches/0/creationDate").asText());
- assertEquals("372-377", res.at("/matches/0/pages").asText());
+ // assertEquals("372-377", res.at("/matches/0/pages").asText());
assertEquals("match-GOE_AGX.00002-p7-8",
res.at("/matches/0/matchID").asText());
};