Clean up for release and test MatchExport
Change-Id: Ib4b6cdb8b835cce03698fcfddeb6c4cf26f2079e
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/CsvExporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/CsvExporter.java
index 5e5b723..d5791d9 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/CsvExporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/CsvExporter.java
@@ -10,7 +10,7 @@
*/
public class CsvExporter extends MatchAggregator implements Exporter {
- private ObjectMapper mapper = new ObjectMapper();
+ private static final ObjectMapper mapper = new ObjectMapper();
@Override
public String getMimeType () {
@@ -22,8 +22,8 @@
public String getSuffix () {
return "csv";
};
+
-
@Override
public void writeHeader (Writer w) throws IOException {
this.addRecord(
@@ -43,15 +43,18 @@
);
};
+
@Override
public void addMatch (JsonNode n, Writer w) throws IOException {
MatchExport m = mapper.treeToValue(n, MatchExport.class);
Snippet s = m.getSnippetO();
- String left = s.getLeft();
- String mark = s.getMark();
- String right = s.getRight();
+ String left = s.getLeft(),
+ mark = s.getMark(),
+ right = s.getRight();
+ // For CSV export the Snippet
+ // fragments are trimmed
if (left != null)
left = left.trim();
@@ -78,7 +81,9 @@
};
- // Add a CSV row to the CSV stream
+ /*
+ * Add a CSV row to the CSV stream
+ */
private void addRecord (Writer w, String[] ss) throws IOException {
this.addCell(w , ss[0]);
for (int i = 1; i < 10; i++) {
@@ -89,7 +94,9 @@
};
- // Add a CSV cell to the CSV row
+ /*
+ * Add a CSV cell to the CSV row
+ */
private void addCell (Writer w, String s) throws IOException {
// If meta characters exist, make a quote
@@ -99,6 +106,9 @@
s.contains(" ") ||
s.contains("\t") ||
s.contains(";")) {
+
+ // Iterate over all characters
+ // and turn '"' into '""'.
w.append('"');
for (int i = 0; i < s.length(); i++) {
final char c = s.charAt(i);
@@ -112,6 +122,7 @@
w.append('"');
}
+ // No escaping required
else {
w.append(s);
};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
index 122d03d..62cf5c3 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
@@ -1,13 +1,15 @@
package de.ids_mannheim.korap.plkexport;
+
import com.fasterxml.jackson.databind.JsonNode;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.ResponseBuilder;
import java.io.IOException;
import java.io.Writer;
-
import org.glassfish.jersey.media.sse.EventOutput;
-
+/**
+ * Exporter interface every exporter needs to satisfy.
+ */
interface Exporter {
// Implemented by MatchAggregator
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/JsonExporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/JsonExporter.java
index de4ac51..ded4068 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/JsonExporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/JsonExporter.java
@@ -66,21 +66,22 @@
w.append(',');
w.append("\"matches\":[");
- }
+ };
+
@Override
public void writeFooter (Writer w) throws IOException {
w.append("]}");
};
+
@Override
public void addMatch (JsonNode n, Writer w) throws IOException {
- if (firstMatch) {
+ if (firstMatch)
firstMatch = false;
- }
- else {
+ else
w.append(',');
- };
+
w.append(n.toString());
return;
};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
index ca924e1..8535262 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
@@ -33,46 +33,82 @@
* Base class for collecting matches and header information
* for exporters implementing the Exporter interface.
*/
-
public class MatchAggregator {
private final Properties prop = ExWSConf.properties(null);
- private ObjectMapper mapper = new ObjectMapper();
+ private static final ObjectMapper mapper = new ObjectMapper();
+ // In-memory and persistent writer for data
private Writer writer;
-
private File file;
-
+
+ // Meta information for result exports
private JsonNode meta, query, collection;
private String fname, queryString, corpusQueryString, src;
private boolean timeExceeded = false;
- private int totalResults = -1;
- private int maxResults = -1;
- private int fetchedResults = 0;
-
+
+ // Result calculations (partially for progress)
+ private int totalResults = -1,
+ maxResults = -1,
+ fetchedResults = 0;
+
+ // Event writer for progress
private EventOutput evOut;
-
+
+
+ /**
+ * MimeType of the exporter -
+ * defaults to "text/plain" but
+ * should be overridden.
+ */
public String getMimeType() {
return "text/plain";
};
+
+ /**
+ * Suffix of the exported file -
+ * defaults to "txt" but
+ * should be overridden.
+ */
public String getSuffix() {
return "txt";
};
+
+ /**
+ * Total results of exportable matches.
+ */
public int getTotalResults() {
return this.totalResults;
};
+
+ /**
+ * Indicator if time was exceeded when
+ * fetching all matches. This means
+ * that "totalResults" needs
+ * to be treated as a minimum value.
+ */
public boolean hasTimeExceeded() {
return this.timeExceeded;
};
+
+ /**
+ * Set the file name of the file to
+ * be exported.
+ */
public void setFileName (String fname) {
this.fname = fname;
};
+
+ /**
+ * Get the file name of the file to
+ * be exported.
+ */
public String getFileName () {
String s = this.fname;
if (s == null)
@@ -82,22 +118,42 @@
return sanitizeFileName(s);
};
+
+ /**
+ * Set the query string.
+ */
public void setQueryString (String query) {
this.queryString = query;
};
+
+ /**
+ * Get the query string.
+ */
public String getQueryString () {
return this.queryString;
};
+
+ /**
+ * Set the corpus query string.
+ */
public void setCorpusQueryString (String query) {
this.corpusQueryString = query;
};
+
+ /**
+ * Get the corpus query string.
+ */
public String getCorpusQueryString () {
return this.corpusQueryString;
};
+
+ /**
+ * Set the source information.
+ */
public void setSource (String host, String path) {
StringBuilder s = new StringBuilder(32);
if (host != null)
@@ -109,49 +165,94 @@
this.src = s.toString();
};
+
+ /**
+ * Get the source information.
+ */
public String getSource () {
return this.src;
};
+
+ /**
+ * Set the meta JSON blob.
+ */
public void setMeta (JsonNode meta) {
this.meta = meta;
};
+
+ /**
+ * Get the meta JSON blob.
+ */
public JsonNode getMeta () {
return this.meta;
};
+
+ /**
+ * Set the query JSON blob.
+ */
public void setQuery (JsonNode query) {
this.query = query;
};
- // Needs to be set before first addMatch
- public void setMaxResults (int maxResults) {
- this.maxResults = maxResults;
- };
-
- public int getMaxResults () {
- return this.maxResults;
- };
+ /**
+ * Get the query JSON blob.
+ */
public JsonNode getQuery () {
return this.query;
};
+
+ /**
+ * Set the collection JSON blob.
+ */
public void setCollection (JsonNode collection) {
this.collection = collection;
};
+
+ /**
+ * Get the collection JSON blob.
+ */
public JsonNode getCollection () {
return this.collection;
};
+
+ /**
+ * Set the maximum results to be fetched.
+ *
+ * This needs to be set prior to the first
+ * "addMatch" so it can be taken into account.
+ */
+ public void setMaxResults (int maxResults) {
+ this.maxResults = maxResults;
+ };
+
+
+ /**
+ * Get the maximum results to be fetched.
+ */
+ public int getMaxResults () {
+ return this.maxResults;
+ };
+
+
+ /**
+ * Get the export ID which is the pointer
+ * to where the system can find the temporarily
+ * generated file.
+ */
public String getExportID () {
if (this.file == null)
return "";
return this.file.getName();
};
+
/**
* Set the file based on the export ID
*/
@@ -161,64 +262,43 @@
exportID
);
}
+
+ /**
+ * Write header for exportation.
+ *
+ * Should be overridden.
+ */
public void writeHeader (Writer w) throws IOException { };
+
+
+ /**
+ * Write footer for exportation.
+ *
+ * Should be overridden.
+ */
public void writeFooter (Writer w) throws IOException { };
+
+
+ /**
+ * Write a single match.
+ *
+ * Should be overridden.
+ */
public void addMatch (JsonNode n, Writer w) throws IOException { };
+
+ /**
+ * Set the event stream for progress feedback.
+ */
public void setSse (EventOutput eventOutput) {
this.evOut = eventOutput;
};
-
- private File getFileDirectory () {
-
- String fileDir = prop.getProperty(
- "conf.file_dir",
- System.getProperty("java.io.tmpdir")
- );
-
- File dir = new File(fileDir);
-
- // Create directory if not yet existing
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- else if (!dir.canWrite()) {
- fileDir = System.getProperty("java.io.tmpdir");
- System.err.println("Unable to write to directory");
- System.err.println("Fallback to " + fileDir);
- dir = new File(fileDir);
- };
- return dir;
- };
- // Send the progress
- private void sendProgress () {
-
- if (this.evOut == null || this.maxResults == 0)
- return;
-
- if (this.evOut.isClosed())
- return;
-
- int calc = (int) Math.ceil(((double) this.fetchedResults / this.maxResults) * 100);
-
- final OutboundEvent.Builder eventBuilder = new OutboundEvent.Builder();
- eventBuilder.name("Progress");
- eventBuilder.data(String.valueOf(calc));
-
- try {
- this.evOut.write(eventBuilder.build());
- } catch (IOException e) {
- return;
- };
- };
-
/**
- * Force creation of a file, even when only a few
- * matches are requested.
+ * Force the creation of a file, even when only
+ * a few matches are requested.
*/
public void forceFile () {
@@ -235,34 +315,34 @@
dir
);
- // better delete after it is not needed anymore
- // this.file.deleteOnExit();
-
String s = null;
+ // Take temporary data from the in-memory writer
if (writer != null)
s = writer.toString();
- // Establish writer
+ // Establish persistent writer
writer = new BufferedWriter(new FileWriter(this.file, true));
- // Add in memory string
+ // Add in-memory string
if (s != null)
writer.write(s);
}
- catch (IOException e) {
- // Will rely on in-memory data
+ // If data can't be stored on disk, the writer will
+ // rely on in-memory data, which may or may not work in
+ // different contexts.
+ catch (IOException e) {
return;
};
};
- };
-
+ };
+
/**
- * Create new match aggregator and parse initial Json
- * file to get header information and initial matches.
+ * Parse initial JSON file to get header information
+ * and initial matches.
*/
public boolean init (String resp) throws IOException, JsonParseException {
@@ -270,15 +350,15 @@
return false;
JsonParser parser = mapper.getFactory().createParser(resp);
- JsonNode actualObj = mapper.readTree(parser);
+ JsonNode root = mapper.readTree(parser);
- if (actualObj == null)
+ if (root == null)
return false;
- JsonNode meta = actualObj.get("meta");
+ JsonNode meta = root.get("meta");
this.setMeta(meta);
- this.setQuery(actualObj.get("query"));
- this.setCollection(actualObj.get("collection"));
+ this.setQuery(root.get("query"));
+ this.setCollection(root.get("collection"));
if (meta != null) {
if (meta.has("totalResults")) {
@@ -296,14 +376,24 @@
writer = new StringWriter();
};
+ // Write header to exporter
this.writeHeader(writer);
- return this.iterateThroughMatches(
- actualObj.get("matches")
- );
+ // Go on by iterating through matches
+ return this.iterateThroughMatches(root.get("matches"));
};
+
+ /**
+ * Finalize the export stream.
+ */
+ public Exporter finish() throws IOException {
+ this.writeFooter(this.writer);
+ this.writer.close();
+ return (Exporter) this;
+ };
+
/**
* Append more matches to the result set.
*/
@@ -313,26 +403,15 @@
this.forceFile();
JsonParser parser = mapper.getFactory().createParser(resp);
- JsonNode actualObj = mapper.readTree(parser);
+ JsonNode root = mapper.readTree(parser);
- if (actualObj == null)
+ if (root == null)
return false;
- return this.iterateThroughMatches(
- actualObj.get("matches")
- );
+ return this.iterateThroughMatches(root.get("matches"));
};
- /**
- * Finalize the export stream.
- */
- public Exporter finish() throws IOException {
- this.writeFooter(this.writer);
- this.writer.close();
- return (Exporter) this;
- };
-
/**
* Serve response entity, either as a string or as a file.
*/
@@ -366,10 +445,14 @@
);
};
+
+ /*
+ * Iterate through all matches
+ */
+ private boolean iterateThroughMatches (JsonNode mNodes)
+ throws IOException {
- // Iterate through all matches
- private boolean iterateThroughMatches (JsonNode mNodes) throws IOException {
-
+ // Send progress information
this.sendProgress();
if (mNodes == null)
@@ -378,6 +461,8 @@
// Iterate over the results of the current file
Iterator<JsonNode> mNode = mNodes.elements();
while (mNode.hasNext()) {
+
+ // Stop if all relevant matches are fetched
if (this.maxResults > 0 &&
this.fetchedResults >= this.maxResults) {
return false;
@@ -387,4 +472,58 @@
};
return true;
};
+
+
+ /*
+ * Get the directory where all temporary files are stored.
+ */
+ private File getFileDirectory () {
+
+ String fileDir = prop.getProperty(
+ "conf.file_dir",
+ System.getProperty("java.io.tmpdir")
+ );
+
+ File dir = new File(fileDir);
+
+ // Create directory if not yet existing
+ if (!dir.exists()) {
+ dir.mkdir();
+ }
+
+ // Directory is unwritable - fallback
+ else if (!dir.canWrite()) {
+ fileDir = System.getProperty("java.io.tmpdir");
+ System.err.println("Unable to write to directory");
+ System.err.println("Fallback to " + fileDir);
+ dir = new File(fileDir);
+ };
+ return dir;
+ };
+
+
+ /*
+ * Send a single progress event to the event stream.
+ */
+ private void sendProgress () {
+
+ if (this.evOut == null || this.maxResults == 0)
+ return;
+
+ if (this.evOut.isClosed())
+ return;
+
+ int calc = (int) Math.ceil(((double) this.fetchedResults / this.maxResults) * 100);
+
+ final OutboundEvent.Builder eventBuilder = new OutboundEvent.Builder();
+ eventBuilder.name("Progress");
+ eventBuilder.data(String.valueOf(calc));
+
+ try {
+ this.evOut.write(eventBuilder.build());
+ }
+ catch (IOException e) {
+ return;
+ };
+ };
};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchExport.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchExport.java
index b2275fe..0c45b48 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchExport.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchExport.java
@@ -2,74 +2,73 @@
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+/**
+ * Representation of a match.
+ */
@JsonIgnoreProperties(ignoreUnknown = true)
public class MatchExport {
- private String textSigle;
- private String author;
- private String pubDate;
+ private String textSigle,
+ author,
+ pubDate,
+ snippet,
+ title;
+
private Snippet snippeto;
- private String snippet;
- private String title;
-
+ /**
+ * Get author of the match.
+ */
public String getAuthor () {
return author;
- }
+ };
- public void setAuthor (String author) {
- this.author = author;
- }
-
-
+ /**
+ * Get textSigle of the match.
+ */
public String getTextSigle () {
return textSigle;
- }
+ };
-
- public void setTextSigle (String textSigle) {
- this.textSigle = textSigle;
- }
-
-
- public Snippet getSnippetO () {
- return snippeto;
- }
-
-
- public void setSnippetO (Snippet snippet) {
- this.snippeto = snippet;
- }
-
-
- public String getSnippet () {
- return snippet;
- }
-
-
- public void setSnippet (String snippet) {
- this.snippet = snippet;
- this.snippeto = new Snippet(this.snippet);
- }
-
-
+
+ /**
+ * Get title of the match.
+ */
public String getTitle () {
return title;
- }
+ };
-
- public void setTitle (String title) {
- this.title = title;
- }
-
-
+
+ /**
+ * Get publication date of the match.
+ */
public String getPubDate () {
return pubDate;
- }
+ };
+
+ /**
+ * Get snippet object of the match.
+ */
+ public Snippet getSnippetO () {
+ return snippeto;
+ };
+
- public void setPubDate (String pubDate) {
- this.pubDate = pubDate;
- }
-}
+ /**
+ * Get snippet string of the match.
+ */
+ public String getSnippetString () {
+ return snippet;
+ };
+
+
+ /*
+ * Set the snippet string and build the snippet object from it.
+ */
+ private void setSnippet (String snippet) {
+ this.snippet = snippet;
+ this.snippeto = new Snippet(this.snippet);
+ };
+};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/PluginServer.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/PluginServer.java
index ecd59a5..a0d2c83 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/PluginServer.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/PluginServer.java
@@ -14,6 +14,9 @@
import org.glassfish.jersey.servlet.ServletContainer;
+/**
+ * Server to provide the export web service
+ */
public class PluginServer {
public static void main (String[] args) throws Exception {
@@ -24,7 +27,7 @@
Properties properties = ExWSConf.properties(null);
- //Default: Server is available under http://localhost:7070/
+ // Default: Server is available under http://localhost:7070/
String portStr = properties.getProperty("server.port", "7070");
String host = properties.getProperty("server.host", "localhost");
int port = Integer.parseInt(portStr);
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
index 66f22b0..9f0fc74 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
@@ -18,19 +18,14 @@
/**
* Streaming RTF exporter.
*/
-/*
- * TODO:
- * - Create a template
- */
public class RtfExporter extends MatchAggregator implements Exporter {
- private static final String HLINE = "{\\pard\\brdrb\\brdrs\\brdrw2\\brsp20\\par}\n";
+ // Horizontal line
+ private static final String HLINE =
+ "{\\pard\\brdrb\\brdrs\\brdrw2\\brsp20\\par}\n";
- private boolean firstMatch;
+ private static final ObjectMapper mapper = new ObjectMapper();
- private ObjectMapper mapper = new ObjectMapper();
-
- // final static Charset charset = Charset.forName("Windows-1252");
final static CharsetEncoder charsetEncoder =
Charset
.forName("Windows-1252")
@@ -38,12 +33,6 @@
.onMalformedInput(REPORT)
.onUnmappableCharacter(REPORT);
- StringBuilder sb;
-
- {
- firstMatch = true;
- sb = new StringBuilder(256);
- }
@Override
public String getMimeType () {
@@ -61,13 +50,16 @@
public void writeHeader (Writer w) throws IOException {
w.append("{")
.append("\\rtf1\\ansi\\deff0\n")
+
+ // Color table
.append("{\\colortbl;\\red0\\green0\\blue0;\\red127\\green127\\blue127;\\red255\\green255\\blue255;}\n")
+
+ // Font table
.append("{\\fonttbl{\\f0\\fcharset0 Times New Roman;}{\\f1\\fcharset1 Courier;}}\n");
+ // Footer on every page, containing the page number
w.append("{\\footer\\pard\\qr\\fs18\\f0 ");
rtfText(w, "@ Institut für Deutsche Sprache, Mannheim");
-
- // Page number
w.append(" \\endash \\chpgn /{\\field{\\*\\fldinst{\\fs18\\f0 NUMPAGES}}}");
w.append("\\par}\n");
@@ -80,6 +72,7 @@
w.append("\n{\\pard \\par}\n");
+ // Add info table
this.addInfoTable(w);
};
@@ -145,20 +138,25 @@
};
};
+
+ /*
+ * Table with meta information about the export.
+ */
private void addInfoTable (Writer w) throws IOException {
+ // Query information
String q = this.getQueryString();
-
- // Add Information table
if (q != null && q.length() > 0) {
this.addInfoRow(w, "Query", this.getQueryString());
};
+ // Corpus query information
q = this.getCorpusQueryString();
if (q != null && q.length() > 0) {
this.addInfoRow(w, "Corpus", q);
};
+ // Match count information
if (this.getTotalResults() != -1) {
StringBuilder str = new StringBuilder(32);
if (this.hasTimeExceeded()) {
@@ -172,16 +170,19 @@
this.addInfoRow(w, "Count", str.toString());
};
+ // Fetched match count information
if (this.getTotalResults() == -1 ||
this.getTotalResults() > this.getMaxResults()) {
this.addInfoRow(w, "Fetched", this.getMaxResults());
};
+ // Source information
q = this.getSource();
if (q != null && q.length() > 0) {
this.addInfoRow(w, "Source", q);
};
+ // Version information
if (this.getMeta() != null && this.getMeta().has("version")) {
this.addInfoRow(w, "Backend-Version", this.getMeta().get("version").asText());
};
@@ -190,14 +191,20 @@
};
- // Add information row
+ /*
+ * Add information table row
+ */
private void addInfoRow (Writer w, String title, int value) throws IOException {
this.addInfoRow(w, title, Integer.toString(value));
};
- // Add information row
+ /*
+ * Add information table row
+ */
private void addInfoRow (Writer w, String title, String value) throws IOException {
+
+ // Some border and color information
w.append("{\\trowd\\trql\\lttrow")
.append("\\clbrdrt\\brdrs\\clbrdrl\\brdrs\\clbrdrb\\brdrs")
.append("\\clpadl80\\clpadt80\\clpadb80\\clpadr80\\clcbpat2\\cellx2000")
@@ -210,7 +217,11 @@
w.append("\\cell\\row}\n");
};
- // Get version for RTF document
+
+ /*
+ * Get version of the plugin
+ * (maybe read from pom?)
+ */
private Version getVersion () {
return new Version(
ExWSConf.VERSION_MAJOR,
@@ -222,7 +233,12 @@
);
};
- // Based on jrtf by Christian Ullenboom
+
+ /*
+ * Convert a string to RTF compliant encoding.
+ *
+ * Based on jrtf by Christian Ullenboom
+ */
private static void rtfText(Writer w, String rawText) throws IOException {
char c;
for (int i = 0; i < rawText.length(); i++) {
@@ -262,6 +278,6 @@
catch (CharacterCodingException err) {
};
};
- }
+ };
};
};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Service.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Service.java
index 59f6807..dcedfc6 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Service.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Service.java
@@ -56,16 +56,48 @@
import freemarker.template.Template;
/**
- * TODO:
+ * TODO for release:
+ * - Rename to "Kalamar-Plugin-Export".
+ * - Remove 'plugin' root folder.
+ * - Localize.
* - Delete the temp file of the export at the end
- * of the serving.
- * - Do not expect all meta data per match.
+ * of serving.
+ * - Add date info.
+ * - Add opaque source, in case source is an internal IP.
+ * - Change to "Leibniz-Institut" in Copyright notice.
+ * - Improve Readme.
+ * - Initialize progress bar with indeterminate state.
+ * - 100 matches as default for export form.
+ * - Rename MatchExport to Match.
+ * - Rename MatchExport/SnippetO to Snippet.
+ * - Test ExWSConf.
+ *
+ * TODO:
* - Abort processing when eventsource is closed.
+ * - Do not expect all meta data per match.
* - Upgrade default pageSize to 50.
* - Add loading marker.
- * - Add hitc to form.
+ * - Use a central logging mechanism.
* - Add infos to JsonExporter.
- * - Add date info.
+ * - Check pageSize after init (so pageSize is not
+ * greater than what the server supports).
+ * - Restrict CORS to meaningful sources.
+ * - Add arbitrary information for RTF header
+ * - Add Citation information.
+ * - Add information regarding max_exp_limit
+ * to export form.
+ * - Maybe set matches from parent window
+ * (if available) as export default (if
+ * smaller than max_exp_limit)
+ * - IDS-internal users should be allowed 100.000
+ * matches per export, while external users
+ * should be limited to 10.000.
+ * - Add 1000-separator to numbers.
+ *
+ * IDEAS:
+ * - Create a template mechanism for RTF export.
+ * - Prettify VC in RTF export (maybe similar to
+ * the visualisation in Kalamar)
*/
@Path("/")
@@ -97,8 +129,11 @@
@Context
private HttpServletRequest req;
- // Private method to run the export,
- // either static or streaming
+
+ /*
+ * Private method to run the export,
+ * either static or streaming
+ */
private Exporter export(String fname,
String format,
String q,
@@ -109,14 +144,14 @@
EventOutput eventOutput
) throws WebApplicationException {
- // These parameters are required
+ // These parameters are mandatory
String[][] params = {
{ "format", format },
{ "q", q },
{ "ql", ql }
};
- // Check that all parameters are available
+ // Check that all mandatory parameters are available
for (int i = 0; i < params.length; i++) {
if (params[i][1] == null || params[i][1].trim().isEmpty())
throw new WebApplicationException(
@@ -160,23 +195,23 @@
.port(Integer.parseInt(port))
.scheme(scheme)
.queryParam("q", q)
- // .queryParam("context", "sentence")
- .queryParam("context", "40-t,40-t") // Not yet supported
+ .queryParam("context", "40-t,40-t")
.queryParam("ql", ql)
.queryParam("count", pageSize)
;
+ // Not yet supported:
+ // .queryParam("context", "sentence")
+
if (cq != null && cq.length() > 0)
uri = uri.queryParam("cq", cq);
- if (path != "") {
+ if (path != "")
uri = uri.path(path);
- };
// Get client IP, in case service is behind a proxy
- String xff = "";
// Get auth (temporarily) via Session riding
- String auth = "";
+ String xff = "", auth = "";
if (req != null) {
xff = getClientIP(req.getHeader("X-Forwarded-For"));
if (xff == "")
@@ -193,34 +228,44 @@
resource = client.target(uri.build());
reqBuilder = resource.request(MediaType.APPLICATION_JSON);
resp = authBuilder(reqBuilder, xff, auth).get(String.class);
-
- } catch (Exception e) {
+ }
+
+ catch (Exception e) {
throw new WebApplicationException(
responseForm(Status.BAD_GATEWAY, "Unable to reach Backend")
);
}
+ // Get and initialize exporter based on requested format
Exporter exp = getExporter(format);
exp.setMaxResults(maxResults);
exp.setQueryString(q);
exp.setCorpusQueryString(cq);
exp.setSource(host, path);
- // set filename based on query (if not already set)
- if (fname != null) {
+ // Set filename
+ if (fname != null)
exp.setFileName(fname);
- };
- // set progress mechanism, if required
+ // Set progress mechanism, if passed
if (eventOutput != null) {
exp.setSse(eventOutput);
+
+ // Progress requires the creation
+ // of temporary files
exp.forceFile();
};
- // Initialize exporter (with meta data and first matches)
+ // Initialize export with meta data
+ // and first matches
try {
+
+ // TODO:
+ // Check return value.
exp.init(resp);
- } catch (Exception e) {
+ }
+
+ catch (Exception e) {
throw new WebApplicationException(
responseForm(
Status.INTERNAL_SERVER_ERROR,
@@ -231,24 +276,26 @@
// Calculate how many results to fetch
int fetchCount = exp.getTotalResults();
- if (exp.hasTimeExceeded() || fetchCount > maxResults) {
+ if (exp.hasTimeExceeded() || fetchCount > maxResults)
fetchCount = maxResults;
- }
- // fetchCount may be different to maxResults now, so reset after init
+ // fetchCount may be different to maxResults now,
+ // so reset after init (for accurate progress)
exp.setMaxResults(fetchCount);
- // The first page was already enough - ignore paging
- if (fetchCount <= pageSize) {
- cutoff = true;
- };
-
// If only one page should be exported there is no need
- // for a temporary export file
- if (cutoff) {
+ // for a temporary export file, unless progress is
+ // requested. In case all matches are already fetched,
+ // stop here as well.
+ if (cutoff || fetchCount <= pageSize) {
+
try {
+
+ // Close all export writers
exp.finish();
- } catch (Exception e) {
+ }
+
+ catch (Exception e) {
throw new WebApplicationException(
responseForm(
Status.INTERNAL_SERVER_ERROR,
@@ -259,7 +306,9 @@
return exp;
};
- // Page through all results
+ /*
+ * Page through all results
+ */
// It's not important anymore to get totalResults
uri.queryParam("cutoff", "true");
@@ -271,6 +320,7 @@
// Iterate over all results
for (int i = pageSize; i <= fetchCount; i+=pageSize) {
+
resource = client.target(uri.build(i));
reqBuilder = resource.request(MediaType.APPLICATION_JSON);
resp = authBuilder(reqBuilder, xff, auth).get(String.class);
@@ -280,9 +330,12 @@
break;
}
+ // Close all export writers
exp.finish();
- } catch (Exception e) {
+ }
+
+ catch (Exception e) {
throw new WebApplicationException(
responseForm(
Status.INTERNAL_SERVER_ERROR,
@@ -296,22 +349,25 @@
/**
- * WebService calls Kustvakt Search Webservices and returns
- * response as json (all of the response) and
- * as rtf (matches)
+ * WebService that retrieves data from the Kustvakt
+ * Webservice and returns response in different formats.
+ *
+ * Returns an octet stream.
*
* @param fname
* file name
* @param format
- * the file format value rtf or json.
+ * the file format value
* @param q
* the query
+ * @param cq
+ * the corpus query
* @param ql
* the query language
* @param cutoff
- * Export more than the first page
- *
- *
+ * Only export the first page
+ * @param hitc
+ * Number of matches to fetch
*/
@POST
@Path("export")
@@ -324,7 +380,6 @@
@FormParam("ql") String ql,
@FormParam("cutoff") String cutoffStr,
@FormParam("hitc") int hitc
- // @FormParam("islimit") String il
) throws IOException {
Exporter exp = export(fname, format, q, cq, ql, cutoffStr, hitc, null);
@@ -334,8 +389,25 @@
/**
- * Progress based counterpart to staticExport,
- * that requires a GET due to the JavaScript API.
+ * WebService that retrieves data from the Kustvakt
+ * Webservice and returns response in different formats.
+ *
+ * Returns an event stream.
+ *
+ * @param fname
+ * file name
+ * @param format
+ * the file format value
+ * @param q
+ * the query
+ * @param cq
+ * the corpus query
+ * @param ql
+ * the query language
+ * @param cutoffStr
+ * Only export the first page
+ * @param hitc
+ * Number of matches to fetch
*/
@GET
@Path("export")
@@ -351,10 +423,11 @@
@QueryParam("hitc") int hitc
) throws InterruptedException {
- // https://www.baeldung.com/java-ee-jax-rs-sse
- // https://www.howopensource.com/2016/01/java-sse-chat-example/
- // https://csetutorials.com/jersey-sse-tutorial.html
- // https://eclipse-ee4j.github.io/jersey.github.io/documentation/latest/sse.html
+ // See
+ // https://www.baeldung.com/java-ee-jax-rs-sse
+ // https://www.howopensource.com/2016/01/java-sse-chat-example/
+ // https://csetutorials.com/jersey-sse-tutorial.html
+ // https://eclipse-ee4j.github.io/jersey.github.io/documentation/latest/sse.html
final EventOutput eventOutput = new EventOutput();
@@ -362,7 +435,9 @@
if (eventOutput.isClosed())
return Response.ok("EventSource closed").build();
- new Thread(new Runnable() {
+ new Thread(
+ new Runnable() {
+
@Override
public void run() {
final OutboundEvent.Builder eventBuilder = new OutboundEvent.Builder();
@@ -371,14 +446,7 @@
eventBuilder.data("init");
eventOutput.write(eventBuilder.build());
Exporter exp = export(
- fname,
- format,
- q,
- cq,
- ql,
- cutoffStr,
- hitc,
- eventOutput
+ fname, format, q, cq, ql, cutoffStr, hitc, eventOutput
);
if (eventOutput.isClosed())
@@ -388,17 +456,26 @@
eventBuilder.data(exp.getExportID() + ";" + exp.getFileName());
eventOutput.write(eventBuilder.build());
- } catch (Exception e) {
+ }
+
+ catch (Exception e) {
try {
if (eventOutput.isClosed())
return;
+
eventBuilder.name("Error");
eventBuilder.data(e.getMessage());
eventOutput.write(eventBuilder.build());
- } catch (IOException ioe) {
- throw new RuntimeException("Error when writing event output.", ioe);
+ }
+
+ catch (IOException ioe) {
+ throw new RuntimeException(
+ "Error when writing event output.", ioe
+ );
};
- } finally {
+ }
+
+ finally {
try {
if (eventOutput.isClosed())
return;
@@ -407,8 +484,12 @@
eventBuilder.data("done");
eventOutput.write(eventBuilder.build());
eventOutput.close();
- } catch (IOException ioClose) {
- throw new RuntimeException("Error when closing the event output.", ioClose);
+ }
+
+ catch (IOException ioClose) {
+ throw new RuntimeException(
+ "Error when closing the event output.", ioClose
+ );
}
};
return;
@@ -422,8 +503,15 @@
/**
- * This is the relocation target to which the event
+ * Relocation target that the event
* stream points to.
+ *
+ * Returns an octet stream.
+ *
+ * @param fname
+ * file name
+ * @param fileStr
+ * the file to fetch
*/
@GET
@Path("export/{file}")
@@ -434,12 +522,13 @@
) {
String format = getExtension(fileStr);
-
+
// Get exporter object
Exporter exp = getExporter(format);
- if (fname != null) {
+
+ if (fname != null)
exp.setFileName(fname);
- };
+
exp.setFile(fileStr);
// Return without init
@@ -447,6 +536,11 @@
};
+ /**
+ * The export form.
+ *
+ * Returns an HTML file.
+ */
@GET
@Path("export")
@Produces(MediaType.TEXT_HTML)
@@ -455,6 +549,11 @@
};
+ /**
+ * The export script.
+ *
+ * Returns a static JavaScript file.
+ */
@GET
@Path("export.js")
@Produces("application/javascript")
@@ -464,7 +563,10 @@
.build();
};
- // Get exporter by format
+
+ /*
+ * Get exporter object by format
+ */
private Exporter getExporter (String format) {
// Choose the correct exporter
if (format.equals("json"))
@@ -476,22 +578,28 @@
};
- // Decorate request with auth headers
- private Invocation.Builder authBuilder (Invocation.Builder reqBuilder,
- String xff,
- String auth) {
- if (xff != "") {
+ /*
+ * Decorate request with X-Forwarded-For and Authorization headers; null or empty values are skipped
+ */
+ private Invocation.Builder authBuilder (
+ Invocation.Builder reqBuilder,
+ String xff,
+ String auth
+ ) {
+
+ if (xff != null && !xff.isEmpty())
reqBuilder = reqBuilder.header("X-Forwarded-For", xff);
- };
- if (auth != "") {
+
+ if (auth != null && !auth.isEmpty())
reqBuilder = reqBuilder.header("Authorization", auth);
- };
return reqBuilder;
};
- // Get authorization token from cookie
+ /*
+ * Get authorization token from cookie
+ */
private String authFromCookie (HttpServletRequest r) {
// This is a temporary solution using session riding - only
@@ -507,14 +615,12 @@
for (int i = 0; i < cookies.length; i++) {
// Check the valid path
- if (cookiePath != "" && cookies[i].getPath() != cookiePath) {
+ if (cookiePath != "" && cookies[i].getPath() != cookiePath)
continue;
- };
// Ignore irrelevant cookies
- if (!cookies[i].getName().matches("^kalamar(-.+?)?$")) {
+ if (!cookies[i].getName().matches("^kalamar(-.+?)?$"))
continue;
- };
// Get the value
String b64 = cookies[i].getValue();
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
index 3af898d..a5f2ef0 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Snippet.java
@@ -3,22 +3,32 @@
import java.util.regex.Pattern;
import java.util.regex.Matcher;
+/**
+ * Representation of a match snippet.
+ */
public class Snippet {
private String left, right, mark;
private boolean leftMore, rightMore, cuttedMark;
+ // Pattern to get Snippet match and contexts
private static Pattern snippetP =
Pattern.compile("^(?i)<span[^>]+class=\"(?:[^\"]* )?context-left(?:[^\"]* )?\">(.*?)</span>" +
"<span[^>]+class=\"(?:[^\"]* )?match(?:[^\"]* )?\">(.+?)</span>" +
"<span[^>]+class=\"(?:[^\"]* )?context-right(?:[^\"]* )?\">(.*?)</span>$");
+ // Pattern to check if more context is available
private static Pattern moreP =
Pattern.compile("(?i)<span[^>]+class=\"more\"></span>");
+ // Pattern to check if the match is actually larger, but was cut down
private static Pattern cuttedP =
Pattern.compile("(?i)<span[^>]+class=\"cutted\"></span>");
-
+
+
+ /**
+ * Constructor for Snippet parsing
+ */
public Snippet (String snippetstr) {
// Match with precise algorithm
@@ -35,7 +45,7 @@
left = m.replaceAll("");
this.leftMore = true;
};
- this.setLeft(unescapeHTML(left));
+ this.left = unescapeHTML(left);
};
m = cuttedP.matcher(mark);
@@ -44,7 +54,7 @@
this.cuttedMark = true;
};
- this.setMark(unescapeHTML(mark.replaceAll("</?mark[^>]*>", "")));
+ this.mark = unescapeHTML(mark.replaceAll("</?mark[^>]*>", ""));
if (right != null) {
m = moreP.matcher(right);
@@ -52,72 +62,80 @@
right = m.replaceAll("");
this.rightMore = true;
};
- this.setRight(unescapeHTML(right));
+ this.right = unescapeHTML(right);
};
}
- // Simpler mark-split algorithm
+ // Simpler mark-split algorithm, mainly used for testing
else {
String[] splitted = snippetstr
.replaceAll("(?i)</?span[^>]*>","")
.split("(?i)</?mark[^>]*>");
if (splitted[0] != null) {
- this.setLeft(splitted[0]);
+ this.left = splitted[0];
};
if (splitted[1] != null) {
- this.setMark(splitted[1]);
+ this.mark = splitted[1];
};
if (splitted[2] != null) {
- this.setRight(splitted[2]);
+ this.right = splitted[2];
};
return;
};
- }
+ };
+
+ /**
+ * Get the left context.
+ */
public String getLeft () {
return left;
- }
+ };
- public void setLeft (String left) {
- this.left = left;
- }
-
-
+ /**
+ * Get the right context.
+ */
public String getRight () {
return right;
- }
+ };
- public void setRight (String right) {
- this.right = right;
- }
-
-
+ /**
+ * Get the marked match.
+ */
public String getMark () {
return mark;
- }
+ };
- public void setMark (String mark) {
- this.mark = mark;
- }
-
-
+ /**
+ * Check whether there is more context to the left.
+ */
public boolean hasMoreLeft () {
return leftMore;
};
+ /**
+ * Check whether there is more context to the right.
+ */
public boolean hasMoreRight () {
return rightMore;
};
+ /**
+ * Check whether the match was cut (truncated).
+ */
public boolean isCutted () {
return cuttedMark;
};
-
+
+
+ /*
+ * Unescape HTML entities.
+ */
private static String unescapeHTML (String text) {
if (text == null)
return "";
@@ -129,4 +147,4 @@
.replace(">", ">")
.replace("&", "&");
};
-}
+};
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Util.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Util.java
index cc024e9..169615d 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Util.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Util.java
@@ -3,7 +3,14 @@
import java.io.IOException;
import java.io.*;
+/**
+ * Utility class to provide helper functions.
+ */
public class Util {
+
+ /**
+ * Sanitize a file name so that it does not contain invalid characters.
+ */
public static String sanitizeFileName (String fname) {
return fname
.replaceAll("[^\\p{L}0-9\\(\\)\\-\\_]", "-")
@@ -15,6 +22,9 @@
;
};
+ /**
+ * Create a string representation of an input stream.
+ */
public static String streamToString (InputStream in) {
StringBuilder sb = new StringBuilder();
@@ -25,7 +35,9 @@
while ((line = br.readLine()) != null) {
sb.append(line + System.lineSeparator());
}
- } catch (IOException e) {
+ }
+
+ catch (IOException e) {
e.printStackTrace();
}
diff --git a/plugin/src/test/java/de/ids_mannheim/korap/plkexport/MatchTest.java b/plugin/src/test/java/de/ids_mannheim/korap/plkexport/MatchTest.java
new file mode 100644
index 0000000..6ab922b
--- /dev/null
+++ b/plugin/src/test/java/de/ids_mannheim/korap/plkexport/MatchTest.java
@@ -0,0 +1,45 @@
+package de.ids_mannheim.korap.plkexport;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
+import org.junit.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+import de.ids_mannheim.korap.plkexport.MatchExport;
+
+public class MatchTest {
+
+ private static final ObjectMapper mapper = new ObjectMapper();
+
+ @Test
+ public void testSimple () throws JsonProcessingException, IOException {
+
+ String match = "{\"author\":\"Goethe\","+
+ "\"title\":\"Title1\","+
+ "\"pubDate\":\"20051103\","+
+ "\"textSigle\":\"RTF/G59/34284\","+
+ "\"snippet\":\"<span class=\\\"context-left\\\"></span><span class=\\\"match\\\"><mark>Und dafür, dass</mark><span class=\\\"cutted\\\"></span></span><span class=\\\"context-right\\\"> meine IP öffentlich angezeigt wird. Über die IP kann man auf den Wohnort, den Provider und bei Aufenthalt am Arbeitsplatz auf den Arbeitgeber schließen, über Konto nicht. -- 09:24, 17. Dez. 2011 (CET) Bist Du denn nicht mehr selber Arbeitgeber? -- 09:31<span class=\\\"more\\\"></span></span>\"}";
+
+ MatchExport matchObj = mapper.readValue(match, MatchExport.class);
+ // Expected value goes first so that JUnit failure messages read correctly
+ assertEquals("Goethe", matchObj.getAuthor());
+ assertEquals("Title1", matchObj.getTitle());
+ assertEquals("20051103", matchObj.getPubDate());
+ assertEquals("RTF/G59/34284", matchObj.getTextSigle());
+ // The raw snippet string keeps its HTML markup
+ assertTrue(matchObj.getSnippetString().contains("<span class"));
+ // The parsed snippet yields the match text without markup
+ assertEquals("Und dafür, dass",
+ matchObj.getSnippetO().getMark());
+
+ };
+
+};