Improve paging mechanism to rely on offsets only
Also introduce hitc support to limit the number of exported hits
and move the maximum export limit (conf.max_exp_limit) to the
configuration file

Change-Id: Iccf9cc5f3497232dd76a917a059f67c9926b4942
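
In short, the optional hitc form parameter may lower, but never raise,
the configured conf.max_exp_limit, and that limit also decides whether
paging is needed at all. A minimal sketch of the logic added to
IdsExportService.java (identifiers as in the patch; the surrounding
Jersey plumbing is omitted):

    // Load the configured maximum and narrow it by hitc if requested
    int maxResults = Integer.parseInt(
        properties.getProperty("conf.max_exp_limit", "10000"));
    if (hitc > 0 && hitc < maxResults)
        maxResults = hitc;
    exp.setMaxResults(maxResults);

    // After exp.init(resp): limit the number of hits to fetch;
    // on a timeout, totalResults is only a minimum
    int fetchCount = exp.getTotalResults();
    if (exp.hasTimeExceeded() || fetchCount > maxResults)
        fetchCount = maxResults;

    // If the first page already covers fetchCount, no temporary
    // export file and no further paging is needed
    if (fetchCount <= pageSize)
        cutoff = true;
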
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/ExWSConf.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/ExWSConf.java
index 11413ae..72e9468 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/ExWSConf.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/ExWSConf.java
@@ -13,14 +13,7 @@
 import java.util.Properties;
 
 public class ExWSConf {
-    /*
-     * maximum hits to be exported
-     * TODO: Define this constants after discussing it. 
-     * Maybe we need a distinction between users at the IDS and external users
-     * See also: https://www.ids-mannheim.de/cosmas2/script-app/hilfe/sitzung.html
-     */
-    public static final int MAX_EXP_LIMIT = 10000;
-    
+
     // Version of Export Plugin
     public static final int VERSION_MAJOR = 0;
     public static final int VERSION_MINOR = 1;
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
index b00ea47..a54460f 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/Exporter.java
@@ -8,14 +8,14 @@
 interface Exporter {
 
     // Implemented by MatchAggregator
-    public void init (String s) throws IOException;
+    public boolean init (String s) throws IOException;
     public void setMeta(JsonNode n);
     public void setQuery(JsonNode n);
     public void setCollection(JsonNode n);
     public JsonNode getMeta();
     public JsonNode getQuery();
     public JsonNode getCollection();
-    public void appendMatches (String s) throws IOException;
+    public boolean appendMatches (String s) throws IOException;
     public String getFileName ();
     public void setFileName (String s);
     public String getQueryString ();
@@ -24,6 +24,7 @@
     public void setCorpusQueryString (String s);
     public int getTotalResults ();
     public boolean hasTimeExceeded ();
+    public void setMaxResults (int m);
 
     // Implemented by Exporter
     public ResponseBuilder serve();
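
Since init() and appendMatches() now return boolean, a caller can page
purely by offset and stop as soon as the exporter signals that the
limit set via setMaxResults() has been reached. Roughly, on the
service side (a sketch only; fetchPage() is a hypothetical stand-in
for the Jersey client calls built in IdsExportService.java):

    // totalResults is already known from the first page, so ask the
    // backend not to compute it again
    uri.queryParam("cutoff", "true");
    uri.queryParam("offset", "{offset}");

    for (int offset = pageSize; offset <= fetchCount; offset += pageSize) {
        String resp = fetchPage(uri, offset);
        // appendMatches() returns false once maxResults is reached
        if (!exp.appendMatches(resp))
            break;
    }
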
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
index e70790e..c36bb7c 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/IdsExportService.java
@@ -48,15 +48,12 @@
 /**
  * TODO:
  * - Delete the temp file of the export at the end
- * - Right now, the web service returns one page (cutoff=1) or
- *   all pages.
  * - Do not expect all meta data per match.
- * - Handle timeout results (with minimum total results).
- * - Use offset instead of page parameter
  * - Add progress mechanism.
  * - Add CSV export format.
  * - Add table layout to RTF information.
  * - Add loading marker.
+ * - Add hitc to form.
  */
 
 @Path("/")
@@ -116,9 +113,9 @@
         @FormParam("q") String q,
         @FormParam("cq") String cq,
         @FormParam("ql") String ql,
-        @FormParam("cutoff") String cutoffStr
+        @FormParam("cutoff") String cutoffStr,
         // @FormParam("islimit") String il,
-        // @FormParam("hitc") int hitc
+        @FormParam("hitc") int hitc
         ) throws IOException {
 
         // These parameters are required
@@ -138,24 +135,32 @@
                             + params[i][0] + "\"" + " is missing or empty")
                     .build());
         };
-
-        int totalResults = -1;
         
         // Retrieve cutoff value
         boolean cutoff = false;
-        if (cutoffStr != null && (cutoffStr.equals("true") || cutoffStr.equals("1"))) {
+        if (cutoffStr != null && (
+                cutoffStr.equals("true") ||
+                cutoffStr.equals("1"))
+            ) {
             cutoff = true;
         };
 
         ResponseBuilder builder = null;
         Client client = ClientBuilder.newClient();
 
-        String scheme = properties.getProperty("api.scheme", "https");
-        String port   = properties.getProperty("api.port", "8089");
-        String host   = properties.getProperty("api.host", "localhost");
-        String path   = properties.getProperty("api.path", "");
-        int pageSize  = Integer.parseInt(properties.getProperty("conf.page_size", "5"));
+        // Load configuration values
+        String scheme  = properties.getProperty("api.scheme", "https");
+        String port    = properties.getProperty("api.port", "8089");
+        String host    = properties.getProperty("api.host", "localhost");
+        String path    = properties.getProperty("api.path", "");
+        int pageSize   = Integer.parseInt(properties.getProperty("conf.page_size", "5"));
+        int maxResults = Integer.parseInt(properties.getProperty("conf.max_exp_limit", "10000"));
 
+        // Adjust the number of requested hits
+        if (hitc > 0 && hitc < maxResults) {
+            maxResults = hitc;
+        };
+
         // Create initial search uri
         UriBuilder uri = UriBuilder.fromPath("/api/v1.0/search")
             .host(host)
@@ -169,12 +174,11 @@
 
         if (cq != null)
             uri = uri.queryParam("cq", cq);
-
         
         if (path != "") {
             uri = uri.path(path);
         };
-
+       
         uri = uri.queryParam("count", pageSize);
 
         // Get client IP, in case service is behind a proxy
@@ -183,9 +187,8 @@
         String auth = "";
         if (req != null) {
             xff = getClientIP(req.getHeader("X-Forwarded-For"));
-            if (xff == "") {
+            if (xff == "")
                 xff = req.getRemoteAddr();
-            };
 
             auth = authFromCookie(req);
         };
@@ -208,16 +211,15 @@
         Exporter exp;
 
         // Choose the correct exporter
-        if (format.equals("json")) {
+        if (format.equals("json"))
             exp = new JsonExporter();
-        }
-        else {
+        else
             exp = new RtfExporter();
-        };
 
+        exp.setMaxResults(maxResults);
         exp.setQueryString(q);
         exp.setCorpusQueryString(cq);
-        
+       
         // set filename based on query (if not already set)
         if (fname != null) {
             exp.setFileName(fname);
@@ -226,6 +228,7 @@
         // Initialize exporter (with meta data and first matches)
         try {
             exp.init(resp);
+
         } catch (Exception e) {
 
             throw new WebApplicationException(
@@ -234,47 +237,48 @@
                     e.getMessage()
                     )
                 );
-        }
+        };
 
+
+        /*
+         * Calculate how many results to fetch
+         */
+        int fetchCount = exp.getTotalResults();
+        if (exp.hasTimeExceeded() || fetchCount > maxResults) {
+            fetchCount = maxResults;
+        };
+
+        // The first page was already enough
+        if (fetchCount <= pageSize) {
+            cutoff = true;
+        };
+
         // If only one page should be exported there is no need
         // for a temporary export file
         if (cutoff) {
             builder = exp.serve();
         }
 
-        // Page through results
+        // Page through the remaining results
         else {
 
-            /*
-             * Get total results
-             */
-            totalResults = exp.getTotalResults();
+            // totalResults is already known, so cut off further counting
+            uri.queryParam("cutoff", "true");
 
-            /*
-             *  Get number of pages and the number of hits 
-             *  which should be exported at the last page
-             */
-            int pg = 1;
-            if (totalResults % pageSize > 0) {
-                pg = totalResults / pageSize + 1;
-            }
-            else {
-                pg = totalResults / pageSize;
-            }
-
+            // Add the offset as a template parameter for paging
             uri.queryParam("offset", "{offset}");
 
             try {
             
                 // Iterate over all results
-                for (int i = 2; i <= pg; i++) {
-                    resource = client.target(
-                        uri.build((i * pageSize) - pageSize)
-                        );
-               
+                for (int i = pageSize; i <= fetchCount; i += pageSize) {
+                    resource = client.target(uri.build(i));
                     reqBuilder = resource.request(MediaType.APPLICATION_JSON);
                     resp = authBuilder(reqBuilder, xff, auth).get(String.class);
-                    exp.appendMatches(resp);
+
+                    // Stop when no more matches are allowed
+                    if (!exp.appendMatches(resp))
+                        break;
                 }
             } catch (Exception e) {
                 throw new WebApplicationException(
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
index c482526..26a96c0 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/MatchAggregator.java
@@ -44,6 +44,8 @@
     private String fname, queryString, corpusQueryString;
     private boolean timeExceeded = false;
     private int totalResults = -1;
+    private int maxResults = -1;
+    private int fetchedResults = 0;
 
     public String getMimeType() {
         return "text/plain";
@@ -56,7 +58,7 @@
     public int getTotalResults() {
         return this.totalResults;
     };
-
+    
     public boolean hasTimeExceeded() {
         return this.timeExceeded;
     };
@@ -102,6 +104,15 @@
         this.query = query;
     };
 
+    // Needs to be set before the first call to addMatch
+    public void setMaxResults (int maxResults) {
+        this.maxResults = maxResults;
+    };
+
+    public int getMaxResults () {
+        return this.maxResults;
+    };
+
     public JsonNode getQuery () {
         return this.query;
     };
@@ -123,20 +134,20 @@
      * Create new match aggregator and parse initial Json
      * file to get header information and initial matches.
      */
-    public void init (String resp) throws IOException, JsonParseException {
+    public boolean init (String resp) throws IOException, JsonParseException {
 
         this.file = null;
 
         matches = new LinkedList();
 
         if (resp == null)
-            return;
-        
+            return false;
+
         JsonParser parser = mapper.getFactory().createParser(resp);
         JsonNode actualObj = mapper.readTree(parser);
-        
+
         if (actualObj == null)
-            return;
+            return false;
 
         JsonNode meta = actualObj.get("meta");
         this.setMeta(meta);
@@ -155,24 +166,17 @@
         writer = new StringWriter();
 
         this.writeHeader(writer);
-        
-        JsonNode mNodes = actualObj.get("matches");
 
-        if (mNodes == null)
-            return;
-        
-        // Iterate over the results of the current file
-        Iterator<JsonNode> mNode = mNodes.elements();
-        while (mNode.hasNext()) {
-            this.addMatch(mNode.next(), writer);
-        };
+        return this.iterateThroughMatches(
+            actualObj.get("matches")
+            );
     };
 
 
     /**
      * Append more matches to the result set.
      */
-    public void appendMatches (String resp) throws IOException {
+    public boolean appendMatches (String resp) throws IOException {
 
         // Open a temp file if not already opened
         if (this.file == null) {
@@ -196,19 +200,11 @@
         JsonNode actualObj = mapper.readTree(parser);
 
         if (actualObj == null)
-            return;
+            return false;
         
-        JsonNode mNodes = actualObj.get("matches");
-
-        if (mNodes == null)
-            return;
-
-        Iterator<JsonNode> mNode = mNodes.elements();
-        
-        MatchExport match;
-        while (mNode.hasNext()) {
-            this.addMatch(mNode.next(), writer);
-        };
+        return this.iterateThroughMatches(
+            actualObj.get("matches")
+            );
     };
 
 
@@ -249,4 +245,23 @@
         //   Return exporter error
         return Response.status(500).entity("error");
     };
+
+
+    // Iterate through the matches of a response, honoring maxResults
+    private boolean iterateThroughMatches (JsonNode mNodes) throws IOException {
+        if (mNodes == null)
+            return false;
+        
+        // Iterate over the results of the current file
+        Iterator<JsonNode> mNode = mNodes.elements();
+        while (mNode.hasNext()) {
+            this.addMatch(mNode.next(), writer);
+            this.fetchedResults++;
+            if (this.maxResults > 0 &&
+                this.fetchedResults >= this.maxResults) {
+                return false;
+            };
+        };
+        return true;
+    };
 };
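
With maxResults wired through the aggregator, a request with hitc=7
against nine total results (and the default conf.page_size of 5)
should proceed as follows: init() consumes the five matches of the
first page and returns true, the next appendMatches() call consumes
two more, reaches the limit and returns false, and the service stops
after seven exported hits; this is the case exercised by the RTF test
below.
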
diff --git a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
index fc74d2e..b1f085f 100644
--- a/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
+++ b/plugin/src/main/java/de/ids_mannheim/korap/plkexport/RtfExporter.java
@@ -103,6 +103,13 @@
             w.append("\\par}\n");
         };
 
+        if (this.getTotalResults() == -1 ||
+            this.getTotalResults() > this.getMaxResults()) {
+            w.append("{\\pard Fetched: \\f1 ");
+            w.append(Integer.toString(this.getMaxResults()));
+            w.append("\\par}\n");
+        };
+
         // Add line
         w.append("{\\pard\\brdrb\\brdrs\\brdrw2\\brsp20\\par}\n");
     };
diff --git a/plugin/src/main/resources/exportPlugin.conf b/plugin/src/main/resources/exportPlugin.conf
index dfd029d..0d3374a 100644
--- a/plugin/src/main/resources/exportPlugin.conf
+++ b/plugin/src/main/resources/exportPlugin.conf
@@ -13,4 +13,10 @@
 asset.scheme=https
 
 # Default configuration
-conf.page_size=5
\ No newline at end of file
+conf.page_size=5
+
+# Maximum hits to be exported
+# TODO: Define this constant after discussing it.
+# Maybe we need a distinction between users at the IDS and external users
+# See also: https://www.ids-mannheim.de/cosmas2/script-app/hilfe/sitzung.html
+conf.max_exp_limit=10000
\ No newline at end of file
diff --git a/plugin/src/test/java/de/ids_mannheim/korap/plkexport/IdsExportServiceTest.java b/plugin/src/test/java/de/ids_mannheim/korap/plkexport/IdsExportServiceTest.java
index d356786..d0fe1dd 100644
--- a/plugin/src/test/java/de/ids_mannheim/korap/plkexport/IdsExportServiceTest.java
+++ b/plugin/src/test/java/de/ids_mannheim/korap/plkexport/IdsExportServiceTest.java
@@ -110,7 +110,7 @@
         frmap.add("fname", filenamej);
         frmap.add("format", "json");
         frmap.add("q", "Wasser");
-        frmap.add("cutoff", "true");
+        frmap.add("cutoff", "1");
         frmap.add("ql", "poliqarp");
 
         String message;
@@ -483,6 +483,7 @@
         frmap.add("format", "rtf");
         frmap.add("q", "Plagegeist");
         frmap.add("ql", "poliqarp");
+        frmap.add("hitc", "30");
         String filenamer = "dateiPagingRtf";
         frmap.putSingle("fname", filenamer);
 
@@ -491,11 +492,27 @@
         assertEquals("Request RTF: Http Response should be 200: ",
                 Status.OK.getStatusCode(), responsertf.getStatus());
 
+        // With hitc above the total number of results (no truncation)
         String str = responsertf.readEntity(String.class);
         assertTrue("Page 1 content", str.contains("Ironhoof"));
         assertTrue("Page 2 content", str.contains("Sinologie"));
         assertTrue("Unicode handling", str.contains("Hintergr\\u252\\'fcnde"));
         assertTrue("TotalResults", str.contains("Count: \\f1 9\\"));
+        assertFalse("Fetched", str.contains("Fetched:"));
+
+        frmap.putSingle("hitc", "7");
+
+        responsertf = target("/export").request()
+            .post(Entity.form(frmap));
+        assertEquals("Request RTF: Http Response should be 200: ",
+                Status.OK.getStatusCode(), responsertf.getStatus());
+
+        str = responsertf.readEntity(String.class);
+        assertTrue("Page 1 content", str.contains("Ironhoof"));
+        assertTrue("Page 2 content", str.contains("Sinologie"));
+        assertTrue("Unicode handling", str.contains("Hintergr\\u252\\'fcnde"));
+        assertTrue("TotalResults", str.contains("Count: \\f1 9\\"));
+        assertTrue("Fetched", str.contains("Fetched: \\f1 7\\"));
     }
 
     
@@ -534,6 +551,7 @@
         frmap.add("format", "rtf");
         frmap.add("q", "Plagegeist");
         frmap.add("ql", "poliqarp");
+        frmap.add("hitc", "30");
         String filenamer = "dateiPagingRtf";
         frmap.putSingle("fname", filenamer);
 
@@ -587,6 +605,7 @@
         frmap.add("format", "json");
         frmap.add("q", "Plagegeist");
         frmap.add("ql", "poliqarp");
+        frmap.add("hitc", "30");
         String filenamer = "dateiPagingJson";
         frmap.putSingle("fname", filenamer);
 
@@ -607,6 +626,65 @@
         assertTrue(obj.at("/matches/0/snippet").asText().contains("<span class=\"context-right\">&quot;"));
         assertTrue(obj.at("/matches/0/snippet").asText().contains("wie wär's"));
     }    
+
+    @Test
+    public void testExportWsJsonWithMaxHitcFirstPage () throws IOException {
+
+        // Respond to page 2 with a broken fixture to ensure it is never requested
+        mockClient.reset().when(
+            request()
+            .withMethod("GET")
+            .withPath("/api/v1.0/search")
+            .withQueryStringParameter("q", "Plagegeist")
+            .withQueryStringParameter("count", "5")
+            .withQueryStringParameter("offset", "5")
+            )
+            .respond(
+                response()
+                .withHeader("Content-Type: application/json; charset=utf-8")
+                .withBody(getFixture("response_broken.json"))
+                .withStatusCode(200)
+                );
+
+        mockClient.when(
+            request()
+            .withMethod("GET")
+            .withPath("/api/v1.0/search")
+            .withQueryStringParameter("q", "Plagegeist")
+            )
+            .respond(
+                response()
+                .withHeader("Content-Type: application/json; charset=utf-8")
+                .withBody(getFixture("response_plagegeist_1.json"))
+                .withStatusCode(200)
+                );
+
+        MultivaluedHashMap<String, String> frmap = new MultivaluedHashMap<String, String>();
+        frmap.add("format", "json");
+        frmap.add("q", "Plagegeist");
+        frmap.add("ql", "poliqarp");
+        frmap.add("hitc", "3");
+        String filenamer = "dateiPagingJson";
+        frmap.putSingle("fname", filenamer);
+
+        Response responsejson = target("/export").request()
+            .post(Entity.form(frmap));
+        assertEquals("Request JSON: Http Response should be 200: ",
+                Status.OK.getStatusCode(), responsejson.getStatus());
+
+        String str = responsejson.readEntity(String.class);
+        JsonParser parser = mapper.getFactory().createParser(str);
+        JsonNode obj = mapper.readTree(parser);
+
+        assertEquals(obj.at("/query/@type").asText(),"koral:token");
+        assertEquals(obj.at("/meta/totalResults").asInt(),9);
+        assertEquals(obj.at("/matches/0/matchID").asText(),"match-WUD17/G59/34284-p4238-4239");
+        assertEquals(obj.at("/matches/1/matchID").asText(),"match-WUD17/C53/60524-p736-737");
+        assertEquals(obj.at("/matches/2/matchID").asText(),"match-WUD17/J34/49397-p19826-19827");
+        assertTrue(obj.at("/matches/3").isMissingNode());
+        assertTrue(obj.at("/matches/0/snippet").asText().contains("<span class=\"context-right\">&quot;"));
+        assertTrue(obj.at("/matches/0/snippet").asText().contains("wie wär's"));
+    }    
     
 
     @Test