Vectorize corpusQuery and corpusStats

Change-Id: If2deeeeef2b2d64169dd21e5514dac6f8e458b32
diff --git a/NAMESPACE b/NAMESPACE
index 3488a72..a34e671 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@
 export(bind_cols)
 export(ci)
 export(complete)
+export(expand_grid)
 export(group_by)
 export(mutate)
 export(select)
@@ -27,11 +28,13 @@
 import(R.cache)
 import(dplyr)
 import(httr)
+import(jsonlite)
 import(methods)
 import(purrr)
 import(tidyr)
 import(utils)
 importFrom(broom,tidy)
+importFrom(dplyr,bind_cols)
 importFrom(dplyr,group_by)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
@@ -40,4 +43,7 @@
 importFrom(lubridate,year)
 importFrom(magrittr,"%>%")
 importFrom(stats,prop.test)
+importFrom(tibble,as_tibble)
+importFrom(tibble,rownames_to_column)
 importFrom(tidyr,complete)
+importFrom(tidyr,expand_grid)
diff --git a/R/KorAPConnection.R b/R/KorAPConnection.R
index fa8f1f5..85d8997 100644
--- a/R/KorAPConnection.R
+++ b/R/KorAPConnection.R
@@ -9,7 +9,6 @@
 #' @import dplyr
 #' @import purrr
 #' @import tidyr
-#'
 
 #' @export
 KorAPConnection <- setClass("KorAPConnection", slots=c(KorAPUrl="character", apiVersion="character", apiUrl="character", userAgent="character", timeout="numeric", verbose="logical", cache="logical"))
@@ -61,6 +60,9 @@
 
 setGeneric("apiCall", function(kco, ...)  standardGeneric("apiCall") )
 
+## quiets concerns of R CMD check re: the .'s that appear in pipelines
+if(getRversion() >= "2.15.1")  utils::globalVariables(c("."))
+
 #' @aliases apiCall
 #' @rdname KorAPConnection-class
 #' @param kco KorAPConnection object
diff --git a/R/KorAPCorpusStats.R b/R/KorAPCorpusStats.R
index 7767adf..3513d5d 100644
--- a/R/KorAPCorpusStats.R
+++ b/R/KorAPCorpusStats.R
@@ -13,12 +13,16 @@
 #' @slot paragraphs number of paragraphs
 setClass("KorAPCorpusStats", slots=c(vc="character", documents="numeric", tokens="numeric", sentences="numeric", paragraphs="numeric"))
 
+log.info <- function(v,  ...) {
+  cat(ifelse(v, paste0(...), ""))
+}
 setGeneric("corpusStats", function(kco, ...)  standardGeneric("corpusStats") )
 
 #' Fetch information about a (virtual) corpus
 #' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
 #' @param vc string describing the virtual corpus. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
 #' @param verbose logical. If \code{TRUE}, additional diagnostics are printed.
+#' @param as.df return result as data frame instead of as S4 object?
 #' @return \code{KorAPCorpusStats} object with the slots \code{documents}, \code{tokens}, \code{sentences}, \code{paragraphs}
 #'
 #' @examples
@@ -29,16 +33,24 @@
 #'
 #' @aliases corpusStats
 #' @export
-setMethod("corpusStats", "KorAPConnection",  function(kco, vc="", verbose = kco@verbose) {
-  url <- paste0(kco@apiUrl, 'statistics?cq=', URLencode(vc, reserved=TRUE))
-  if (verbose) {
-    cat("Calculating size of corpus \"", vc,"\"", sep="")
-  }
-  res <- apiCall(kco, url)
-  if (verbose) {
-    cat("\n")
-  }
-  new("KorAPCorpusStats", vc = vc, documents = res$documents, tokens = res$tokens, sentences = res$sentences, paragraphs = res$paragraphs)
+setMethod("corpusStats", "KorAPConnection",  function(kco, vc="", verbose = kco@verbose, as.df = FALSE) {
+  ifelse(length(vc) > 1,
+    return(
+      do.call(rbind,
+              Map(function(cq) corpusStats(kco, cq, verbose, as.df = TRUE), vc))
+    ), {
+    url <- paste0(kco@apiUrl, 'statistics?cq=', URLencode(vc, reserved=TRUE))
+    log.info(verbose, "Calculating size of corpus \"", vc,"\"", sep="")
+    res <- apiCall(kco, url)
+    log.info(verbose, "\n")
+    ifelse(as.df,
+           return(data.frame(vc=vc, res, stringsAsFactors = FALSE)),
+           return(new("KorAPCorpusStats", vc = vc, documents = res$documents,
+                      tokens = res$tokens,
+                      sentences = res$sentences,
+                      paragraphs = res$paragraphs))
+    )
+  })
 })
 
 #' @rdname KorAPCorpusStats-class
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 3cc0b37..f71a4f3 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -4,11 +4,12 @@
 #' New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
 #'
 #' @include KorAPConnection.R
+#' @import jsonlite
 #' @import tidyr
 #' @import dplyr
 #' @import httr
 #'
-#'
+#' @include RKorAPClient.R
 
 #' @export
 KorAPQuery <- setClass("KorAPQuery", slots = c(
@@ -76,9 +77,8 @@
   }
 }
 
-KorAPQueryStringFromUrl <- function(KorAPUrl) {
-  return(URLdecode(gsub(".*[?&]q=([^&]*).*", '\\1', KorAPUrl, perl = TRUE)))
-}
+## quiets concerns of R CMD check re: the .'s that appear in pipelines
+if(getRversion() >= "2.15.1")  utils::globalVariables(c("."))
 
 #' Method corpusQuery
 #'
@@ -93,6 +93,7 @@
 #' @param fields (meta)data fields that will be fetched for every match.
 #' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficent rights (not yet implemented).
 #' @param verbose print some info
+#' @param as.df return result as data frame instead of as S4 object?
 #' @return A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
 #' A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
 #' Please make sure to check \code{$collection$rewrites} to see if any unforseen access rewrites of the query's virtual corpus had to be performed.
@@ -115,12 +116,12 @@
 #'   fetchAll() %>%
 #'   slot("collectedMatches") %>%
 #'   mutate(year = lubridate::year(pubDate)) %>%
-#'   dplyr::select(year) %>%
+#'   select(year) %>%
 #'   group_by(year) %>%
 #'   summarise(Count = n()) %>%
 #'   mutate(Freq = mapply(function(f, y)
 #'     f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) %>%
-#'   dplyr::select(-Count) %>%
+#'   select(-Count) %>%
 #'   complete(year = min(year):max(year), fill = list(Freq = 0)) %>%
 #'   plot(type = "l")
 #'
@@ -132,47 +133,55 @@
 #' @aliases corpusQuery
 #' @export
 setMethod("corpusQuery", "KorAPConnection",
-          function(kco, query, vc="", KorAPUrl, metadataOnly = TRUE, ql = "poliqarp", fields = defaultFields,
-                   accessRewriteFatal = TRUE, verbose = kco@verbose) {
-            defaultFields <- c("corpusSigle", "textSigle", "pubDate",  "pubPlace",
-                               "availability", "textClass", "snippet")
-            contentFields <- c("snippet")
-            fields <- fields[!fields %in% contentFields]
-
-            if (missing(query) && missing(KorAPUrl) ||  ! (missing(query) || missing(KorAPUrl))) {
-              stop("Exactly one of the parameters query and KorAPUrl must be specified.")
-            }
-            if (missing(query)) {
-              query <- QueryParameterFromUrl(KorAPUrl, "q")
-              vc <- QueryParameterFromUrl(KorAPUrl, "cq")
-              ql <- QueryParameterFromUrl(KorAPUrl, "ql")
-            }
-            request <- paste0('?q=', URLencode(query, reserved=TRUE),
-                              ifelse(vc != '', paste0('&cq=', URLencode(vc, reserved=TRUE)), ''), '&ql=', ql)
-            webUIRequestUrl <- paste0(kco@KorAPUrl, request)
-            requestUrl <- paste0(kco@apiUrl, 'search', request,
-                                 '&fields=', paste(fields, collapse = ","),
-                                 ifelse(metadataOnly, '&access-rewrite-disabled=true', ''))
-            if (verbose) {
-              cat("Searching \"", query, "\" in \"", vc, "\"", sep="")
-            }
-            res = apiCall(kco, paste0(requestUrl, '&count=0'))
-            if (verbose) {
-              cat(" took ", res$meta$benchmark, "\n", sep="")
-            }
-            KorAPQuery(
-              korapConnection = kco,
-              nextStartIndex = 0,
-              fields = fields,
-              requestUrl = requestUrl,
-              request = request,
-              totalResults = res$meta$totalResults,
-              vc = vc,
-              apiResponse = res,
-              webUIRequestUrl = webUIRequestUrl,
-              hasMoreMatches = (res$meta$totalResults > 0),
-            )
-          })
+  function(kco,
+           query = ifelse(missing(KorAPUrl),
+                     stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE),
+                     httr::parse_url(KorAPUrl)$query$q),
+           vc = ifelse(missing(KorAPUrl), "", httr::parse_url(KorAPUrl)$query$cq),
+           KorAPUrl,
+           metadataOnly = TRUE,
+           ql = ifelse(missing(KorAPUrl), "poliqarp", httr::parse_url(KorAPUrl)$query$ql),
+           fields = c("corpusSigle", "textSigle", "pubDate",  "pubPlace",
+                      "availability", "textClass", "snippet"),
+           accessRewriteFatal = TRUE,
+           verbose = kco@verbose,
+           as.df = FALSE) {
+    ifelse(length(query) > 1 , {
+           #grid <- expand_grid(query=query, vc=vc)
+           return(
+             do.call(rbind,
+                     Map(function(q, cq) corpusQuery(kco, query=q, vc=cq,
+                                                     verbose=verbose, as.df = TRUE), query, vc))
+           )}, {
+             contentFields <- c("snippet")
+             fields <- fields[!fields %in% contentFields]
+             request <- paste0('?q=', URLencode(query, reserved=TRUE),
+                      ifelse(vc != '', paste0('&cq=', URLencode(vc, reserved=TRUE)), ''), '&ql=', ql)
+             webUIRequestUrl <- paste0(kco@KorAPUrl, request)
+             requestUrl <- paste0(kco@apiUrl, 'search', request,
+                                  '&fields=', paste(fields, collapse = ","),
+                                  ifelse(metadataOnly, '&access-rewrite-disabled=true', ''))
+             log.info(verbose, "Searching \"", query, "\" in \"", vc, "\"", sep="")
+             res = apiCall(kco, paste0(requestUrl, '&count=0'))
+             log.info(verbose, " took ", res$meta$benchmark, "\n", sep="")
+             ifelse(as.df,
+                    return(data.frame(query=query,
+                                      totalResults=res$meta$totalResults,
+                                      vc=vc,
+                                      webUIRequestUrl=webUIRequestUrl, stringsAsFactors = FALSE)),
+                    return(KorAPQuery(
+                      korapConnection = kco,
+                      nextStartIndex = 0,
+                      fields = fields,
+                      requestUrl = requestUrl,
+                      request = request,
+                      totalResults = res$meta$totalResults,
+                      vc = vc,
+                      apiResponse = res,
+                      webUIRequestUrl = webUIRequestUrl,
+                      hasMoreMatches = (res$meta$totalResults > 0),
+                    )))})
+  })
 
 #' Fetch the next bunch of results of a KorAP query.
 #'
diff --git a/R/reexports.R b/R/reexports.R
index f6624ba..1a4d729 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -10,6 +10,9 @@
 #' @importFrom magrittr %>%
 #' @usage lhs \%>\% rhs
 NULL
+#' @importFrom dplyr bind_cols
+#' @export
+dplyr::bind_cols
 #' @importFrom dplyr mutate
 #' @export
 dplyr::mutate
@@ -25,6 +28,18 @@
 #' @importFrom tidyr complete
 #' @export
 tidyr::complete
+#' @importFrom tidyr expand_grid
+#' @export
+tidyr::expand_grid
 #' @importFrom lubridate year
 #' @export
 lubridate::year
+#' @importFrom tibble as_tibble rownames_to_column
+#' @export
+tibble::as_tibble
+tibble::rownames_to_column
+#' @importFrom broom tidy
+#' @export
+broom::tidy
+#' @importFrom ggplot2 ggplot geom_bar geom_line
+#' @export
diff --git a/man/corpusQuery-KorAPConnection-method.Rd b/man/corpusQuery-KorAPConnection-method.Rd
index e623297..49e636b 100644
--- a/man/corpusQuery-KorAPConnection-method.Rd
+++ b/man/corpusQuery-KorAPConnection-method.Rd
@@ -6,9 +6,16 @@
 \alias{corpusQuery}
 \title{Method corpusQuery}
 \usage{
-\S4method{corpusQuery}{KorAPConnection}(kco, query, vc = "", KorAPUrl,
-  metadataOnly = TRUE, ql = "poliqarp", fields = defaultFields,
-  accessRewriteFatal = TRUE, verbose = kco@verbose)
+\S4method{corpusQuery}{KorAPConnection}(kco,
+  query = ifelse(missing(KorAPUrl),
+  stop("At least one of the parameters query and KorAPUrl must be specified.",
+  call. = FALSE), httr::parse_url(KorAPUrl)$query$q),
+  vc = ifelse(missing(KorAPUrl), "", httr::parse_url(KorAPUrl)$query$cq),
+  KorAPUrl, metadataOnly = TRUE, ql = ifelse(missing(KorAPUrl),
+  "poliqarp", httr::parse_url(KorAPUrl)$query$ql),
+  fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace",
+  "availability", "textClass", "snippet"), accessRewriteFatal = TRUE,
+  verbose = kco@verbose, as.df = FALSE)
 }
 \arguments{
 \item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}}
@@ -28,6 +35,8 @@
 \item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficent rights (not yet implemented).}
 
 \item{verbose}{print some info}
+
+\item{as.df}{return result as data frame instead of as S4 object?}
 }
 \value{
 A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
@@ -55,12 +64,12 @@
   fetchAll() \%>\%
   slot("collectedMatches") \%>\%
   mutate(year = lubridate::year(pubDate)) \%>\%
-  dplyr::select(year) \%>\%
+  select(year) \%>\%
   group_by(year) \%>\%
   summarise(Count = n()) \%>\%
   mutate(Freq = mapply(function(f, y)
     f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\%
-  dplyr::select(-Count) \%>\%
+  select(-Count) \%>\%
   complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\%
   plot(type = "l")
 
diff --git a/man/corpusStats-KorAPConnection-method.Rd b/man/corpusStats-KorAPConnection-method.Rd
index 486ee1c..8a13183 100644
--- a/man/corpusStats-KorAPConnection-method.Rd
+++ b/man/corpusStats-KorAPConnection-method.Rd
@@ -7,7 +7,7 @@
 \title{Fetch information about a (virtual) corpus}
 \usage{
 \S4method{corpusStats}{KorAPConnection}(kco, vc = "",
-  verbose = kco@verbose)
+  verbose = kco@verbose, as.df = FALSE)
 }
 \arguments{
 \item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}}
@@ -15,6 +15,8 @@
 \item{vc}{string describing the virtual corpus. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
 
 \item{verbose}{logical. If \code{TRUE}, additional diagnostics are printed.}
+
+\item{as.df}{return result as data frame instead of as S4 object?}
 }
 \value{
 \code{KorAPCorpusStats} object with the slots \code{documents}, \code{tokens}, \code{sentences}, \code{paragraphs}
diff --git a/man/reexports.Rd b/man/reexports.Rd
index 81287d2..b92085b 100644
--- a/man/reexports.Rd
+++ b/man/reexports.Rd
@@ -3,12 +3,16 @@
 \docType{import}
 \name{reexports}
 \alias{reexports}
+\alias{bind_cols}
 \alias{mutate}
 \alias{select}
 \alias{group_by}
 \alias{summarise}
 \alias{complete}
+\alias{expand_grid}
 \alias{year}
+\alias{as_tibble}
+\alias{tidy}
 \title{Objects exported from other packages}
 \keyword{internal}
 \description{
@@ -16,10 +20,14 @@
 below to see their documentation.
 
 \describe{
-  \item{dplyr}{\code{\link[dplyr]{mutate}}, \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{summarise}}}
+  \item{broom}{\code{\link[broom]{tidy}}}
+
+  \item{dplyr}{\code{\link[dplyr]{bind_cols}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{summarise}}}
 
   \item{lubridate}{\code{\link[lubridate]{year}}}
 
-  \item{tidyr}{\code{\link[tidyr]{complete}}}
+  \item{tibble}{\code{\link[tibble]{as_tibble}}}
+
+  \item{tidyr}{\code{\link[tidyr]{complete}}, \code{\link[tidyr]{expand_grid}}}
 }}