Migrate to S4 Classes
Change-Id: I27e9452e9d59f6b414898390074908ec0e91e2a9
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 1d36e8d..c02b87b 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -1,13 +1,67 @@
+#' Class KorAPQuery
+#'
+#' \code{KorAPQuery} objetcs represent the current state of a query to a KorAP server.
+#' New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
+#'
+#' @include KorAPConnection.R
#' @import jsonlite
-#' @import curl
-#' @import utils
+#' @import httr
+#'
+#'
-defaultFields <- c("corpusSigle", "textSigle", "pubDate", "pubPlace",
- "availability", "textClass", "snippet")
+#' @export
+KorAPQuery <- setClass("KorAPQuery", slots = c(
+ "request",
+ "vc",
+ "totalResults",
+ "nextStartIndex",
+ "fields",
+ "requestUrl",
+ "webUIRequestUrl",
+ "apiResponse",
+ "collectedMatches",
+ "hasMoreMatches"
+))
-contentFields <- c("snippet")
+#' Method initialize
+#'
+#' @rdname KorAPQuery-class
+#' @param .Object …
+#' @param request query part of the request URL
+#' @param vc definition of a virtual corpus
+#' @param totalResults number of hits the query has yielded
+#' @param nextStartIndex at what index to start the next fetch of query results
+#' @param fields what data / metadata fields should be collected
+#' @param requestUrl complete URL of the API request
+#' @param webUIRequestUrl URL of a web frontend request corresponding to the API request
+#' @param apiResponse data-frame representation of the JSON response of the API request
+#' @param hasMoreMatches boolean that signals if more query results can be fetched
+#' @param collectedMatches matches already fetched from the KorAP-API-server
+#' @export
+setMethod("initialize", "KorAPQuery",
+ function(.Object, request = NULL, vc="", totalResults=0, nextStartIndex=0, fields=c("corpusSigle", "textSigle", "pubDate", "pubPlace",
+ "availability", "textClass", "snippet"),
+ requestUrl="", webUIRequestUrl = "", apiResponse = NULL, hasMoreMatches= FALSE, collectedMatches = NULL) {
+ .Object <- callNextMethod()
+ .Object@request = request
+ .Object@vc = vc
+ .Object@totalResults = totalResults
+ .Object@nextStartIndex = nextStartIndex
+ .Object@fields = fields
+ .Object@requestUrl = requestUrl
+ .Object@webUIRequestUrl = webUIRequestUrl
+ .Object@apiResponse = apiResponse
+ .Object@hasMoreMatches = hasMoreMatches
+ .Object@collectedMatches = collectedMatches
+ .Object
+ })
-maxResultsPerPage <- 50;
+setGeneric("corpusQuery", function(kco, ...) standardGeneric("corpusQuery") )
+setGeneric("fetchAll", function(kqo, ...) standardGeneric("fetchAll") )
+setGeneric("fetchNext", function(kqo, ...) standardGeneric("fetchNext") )
+setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest") )
+
+maxResultsPerPage <- 50
QueryParameterFromUrl <- function(url, parameter) {
regex <- paste0(".*[?&]", parameter, "=([^&]*).*")
@@ -22,8 +76,11 @@
return(URLdecode(gsub(".*[?&]q=([^&]*).*", '\\1', KorAPUrl, perl = TRUE)))
}
-#' Send a query to a KorAP connection.
-#' @param con object obtained from \code{\link{KorAPConnection}}, that contains all necessary connection information
+#' Method corpusQuery
+#'
+#' Perform a corpus query via a connection to a KorAP-API-server.
+#'
+#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
#' @param query string that contains the corpus query. The query langauge depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}
#' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.
@@ -32,99 +89,106 @@
#' @param fields (meta)data fields that will be fetched for every match
#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficent rights (not yet implemented)
#' @param verbose print some info
-#' @return A KorAP query object that, among other information, contains the total number of results in \code{$meta$totalResults}. The resulting object can be used to fetch all query results (with \code{\link{KorAPFetchAll}}) or the next page of results (with \code{\link{KorAPFetchNext}}).
-#' A correspunding URL to be used within a web browser is contained in \code{$webUIRequestUrl}
+#' @return A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
+#' A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
#' Please make sure to check \code{$collection$rewrites} to see if any unforseen access rewrites of the query's virtual corpus had to be performed.
#'
#' @examples
-#' con <- KorAPConnection()
-#' q <- KorAPQuery(con, "Ameisenplage")
-#' q <- KorAPFetchAll(q)
-#' summary(q$collectedMatches)
+#' kco <- new("KorAPConnection")
+#' kqo <- corpusQuery(kco, "Ameisenplage")
+#' kqo <- fetchAll(kqo)
+#' kqo
#'
-#' q <- KorAPQuery(con,
+#' kqo <- corpusQuery(kco,
#' KorAPUrl = "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp")
-#' q$meta$totalResults
+#' kqo
#'
-#' q <- KorAPQuery(con, "Ameisenplage")
-#' q <- KorAPFetchAll(q, verbose=TRUE)
-#' tokensPerYear <- function(year) { return(KorAPCorpusStats(con, paste("pubDate in", year))$tokens) }
-#' df <- as.data.frame(table(as.numeric(format(q$collectedMatches$pubDate,"%Y")), dnn="year"),
+#' q <- corpusQuery(kco, "Ameisenplage")
+#' q <- fetchAll(q, verbose=TRUE)
+#' tokensPerYear <- function(year) { return(corpusStats(kco, paste("pubDate in", year))@tokens) }
+#' df <- as.data.frame(table(as.numeric(format(q@collectedMatches$pubDate,"%Y")), dnn="year"),
#' stringsAsFactors = FALSE)
#' df$ipm <- 1000000 * df$Freq / tokensPerYear(df$year)
#' plot(df$year, df$ipm, type="l")
#'
-#' @seealso \code{\link{KorAPConnection}}, \code{\link{KorAPFetchNext}}, \code{\link{KorAPFetchRest}}, \code{\link{KorAPFetchAll}}, \code{\link{KorAPCorpusStats}}
+#' @seealso \code{\link{KorAPConnection}}, \code{\link{fetchNext}}, \code{\link{fetchRest}}, \code{\link{fetchAll}}, \code{\link{corpusStats}}
#'
#' @references
#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
#'
+#' @aliases corpusQuery
#' @export
-KorAPQuery <- function(con, query, vc="", KorAPUrl, metadataOnly = TRUE, ql = "poliqarp", fields = defaultFields,
- accessRewriteFatal = TRUE, verbose=FALSE) {
- if (missing(query) && missing(KorAPUrl) || ! (missing(query) || missing(KorAPUrl))) {
- stop("Exactly one of the parameters query and KorAPUrl must be specified.")
- }
- if (missing(query)) {
- query <- QueryParameterFromUrl(KorAPUrl, "q")
- vc <- QueryParameterFromUrl(KorAPUrl, "vc")
- ql <- QueryParameterFromUrl(KorAPUrl, "ql")
- }
- request <- paste0('?q=', URLencode(query, reserved=TRUE),
- ifelse(vc != '', paste0('&cq=', URLencode(vc, reserved=TRUE)), ''), '&ql=', ql)
- webUIRequestUrl <- paste0(con$KorAPUrl, request)
- requestUrl <- paste0(con$apiUrl, 'search', request,
- '&fields=', paste(defaultFields, collapse = ","),
- ifelse(metadataOnly, '&access-rewrite-disabled=true', ''))
- if (verbose) {
- cat(paste0(webUIRequestUrl, "\n"))
- }
- result <- fromJSON(paste0(requestUrl, '&count=1'))
- result$fields <- fields[!fields %in% contentFields]
- result$requestUrl <- requestUrl
- result$request <- request
- result$vc <- vc
- result$webUIRequestUrl <- webUIRequestUrl
- result$nextStartIndex <- 0
- result$hasMoreMatches <- (result$meta$totalResults > 0)
- return(result)
-}
+setMethod("corpusQuery", "KorAPConnection",
+ function(kco, query, vc="", KorAPUrl, metadataOnly = TRUE, ql = "poliqarp", fields = defaultFields,
+ accessRewriteFatal = TRUE, verbose=FALSE) {
+ defaultFields <- c("corpusSigle", "textSigle", "pubDate", "pubPlace",
+ "availability", "textClass", "snippet")
+ contentFields <- c("snippet")
+
+ if (missing(query) && missing(KorAPUrl) || ! (missing(query) || missing(KorAPUrl))) {
+ stop("Exactly one of the parameters query and KorAPUrl must be specified.")
+ }
+ if (missing(query)) {
+ query <- QueryParameterFromUrl(KorAPUrl, "q")
+ vc <- QueryParameterFromUrl(KorAPUrl, "vc")
+ ql <- QueryParameterFromUrl(KorAPUrl, "ql")
+ }
+ request <- paste0('?q=', URLencode(query, reserved=TRUE),
+ ifelse(vc != '', paste0('&cq=', URLencode(vc, reserved=TRUE)), ''), '&ql=', ql)
+ webUIRequestUrl <- paste0(kco@KorAPUrl, request)
+ requestUrl <- paste0(kco@apiUrl, 'search', request,
+ '&fields=', paste(defaultFields, collapse = ","),
+ ifelse(metadataOnly, '&access-rewrite-disabled=true', ''))
+ if (verbose) {
+ cat(paste0(webUIRequestUrl, "\n"))
+ }
+ res = fromJSON(paste0(requestUrl, '&count=1'))
+ KorAPQuery(
+ nextStartIndex = 0,
+ fields = fields[!fields %in% contentFields],
+ requestUrl = requestUrl,
+ request = request,
+ totalResults = res$meta$totalResults,
+ vc = vc,
+ apiResponse = res,
+ webUIRequestUrl = webUIRequestUrl,
+ hasMoreMatches = (res$meta$totalResults > 0),
+ )
+ })
#' Fetch the next bunch of results of a KorAP query.
-#' @param queryObject object obtained from \code{\link{KorAPQuery}}
+#'
+#' @param kqo object obtained from \code{\link{corpusQuery}}
#' @param offset start offset for query results to fetch
#' @param maxFetch maximum number of query results to fetch
#' @param verbose print progress information if true
-#' @return The \code{queryObject} input parameter with updated fields \code{$collectedMatches}, \code{$matches} (latest bunch only), \code{$nextStartIndex}, , \code{$hasMoreMatches}
-#'
-#' @examples
-#' q <- KorAPFetchNext(KorAPQuery(KorAPConnection(), "Ameisenplage"))
-#'
-#' @seealso \code{\link{KorAPFetchRest}}, \code{\link{KorAPFetchAll}}
+#' @return The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
#'
#' @references
#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
#'
+#' @aliases fetchNext
+#' @rdname KorAPQuery-class
#' @export
-KorAPFetchNext <- function(queryObject, offset = queryObject$nextStartIndex, maxFetch = maxResultsPerPage, verbose = FALSE) {
- if (queryObject$meta$totalResults == 0 || offset >= queryObject$meta$totalResults) {
- return(queryObject)
+setMethod("fetchNext", "KorAPQuery", function(kqo, offset = kqo@nextStartIndex, maxFetch = maxResultsPerPage, verbose = FALSE) {
+ if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
+ return(kqo)
}
page <- 1
results <- 0
pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
- collectedMatches <- queryObject$collectedMatches
+ collectedMatches <- kqo@collectedMatches
repeat {
- res <- fromJSON(paste0(queryObject$requestUrl, '&count=', min(ifelse(!is.na(maxFetch), maxFetch - results, maxResultsPerPage), maxResultsPerPage) ,'&offset=', offset + results))
- if (res$meta$totalResults == 0) { return(data.frame()) }
- for (field in queryObject$fields) {
+ res <- fromJSON(paste0(kqo@requestUrl, '&count=', min(ifelse(!is.na(maxFetch), maxFetch - results, maxResultsPerPage), maxResultsPerPage) ,'&offset=', offset + results))
+ if (res$meta$totalResults == 0) { return(kqo) }
+ for (field in kqo@fields) {
if (!field %in% colnames(res$matches)) {
res$matches[, field] <- NA
}
}
- currentMatches <- res$matches[queryObject$fields]
+ currentMatches <- res$matches[kqo@fields]
factorCols <- colnames(subset(currentMatches, select=-c(pubDate)))
currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
currentMatches$pubDate = as.Date(currentMatches$pubDate, format = "%Y-%m-%d")
@@ -142,50 +206,72 @@
break
}
}
- res$nextStartIndex <- res$meta$startIndex + res$meta$itemsPerPage
- res$fields <- queryObject$fields
- res$requestUrl <- queryObject$requestUrl
- res$request <- queryObject$request
- res$webUIRequestUrl <- queryObject$webUIRequestUrl
- res$hasMoreMatches <- (res$meta$totalResults > res$nextStartIndex)
- res$collectedMatches <- collectedMatches
- return(res)
-}
+ nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, res$meta$totalResults)
+ KorAPQuery(nextStartIndex = nextStartIndex,
+ fields = kqo@fields,
+ requestUrl = kqo@requestUrl,
+ request = kqo@request,
+ totalResults = res$meta$totalResults,
+ vc = kqo@vc,
+ webUIRequestUrl = kqo@webUIRequestUrl,
+ hasMoreMatches = (res$meta$totalResults > nextStartIndex),
+ apiResponse = res,
+ collectedMatches = collectedMatches)
+})
#' Fetch all results of a KorAP query.
-#' @param queryObject object obtained from \code{\link{KorAPQuery}}
-#' @param verbose print progress information if true
-#' @return The \code{queryObject} input parameter with updated fields \code{$collectedMatches}, \code{$matches} (latest bunch only), \code{$nextStartIndex}, \code{$hasMoreMatches}
#'
#' @examples
-#' q <- KorAPFetchAll(KorAPQuery(KorAPConnection(), "Ameisenplage"))
-#' q$collectedMatches
+#' q <- fetchAll(corpusQuery(new("KorAPConnection"), "Ameisenplage"))
+#' q@collectedMatches
#'
-#' @seealso \code{\link{KorAPFetchRest}}, \code{\link{KorAPFetchNext}}
-#'
-#' @references
-#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
-#'
+#' @aliases fetchAll
+#' @rdname KorAPQuery-class
#' @export
-KorAPFetchAll <- function(queryObject, verbose = FALSE) {
- return(KorAPFetchNext(queryObject, offset = 0, maxFetch = NA, verbose = verbose))
+setMethod("fetchAll", "KorAPQuery", function(kqo, verbose = FALSE) {
+ return(fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose))
+})
+
+#' Fetches the remaining results of a KorAP query.
+#'
+#' @examples
+#' q <- fetchRest(fetchNext(corpusQuery(new("KorAPConnection"), "Ameisenplage")))
+#' q@collectedMatches
+#'
+#' @aliases fetchRest
+#' @rdname KorAPQuery-class
+#' @export
+setMethod("fetchRest", "KorAPQuery", function(kqo, verbose = FALSE) {
+ return(fetchNext(kqo, maxFetch = NA, verbose = verbose))
+})
+
+#´ format()
+#' @rdname KorAPQuery-class
+#' @param x KorAPQuery object
+#' @param ... further arguments passed to or from other methods
+#' @export
+format.KorAPQuery <- function(x, ...) {
+ cat("<KorAPQuery>\n")
+ q <- x
+ aurl = parse_url(q@request)
+ cat(" Query: ", aurl$query$q, "\n")
+ if (!is.null(aurl$query$vc) && aurl$query$vc != "") {
+ cat("Virtual corpus: ", aurl$query$vc, "\n")
+ }
+ if (!is.null(q@collectedMatches)) {
+ cat("==============================================================================================================", "\n")
+ print(summary(q@collectedMatches))
+ cat("==============================================================================================================", "\n")
+ }
+ cat(" Total results: ", q@totalResults, "\n")
+ cat(" Fetched results: ", q@nextStartIndex, "\n")
}
-#' Fetches all remaining results of a KorAP query.
-#' @param queryObject object obtained from \code{\link{KorAPQuery}}
-#' @param verbose print progress information if true
-#' @return The \code{queryObject} input parameter with updated fields \code{$collectedMatches}, \code{$matches} (latest bunch only), \code{$nextStartIndex}, \code{$hasMoreMatches}
+#' show()
#'
-#' @examples
-#' q <- KorAPFetchRest(KorAPFetchNext(KorAPQuery(KorAPConnection(), "Ameisenplage")))
-#' q$collectedMatches
-#'
-#' @seealso \code{\link{KorAPFetchAll}}, \code{\link{KorAPFetchNext}}
-#'
-#' @references
-#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
-#'
+#' @rdname KorAPQuery-class
+#' @param object KorAPQuery object
#' @export
-KorAPFetchRest <- function(queryObject, verbose = FALSE) {
- return(KorAPFetchNext(queryObject, maxFetch = NA, verbose = verbose))
-}
+setMethod("show", "KorAPQuery", function(object) {
+ format(object)
+})