Add namespace and some documentation
diff --git a/R/KorAPConnection.R b/R/KorAPConnection.R
index 1fdc587..469f228 100644
--- a/R/KorAPConnection.R
+++ b/R/KorAPConnection.R
@@ -1,11 +1,21 @@
#' @import jsonlite
#' @import curl
-library(jsonlite)
-
defaultKorAPUrl <- "https://korap.ids-mannheim.de/"
-KorAPConnection <- function(KorAPUrl=defaultKorAPUrl, apiVersion='v1.0', apiUrl = paste0(KorAPUrl, 'api/' ,apiVersion, '/')) {
+#' \code{KorAPQuery} initiates a connect to some KorAP server.
+#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.
+#' @return object that contains all necessary connection information and can be used on \code{\link{KorAPQuery}}
+#' @export
+KorAPConnection <- function(KorAPUrl=defaultKorAPUrl, apiVersion='v1.0', apiUrl = NA) {
+ m <-regexpr("https?://[^?]+", KorAPUrl, perl = TRUE)
+ KorAPUrl <- regmatches(KorAPUrl, m)
+ if (!endsWith(KorAPUrl, '/')) {
+ KorAPUrl <- paste0(KorAPUrl, "/")
+ }
+ if (is.na(apiUrl)) {
+ apiUrl = paste0(KorAPUrl, 'api/', apiVersion, '/')
+ }
con <- data.frame(apiUrl, KorAPUrl, apiVersion)
return(con)
}
diff --git a/R/KorAPCorpusStats.R b/R/KorAPCorpusStats.R
new file mode 100644
index 0000000..b9e44cf
--- /dev/null
+++ b/R/KorAPCorpusStats.R
@@ -0,0 +1,14 @@
+#' @import jsonlite
+#' @import curl
+
+#' @export
+KorAPCorpusStats <- function(con, vc = NA, query = NA) {
+ if ((is.na(query) && is.na(vc)) || !(is.na(query) || is.na(vc))) {
+ stop("Exaclty one of the parameters query and vc must be specified.")
+ }
+ if (is.na(vc)) {
+ vc = query$vc
+ }
+ url <- paste0(con$apiUrl, 'statistics?cq=', URLencode(vc, reserved=TRUE))
+ return(fromJSON(url))
+}
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 6898a37..8bc822f 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -1,35 +1,76 @@
#' @import jsonlite
#' @import curl
-library(jsonlite)
-
defaultFields <- c("corpusSigle", "textSigle", "pubDate", "pubPlace",
- "availability", "textClass")
+ "availability", "textClass", "snippet")
-derekoStats <- function(vc='') {
- return(fromJSON(paste0(apiurl, 'statistics?cq=',
- URLencode(vc, reserved=TRUE))))
+contentFields <- c("snippet")
+
+QueryParameterFromUrl <- function(url, parameter) {
+ regex <- paste0(".*[?&]", parameter, "=([^&]*).*")
+ if (grepl(regex, url)) {
+ return(gsub(regex, '\\1', url, perl = TRUE))
+ } else {
+ return("")
+ }
}
-KorAPQuery <- function(con, query, vc="", ql="poliqarp", fields=defaultFields) {
+KorAPQueryStringFromUrl <- function(KorAPUrl) {
+ return(URLdecode(gsub(".*[?&]q=([^&]*).*", '\\1', KorAPUrl, perl = TRUE)))
+}
+
+#' \code{KorAPQuery} perform a query on the KorAP server.
+#' @param con object obtained from \code{\link{KorAPConnection}}, that contains all necessary connection information
+#' @param query string that contains the corpus query. The query langauge depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}
+#' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
+#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.
+#' @param metaDataOnly boolean that determines wether queries should return only metadata without any snippets. This can also be useful to prevent query rewrites.
+#' @param ql string to choose the query language
+#' @param fields (meta)data fields that will be fetch for every matcch
+#'
+#' @return A KorAP query object that, among other information, contains the total number of results in \code{$meta$totalResults}. The resulting object can be used to fetch all (\code{\link{KorAPFetchAll}) or the next page of results (\code{\link{KorAPFetchNext}}). Please make sure to check \code{$collection$rewrites} to see if any unforseen rewrites of the query had to be performed.
+#'
+#' @examples
+#' q <- KorAPQuery(con, "Ameisenplage")
+#' q <- KorAPQuery(KorAPConnection(), "Ameisenplage")
+#' q <- KorAPQuery(con, KorAPUrl = "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp&cutoff=1")
+#'
+#' @references
+#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
+#'
+#' @export
+KorAPQuery <- function(con, query = NA, vc=NA, KorAPUrl = NA, metadataOnly=FALSE, ql="poliqarp", fields=defaultFields) {
+ if (is.na(query) && is.na(KorAPUrl) || ! (is.na(query) || is.na(KorAPUrl))) {
+ stop("Exaclty one of the parameters query and KorAPUrl must be specified.")
+ }
+ if (is.na(query)) {
+ query <- QueryParameterFromUrl(KorAPUrl, "q")
+ vc <- QueryParameterFromUrl(KorAPUrl, "vc")
+ ql <- QueryParameterFromUrl(KorAPUrl, "ql")
+ }
request <- paste0('?q=', URLencode(query, reserved=TRUE),
- ifelse(vc != '', paste0('&cq=', URLencode(vc, reserved=TRUE)), ''),
+ ifelse(vc != '', paste0('&vc=', URLencode(vc, reserved=TRUE)), ''),
'&ql=', ql);
webUIRequestUrl <- paste0(con$KorAPUrl, request)
+ if (is.na(vc)) {
+ vc <-""
+ }
requestUrl <- paste0(con$apiUrl, 'search', request,
'&fields=', paste(defaultFields, collapse = ","),
- '&access-rewrite-disabled=true')
+ ifelse(metadataOnly, '&access-rewrite-disabled=true', ''))
result <- fromJSON(paste0(requestUrl, '&count=1'))
- result$fields <- fields
+ result$fields <- fields[!metadataOnly || !fields %in% contentFields]
result$requestUrl <- requestUrl
result$request <- request
+ result$vc <- vc
result$webUIRequestUrl <- webUIRequestUrl
result$nextStartIndex <- 0
result$hasMoreMatches <- (result$meta$totalResults > 0)
return(result)
}
+#' @export
KorAPFetchAll <- function(query, verbose=FALSE) {
if (query$meta$totalResults == 0) { return(data.frame()) }
@@ -66,6 +107,7 @@
return(allMatches)
}
+#' @export
KorAPFetchNext <- function(query, offset=query$nextStartIndex, verbose=FALSE) {
if (query$nextStartIndex >= query$meta$totalResults) {
query$hasMoreMatches <- FALSE