Add function frequencyQuery (corpusQuery + corpusStats + ci)
Change-Id: Icb7ed900ea588f606a812d1e3122867a81cefaa2
diff --git a/NAMESPACE b/NAMESPACE
index a34e671..091ce17 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,7 @@
exportMethods(fetchAll)
exportMethods(fetchNext)
exportMethods(fetchRest)
+exportMethods(frequencyQuery)
exportMethods(initialize)
exportMethods(show)
import(R.cache)
@@ -44,6 +45,7 @@
importFrom(magrittr,"%>%")
importFrom(stats,prop.test)
importFrom(tibble,as_tibble)
+importFrom(tibble,remove_rownames)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,complete)
importFrom(tidyr,expand_grid)
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index f71a4f3..868089c 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -65,6 +65,7 @@
setGeneric("fetchAll", function(kqo, ...) standardGeneric("fetchAll") )
setGeneric("fetchNext", function(kqo, ...) standardGeneric("fetchNext") )
setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest") )
+setGeneric("frequencyQuery", function(kco, ...) standardGeneric("frequencyQuery") )
maxResultsPerPage <- 50
@@ -94,7 +95,8 @@
#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficent rights (not yet implemented).
#' @param verbose print some info
#' @param as.df return result as data frame instead of as S4 object?
-#' @return A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
+#' @param expand logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations
+#' @return Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
#' A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
#' Please make sure to check \code{$collection$rewrites} to see if any unforseen access rewrites of the query's virtual corpus had to be performed.
#'
@@ -145,13 +147,15 @@
"availability", "textClass", "snippet"),
accessRewriteFatal = TRUE,
verbose = kco@verbose,
+ expand = length(vc) != length(query),
as.df = FALSE) {
- ifelse(length(query) > 1 , {
- #grid <- expand_grid(query=query, vc=vc)
- return(
+ ifelse(length(query) > 1 || length(vc) > 1, {
+ grid <- { if (expand) expand_grid(query=query, vc=vc) else tibble(query=query, vc=vc) }
+ return(
do.call(rbind,
- Map(function(q, cq) corpusQuery(kco, query=q, vc=cq,
- verbose=verbose, as.df = TRUE), query, vc))
+ Map(function(q, cq) corpusQuery(kco, query=q, vc=cq, ql=ql,
+ verbose=verbose, as.df = TRUE), grid$query, grid$vc)) %>%
+ remove_rownames()
)}, {
contentFields <- c("snippet")
fields <- fields[!fields %in% contentFields]
@@ -185,6 +189,8 @@
#' Fetch the next bunch of results of a KorAP query.
#'
+#' \bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
+#'
#' @param kqo object obtained from \code{\link{corpusQuery}}
#' @param offset start offset for query results to fetch
#' @param maxFetch maximum number of query results to fetch
@@ -281,6 +287,29 @@
return(fetchNext(kqo, maxFetch = NA, verbose = verbose))
})
+#' Query relative frequency of search term(s)
+#'
+#' \bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
+#' \code{\link{ci}} to compute a table with the relative frequencies and
+#' confidence intervals of one or multiple search terms across one or multiple
+#' virtual corpora.
+#'
+#' @aliases frequencyQuery
+#' @rdname KorAPQuery-class
+#' @examples
+#' new("KorAPConnection", verbose = TRUE) %>%
+#' frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
+#'
+#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})
+#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
+#' @export
+setMethod("frequencyQuery", "KorAPConnection",
+  function(kco, query, vc = "", ...) {
+    # Run the query with metadataOnly = TRUE and collect the result as a data frame.
+    hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)
+    # Attach the corpus size in tokens as reported by corpusStats().
+    # NOTE(review): if `hits` carries a `vc` column, mutate()'s data mask
+    # resolves `vc` to that column rather than to the argument -- confirm intent.
+    hits <- mutate(hits, tokens = corpusStats(kco, vc = vc, as.df = TRUE)$tokens)
+    # Hand the table to ci() for the relative-frequency / confidence-interval columns.
+    ci(hits)
+  }
+)
+
#´ format()
#' @rdname KorAPQuery-class
#' @param x KorAPQuery object
diff --git a/R/ci.R b/R/ci.R
index b9dd987..1d54977 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -2,8 +2,12 @@
#' Add confidence interval and relative frequency variables
#'
#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
-#' 1. relative frequency (\code{f}) 2. lower bound of a confidence interval
-#' (\code{ci.low}) 3. upper bound of a confidence interval
+#' 1. relative frequency (\code{f})
+#' 2. lower bound of a confidence interval (\code{ci.low})
+#' 3. upper bound of a confidence interval
+#'
+#' @seealso
+#' \code{ci} is already included in \code{\link{frequencyQuery}}
#'
#' @param df table with columns for absolute and total frequencies.
#' @param x column with the observed absolute frequency.
@@ -13,6 +17,7 @@
#'
#' @export
#' @importFrom stats prop.test
+#' @importFrom tibble remove_rownames
#' @examples
#' library(ggplot2)
#' kco <- new("KorAPConnection", verbose=TRUE)
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index b5c9a8a..2a1fa8b 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd
@@ -11,6 +11,8 @@
\alias{fetchAll}
\alias{fetchRest,KorAPQuery-method}
\alias{fetchRest}
+\alias{frequencyQuery,KorAPConnection-method}
+\alias{frequencyQuery}
\alias{format.KorAPQuery}
\alias{show,KorAPQuery-method}
\title{Class KorAPQuery}
@@ -31,6 +33,8 @@
\S4method{fetchRest}{KorAPQuery}(kqo,
verbose = kqo@korapConnection@verbose)
+\S4method{frequencyQuery}{KorAPConnection}(kco, query, vc = "", ...)
+
\method{format}{KorAPQuery}(x, ...)
\S4method{show}{KorAPQuery}(object)
@@ -68,10 +72,14 @@
\item{verbose}{print progress information if true}
-\item{x}{KorAPQuery object}
+\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})}
+
+\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
\item{...}{further arguments passed to or from other methods}
+\item{x}{KorAPQuery object}
+
\item{object}{KorAPQuery object}
}
\value{
@@ -80,6 +88,13 @@
\description{
\code{KorAPQuery} objetcs represent the current state of a query to a KorAP server.
New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
+
+\bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
+
+\bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
+\code{\link{ci}} to compute a table with the relative frequencies and
+confidence intervals of one or multiple search terms across one or multiple
+virtual corpora.
}
\examples{
q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
@@ -88,6 +103,9 @@
q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
q@collectedMatches
+new("KorAPConnection", verbose = TRUE) \%>\%
+ frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
+
}
\references{
\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
diff --git a/man/ci.Rd b/man/ci.Rd
index e4f7e54..381adb0 100644
--- a/man/ci.Rd
+++ b/man/ci.Rd
@@ -18,8 +18,9 @@
}
\description{
Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
-1. relative frequency (\code{f}) 2. lower bound of a confidence interval
-(\code{ci.low}) 3. upper bound of a confidence interval
+1. relative frequency (\code{f})
+2. lower bound of a confidence interval (\code{ci.low})
+3. upper bound of a confidence interval
}
\examples{
library(ggplot2)
@@ -32,3 +33,6 @@
geom_point() + geom_line() + geom_ribbon(alpha=.3)
}
+\seealso{
+\code{ci} is already included in \code{\link{frequencyQuery}}
+}
diff --git a/man/corpusQuery-KorAPConnection-method.Rd b/man/corpusQuery-KorAPConnection-method.Rd
index 49e636b..1368ee7 100644
--- a/man/corpusQuery-KorAPConnection-method.Rd
+++ b/man/corpusQuery-KorAPConnection-method.Rd
@@ -15,7 +15,8 @@
"poliqarp", httr::parse_url(KorAPUrl)$query$ql),
fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace",
"availability", "textClass", "snippet"), accessRewriteFatal = TRUE,
- verbose = kco@verbose, as.df = FALSE)
+ verbose = kco@verbose, expand = length(vc) != length(query),
+ as.df = FALSE)
}
\arguments{
\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}}
@@ -36,10 +37,12 @@
\item{verbose}{print some info}
+\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations}
+
\item{as.df}{return result as data frame instead of as S4 object?}
}
\value{
-A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
+Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
Please make sure to check \code{$collection$rewrites} to see if any unforseen access rewrites of the query's virtual corpus had to be performed.
}