blob: 1bfa9ce1196736ce9236a1dee8105b9dfd7f4063 [file] [log] [blame]
Marc Kupietze95108e2019-09-18 13:23:58 +02001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/KorAPQuery.R
3\docType{class}
4\name{KorAPQuery-class}
5\alias{KorAPQuery-class}
6\alias{KorAPQuery}
7\alias{initialize,KorAPQuery-method}
Marc Kupietzdbd431a2021-08-29 12:17:45 +02008\alias{corpusQuery,KorAPConnection-method}
9\alias{corpusQuery}
Marc Kupietze95108e2019-09-18 13:23:58 +020010\alias{fetchNext,KorAPQuery-method}
11\alias{fetchNext}
12\alias{fetchAll,KorAPQuery-method}
13\alias{fetchAll}
14\alias{fetchRest,KorAPQuery-method}
15\alias{fetchRest}
Marc Kupietz38a9d682024-12-06 16:17:09 +010016\alias{buildWebUIRequestUrlFromString}
Marc Kupietzdbd431a2021-08-29 12:17:45 +020017\alias{buildWebUIRequestUrl}
Marc Kupietze95108e2019-09-18 13:23:58 +020018\alias{format.KorAPQuery}
19\alias{show,KorAPQuery-method}
20\title{Class KorAPQuery}
21\usage{
Marc Kupietz76685f52019-11-25 17:46:06 +010022\S4method{initialize}{KorAPQuery}(
23 .Object,
24 korapConnection = NULL,
25 request = NULL,
26 vc = "",
27 totalResults = 0,
28 nextStartIndex = 0,
29 fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
Marc Kupietz2078bde2023-08-27 16:46:15 +020030 "textClass", "snippet", "tokens"),
Marc Kupietz76685f52019-11-25 17:46:06 +010031 requestUrl = "",
32 webUIRequestUrl = "",
33 apiResponse = NULL,
34 hasMoreMatches = FALSE,
35 collectedMatches = NULL
36)
Marc Kupietze95108e2019-09-18 13:23:58 +020037
Marc Kupietzdbd431a2021-08-29 12:17:45 +020038\S4method{corpusQuery}{KorAPConnection}(
39 kco,
Marc Kupietza824d502025-05-02 15:40:23 +020040 query = if (missing(KorAPUrl)) {
41
Marc Kupietzdbd431a2021-08-29 12:17:45 +020042 stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
Marc Kupietza824d502025-05-02 15:40:23 +020043 FALSE)
44 } else {
45 httr2::url_parse(KorAPUrl)$query$q
46 },
Marc Kupietzf9129592025-01-26 19:17:54 +010047 vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
Marc Kupietzdbd431a2021-08-29 12:17:45 +020048 KorAPUrl,
49 metadataOnly = TRUE,
Marc Kupietzf9129592025-01-26 19:17:54 +010050 ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql,
Marc Kupietzdbd431a2021-08-29 12:17:45 +020051 fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
Marc Kupietz2078bde2023-08-27 16:46:15 +020052 "textClass", "snippet", "tokens"),
Marc Kupietzdbd431a2021-08-29 12:17:45 +020053 accessRewriteFatal = TRUE,
54 verbose = kco@verbose,
55 expand = length(vc) != length(query),
Marc Kupietzd9b2fd72023-04-17 19:08:50 +020056 as.df = FALSE,
57 context = NULL
Marc Kupietzdbd431a2021-08-29 12:17:45 +020058)
59
Marc Kupietz76685f52019-11-25 17:46:06 +010060\S4method{fetchNext}{KorAPQuery}(
61 kqo,
62 offset = kqo@nextStartIndex,
63 maxFetch = maxResultsPerPage,
Marc Kupietzdbd431a2021-08-29 12:17:45 +020064 verbose = kqo@korapConnection@verbose,
65 randomizePageOrder = FALSE
Marc Kupietz76685f52019-11-25 17:46:06 +010066)
Marc Kupietze95108e2019-09-18 13:23:58 +020067
Marc Kupietzdbd431a2021-08-29 12:17:45 +020068\S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
Marc Kupietze95108e2019-09-18 13:23:58 +020069
Marc Kupietzdbd431a2021-08-29 12:17:45 +020070\S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
Marc Kupietze95108e2019-09-18 13:23:58 +020071
Marc Kupietz38a9d682024-12-06 16:17:09 +010072buildWebUIRequestUrlFromString(KorAPUrl, query, vc = "", ql = "poliqarp")
73
Marc Kupietzdbd431a2021-08-29 12:17:45 +020074buildWebUIRequestUrl(
75 kco,
Marc Kupietza824d502025-05-02 15:40:23 +020076 query = if (missing(KorAPUrl)) {
77
Marc Kupietzdbd431a2021-08-29 12:17:45 +020078 stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
Marc Kupietza824d502025-05-02 15:40:23 +020079 FALSE)
80 } else {
81 httr2::url_parse(KorAPUrl)$query$q
82 },
Marc Kupietzf9129592025-01-26 19:17:54 +010083 vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
Marc Kupietzdbd431a2021-08-29 12:17:45 +020084 KorAPUrl,
Marc Kupietzf9129592025-01-26 19:17:54 +010085 ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql
Marc Kupietzdbd431a2021-08-29 12:17:45 +020086)
87
Marc Kupietze95108e2019-09-18 13:23:58 +020088\method{format}{KorAPQuery}(x, ...)
89
90\S4method{show}{KorAPQuery}(object)
91}
92\arguments{
93\item{.Object}{}
94
Marc Kupietzb8972182019-09-20 21:33:46 +020095\item{korapConnection}{KorAPConnection object}
96
Marc Kupietze95108e2019-09-18 13:23:58 +020097\item{request}{query part of the request URL}
98
Marc Kupietze2038322021-03-04 18:24:02 +010099\item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200100
101\item{totalResults}{number of hits the query has yielded}
102
103\item{nextStartIndex}{at what index to start the next fetch of query results}
104
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200105\item{fields}{(meta)data fields that will be fetched for every match.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200106
107\item{requestUrl}{complete URL of the API request}
108
109\item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request}
110
111\item{apiResponse}{data-frame representation of the JSON response of the API request}
112
Marc Kupietz7776dec2019-09-27 16:59:02 +0200113\item{hasMoreMatches}{logical that signals if more query results can be fetched}
Marc Kupietze95108e2019-09-18 13:23:58 +0200114
115\item{collectedMatches}{matches already fetched from the KorAP-API-server}
116
\item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{KorAPConnection()})}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200118
119\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
120
121\item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.}
122
Marc Kupietz132f0052023-04-16 14:23:05 +0200123\item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE.
124If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize
125your RKorAPClient application as explained in the
126\href{https://github.com/KorAP/RKorAPClient#authorization}{authorization section}
127of the RKorAPClient Readme on GitHub and set the \code{metadataOnly} parameter to
128\code{FALSE}.}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200129
\item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).}
131
132\item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).}
133
134\item{verbose}{print progress information if true}
135
\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations. Defaults to \code{TRUE} if and only if \code{query} and \code{vc} have different lengths.}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200137
138\item{as.df}{return result as data frame instead of as S4 object?}
139
\item{context}{string that specifies the size of the left and the right context returned in \code{snippet}
(provided that \code{metadataOnly} is set to \code{FALSE} and that the necessary access rights are met).
The format of the context size specification (e.g. \verb{3-token,3-token}) is described in the \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET}{Service: Search GET documentation of the Kustvakt Wiki}.
If the parameter is not set, the default context size specification of the KorAP server instance will be used.
144Note that you cannot overrule the maximum context size set in the KorAP server instance,
145as this is typically legally motivated.}
146
Marc Kupietz67edcb52021-09-20 21:54:24 +0200147\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}}
Marc Kupietze95108e2019-09-18 13:23:58 +0200148
149\item{offset}{start offset for query results to fetch}
150
151\item{maxFetch}{maximum number of query results to fetch}
152
Marc Kupietz67edcb52021-09-20 21:54:24 +0200153\item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200154
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200155\item{...}{further arguments passed to or from other methods}
Marc Kupietze95108e2019-09-18 13:23:58 +0200156
Marc Kupietz3f575282019-10-04 14:46:04 +0200157\item{x}{KorAPQuery object}
158
Marc Kupietze95108e2019-09-18 13:23:58 +0200159\item{object}{KorAPQuery object}
160}
161\value{
Marc Kupietzad8d2ed2025-04-05 15:37:38 +0200162Depending on the \code{as.df} parameter, a tibble or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}).
A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}.
164Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
Marc Kupietze2038322021-03-04 18:24:02 +0100165
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200166The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
Marc Kupietze95108e2019-09-18 13:23:58 +0200167}
168\description{
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100169This class provides methods to perform different kinds of queries on the KorAP API server.
Marc Kupietz67edcb52021-09-20 21:54:24 +0200170\code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method,
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100171represent the current state of a query to a KorAP server.
Marc Kupietz3f575282019-10-04 14:46:04 +0200172
\strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server.
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200174
Marc Kupietz67edcb52021-09-20 21:54:24 +0200175\strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
Marc Kupietz3f575282019-10-04 14:46:04 +0200176
Marc Kupietz67edcb52021-09-20 21:54:24 +0200177\strong{\code{fetchAll}} fetches all results of a KorAP query.
Marc Kupietze95108e2019-09-18 13:23:58 +0200178}
179\examples{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200180\dontrun{
181
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200182# Fetch metadata of every query hit for "Ameisenplage" and show a summary
Marc Kupietza824d502025-05-02 15:40:23 +0200183KorAPConnection() \%>\%
184 corpusQuery("Ameisenplage") \%>\%
185 fetchAll()
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200186}
187
Marc Kupietz6ae76052021-09-21 10:34:00 +0200188\dontrun{
189
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200190# Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
191# and show the number of query hits (but don't fetch them).
192
Marc Kupietz617266d2025-02-27 10:43:07 +0100193KorAPConnection(verbose = TRUE) \%>\%
Marc Kupietza824d502025-05-02 15:40:23 +0200194 corpusQuery(
195 KorAPUrl =
196 "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp"
197 )
Marc Kupietz6ae76052021-09-21 10:34:00 +0200198}
199
200\dontrun{
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200201
202# Plot the time/frequency curve of "Ameisenplage"
Marc Kupietza824d502025-05-02 15:40:23 +0200203KorAPConnection(verbose = TRUE) \%>\%
204 {
205 . ->> kco
206 } \%>\%
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200207 corpusQuery("Ameisenplage") \%>\%
208 fetchAll() \%>\%
209 slot("collectedMatches") \%>\%
210 mutate(year = lubridate::year(pubDate)) \%>\%
211 dplyr::select(year) \%>\%
212 group_by(year) \%>\%
213 summarise(Count = dplyr::n()) \%>\%
Marc Kupietza824d502025-05-02 15:40:23 +0200214 mutate(Freq = mapply(function(f, y) {
215 f / corpusStats(kco, paste("pubDate in", y))@tokens
216 }, Count, year)) \%>\%
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200217 dplyr::select(-Count) \%>\%
218 complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\%
219 plot(type = "l")
220}
Marc Kupietz6ae76052021-09-21 10:34:00 +0200221\dontrun{
222
Marc Kupietza824d502025-05-02 15:40:23 +0200223q <- KorAPConnection() \%>\%
224 corpusQuery("Ameisenplage") \%>\%
225 fetchNext()
Marc Kupietze95108e2019-09-18 13:23:58 +0200226q@collectedMatches
Marc Kupietz657d8e72020-02-25 18:31:50 +0100227}
Marc Kupietze95108e2019-09-18 13:23:58 +0200228
Marc Kupietz6ae76052021-09-21 10:34:00 +0200229\dontrun{
230
Marc Kupietza824d502025-05-02 15:40:23 +0200231q <- KorAPConnection() \%>\%
232 corpusQuery("Ameisenplage") \%>\%
233 fetchAll()
Marc Kupietze95108e2019-09-18 13:23:58 +0200234q@collectedMatches
Marc Kupietz05b22772020-02-18 21:58:42 +0100235}
Marc Kupietze95108e2019-09-18 13:23:58 +0200236
Marc Kupietz6ae76052021-09-21 10:34:00 +0200237\dontrun{
238
Marc Kupietza824d502025-05-02 15:40:23 +0200239q <- KorAPConnection() \%>\%
240 corpusQuery("Ameisenplage") \%>\%
241 fetchRest()
Marc Kupietz05b22772020-02-18 21:58:42 +0100242q@collectedMatches
243}
244
Marc Kupietze95108e2019-09-18 13:23:58 +0200245}
246\references{
247\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200248
249\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
250}
251\seealso{
Marc Kupietz67edcb52021-09-20 21:54:24 +0200252\code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}}
Marc Kupietze95108e2019-09-18 13:23:58 +0200253}