blob: 1f131a5eda8de0dd75b32e6c95cab8620bd6d9a4 [file] [log] [blame]
Marc Kupietze95108e2019-09-18 13:23:58 +02001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/KorAPQuery.R
3\docType{class}
4\name{KorAPQuery-class}
5\alias{KorAPQuery-class}
6\alias{KorAPQuery}
7\alias{initialize,KorAPQuery-method}
Marc Kupietzdbd431a2021-08-29 12:17:45 +02008\alias{corpusQuery,KorAPConnection-method}
9\alias{corpusQuery}
Marc Kupietze95108e2019-09-18 13:23:58 +020010\alias{fetchNext,KorAPQuery-method}
11\alias{fetchNext}
12\alias{fetchAll,KorAPQuery-method}
13\alias{fetchAll}
14\alias{fetchRest,KorAPQuery-method}
15\alias{fetchRest}
Marc Kupietz3f575282019-10-04 14:46:04 +020016\alias{frequencyQuery,KorAPConnection-method}
17\alias{frequencyQuery}
Marc Kupietzdbd431a2021-08-29 12:17:45 +020018\alias{buildWebUIRequestUrl}
Marc Kupietze95108e2019-09-18 13:23:58 +020019\alias{format.KorAPQuery}
20\alias{show,KorAPQuery-method}
21\title{Class KorAPQuery}
22\usage{
Marc Kupietz76685f52019-11-25 17:46:06 +010023\S4method{initialize}{KorAPQuery}(
24 .Object,
25 korapConnection = NULL,
26 request = NULL,
27 vc = "",
28 totalResults = 0,
29 nextStartIndex = 0,
30 fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
Marc Kupietz2078bde2023-08-27 16:46:15 +020031 "textClass", "snippet", "tokens"),
Marc Kupietz76685f52019-11-25 17:46:06 +010032 requestUrl = "",
33 webUIRequestUrl = "",
34 apiResponse = NULL,
35 hasMoreMatches = FALSE,
36 collectedMatches = NULL
37)
Marc Kupietze95108e2019-09-18 13:23:58 +020038
Marc Kupietzdbd431a2021-08-29 12:17:45 +020039\S4method{corpusQuery}{KorAPConnection}(
40 kco,
41 query = if (missing(KorAPUrl))
42 stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
43 FALSE) else httr::parse_url(KorAPUrl)$query$q,
44 vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
45 KorAPUrl,
46 metadataOnly = TRUE,
47 ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
48 fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
Marc Kupietz2078bde2023-08-27 16:46:15 +020049 "textClass", "snippet", "tokens"),
Marc Kupietzdbd431a2021-08-29 12:17:45 +020050 accessRewriteFatal = TRUE,
51 verbose = kco@verbose,
52 expand = length(vc) != length(query),
Marc Kupietzd9b2fd72023-04-17 19:08:50 +020053 as.df = FALSE,
54 context = NULL
Marc Kupietzdbd431a2021-08-29 12:17:45 +020055)
56
Marc Kupietz76685f52019-11-25 17:46:06 +010057\S4method{fetchNext}{KorAPQuery}(
58 kqo,
59 offset = kqo@nextStartIndex,
60 maxFetch = maxResultsPerPage,
Marc Kupietzdbd431a2021-08-29 12:17:45 +020061 verbose = kqo@korapConnection@verbose,
62 randomizePageOrder = FALSE
Marc Kupietz76685f52019-11-25 17:46:06 +010063)
Marc Kupietze95108e2019-09-18 13:23:58 +020064
Marc Kupietzdbd431a2021-08-29 12:17:45 +020065\S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
Marc Kupietze95108e2019-09-18 13:23:58 +020066
Marc Kupietzdbd431a2021-08-29 12:17:45 +020067\S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
Marc Kupietze95108e2019-09-18 13:23:58 +020068
Marc Kupietz76685f52019-11-25 17:46:06 +010069\S4method{frequencyQuery}{KorAPConnection}(
70 kco,
71 query,
72 vc = "",
73 conf.level = 0.95,
74 as.alternatives = FALSE,
75 ...
76)
Marc Kupietz3f575282019-10-04 14:46:04 +020077
Marc Kupietzdbd431a2021-08-29 12:17:45 +020078buildWebUIRequestUrl(
79 kco,
80 query = if (missing(KorAPUrl))
81 stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
82 FALSE) else httr::parse_url(KorAPUrl)$query$q,
83 vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
84 KorAPUrl,
85 metadataOnly = TRUE,
86 ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
87 fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
Marc Kupietz2078bde2023-08-27 16:46:15 +020088 "textClass", "snippet", "tokens"),
Marc Kupietzdbd431a2021-08-29 12:17:45 +020089 accessRewriteFatal = TRUE
90)
91
Marc Kupietze95108e2019-09-18 13:23:58 +020092\method{format}{KorAPQuery}(x, ...)
93
94\S4method{show}{KorAPQuery}(object)
95}
96\arguments{
97\item{.Object}{}
98
Marc Kupietzb8972182019-09-20 21:33:46 +020099\item{korapConnection}{KorAPConnection object}
100
Marc Kupietze95108e2019-09-18 13:23:58 +0200101\item{request}{query part of the request URL}
102
Marc Kupietze2038322021-03-04 18:24:02 +0100103\item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200104
105\item{totalResults}{number of hits the query has yielded}
106
107\item{nextStartIndex}{at what index to start the next fetch of query results}
108
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200109\item{fields}{(meta)data fields that will be fetched for every match.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200110
111\item{requestUrl}{complete URL of the API request}
112
113\item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request}
114
115\item{apiResponse}{data-frame representation of the JSON response of the API request}
116
Marc Kupietz7776dec2019-09-27 16:59:02 +0200117\item{hasMoreMatches}{logical that signals if more query results can be fetched}
Marc Kupietze95108e2019-09-18 13:23:58 +0200118
119\item{collectedMatches}{matches already fetched from the KorAP-API-server}
120
Marc Kupietz67edcb52021-09-20 21:54:24 +0200121\item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{new("KorAPConnection")})}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200122
123\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
124
125\item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.}
126
Marc Kupietz132f0052023-04-16 14:23:05 +0200127\item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE.
128If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize
129your RKorAPClient application as explained in the
130\href{https://github.com/KorAP/RKorAPClient#authorization}{authorization section}
131of the RKorAPClient Readme on GitHub and set the \code{metadataOnly} parameter to
132\code{FALSE}.}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200133
134\item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).}
135
136\item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).}
137
138\item{verbose}{print progress information if true}
139
140\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations}
141
142\item{as.df}{return result as data frame instead of as S4 object?}
143
Marc Kupietzd9b2fd72023-04-17 19:08:50 +0200144\item{context}{string that specifies the size of the left and the right context returned in \code{snippet}
145(provided that \code{metadataOnly} is set to \code{FALSE} and that the necessary access rights are met).
146The format of the context size specification (e.g. \verb{3-token,3-token}) is described in the \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET}{Service: Search GET documentation of the Kustvakt Wiki}.
147If the parameter is not set, the default context size specification of the KorAP server instance will be used.
148Note that you cannot overrule the maximum context size set in the KorAP server instance,
149as this is typically legally motivated.}
150
Marc Kupietz67edcb52021-09-20 21:54:24 +0200151\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}}
Marc Kupietze95108e2019-09-18 13:23:58 +0200152
153\item{offset}{start offset for query results to fetch}
154
155\item{maxFetch}{maximum number of query results to fetch}
156
Marc Kupietz67edcb52021-09-20 21:54:24 +0200157\item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.}
Marc Kupietze95108e2019-09-18 13:23:58 +0200158
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200159\item{...}{further arguments passed to or from other methods}
Marc Kupietze95108e2019-09-18 13:23:58 +0200160
Marc Kupietz67edcb52021-09-20 21:54:24 +0200161\item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).}
Marc Kupietz0c29cea2019-10-09 08:44:36 +0200162
Marc Kupietz71d6e052019-11-22 18:42:10 +0100163\item{as.alternatives}{logical that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes, is used as total for the calculation of relative frequencies.}
164
Marc Kupietz3f575282019-10-04 14:46:04 +0200165\item{x}{KorAPQuery object}
166
Marc Kupietze95108e2019-09-18 13:23:58 +0200167\item{object}{KorAPQuery object}
168}
169\value{
Marc Kupietz67edcb52021-09-20 21:54:24 +0200170Depending on the \code{as.df} parameter, a table or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}).
171A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}.
172Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
Marc Kupietze2038322021-03-04 18:24:02 +0100173
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200174The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
Marc Kupietze95108e2019-09-18 13:23:58 +0200175}
176\description{
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100177This class provides methods to perform different kinds of queries on the KorAP API server.
Marc Kupietz67edcb52021-09-20 21:54:24 +0200178\code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method,
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100179represent the current state of a query to a KorAP server.
Marc Kupietz3f575282019-10-04 14:46:04 +0200180
Marc Kupietz67edcb52021-09-20 21:54:24 +0200181\strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server.
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200182
Marc Kupietz67edcb52021-09-20 21:54:24 +0200183\strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
Marc Kupietz3f575282019-10-04 14:46:04 +0200184
Marc Kupietz67edcb52021-09-20 21:54:24 +0200185\strong{\code{fetchAll}} fetches all results of a KorAP query.
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100186
Marc Kupietz67edcb52021-09-20 21:54:24 +0200187\strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and
188\code{\link[=ci]{ci()}} to compute a table with the relative frequencies and
Marc Kupietz3f575282019-10-04 14:46:04 +0200189confidence intervals of one or multiple search terms across one or multiple
190virtual corpora.
Marc Kupietze95108e2019-09-18 13:23:58 +0200191}
192\examples{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200193\dontrun{
194
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200195# Fetch metadata of every query hit for "Ameisenplage" and show a summary
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200196new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
197}
198
Marc Kupietz6ae76052021-09-21 10:34:00 +0200199\dontrun{
200
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200201# Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
202# and show the number of query hits (but don't fetch them).
203
204new("KorAPConnection", verbose = TRUE) \%>\%
205 corpusQuery(KorAPUrl =
206 "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp")
Marc Kupietz6ae76052021-09-21 10:34:00 +0200207}
208
209\dontrun{
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200210
211# Plot the time/frequency curve of "Ameisenplage"
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200212new("KorAPConnection", verbose=TRUE) \%>\%
213 { . ->> kco } \%>\%
214 corpusQuery("Ameisenplage") \%>\%
215 fetchAll() \%>\%
216 slot("collectedMatches") \%>\%
217 mutate(year = lubridate::year(pubDate)) \%>\%
218 dplyr::select(year) \%>\%
219 group_by(year) \%>\%
220 summarise(Count = dplyr::n()) \%>\%
221 mutate(Freq = mapply(function(f, y)
222 f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\%
223 dplyr::select(-Count) \%>\%
224 complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\%
225 plot(type = "l")
226}
Marc Kupietz6ae76052021-09-21 10:34:00 +0200227\dontrun{
228
229q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext()
Marc Kupietze95108e2019-09-18 13:23:58 +0200230q@collectedMatches
Marc Kupietz657d8e72020-02-25 18:31:50 +0100231}
Marc Kupietze95108e2019-09-18 13:23:58 +0200232
Marc Kupietz6ae76052021-09-21 10:34:00 +0200233\dontrun{
234
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200235q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
Marc Kupietze95108e2019-09-18 13:23:58 +0200236q@collectedMatches
Marc Kupietz05b22772020-02-18 21:58:42 +0100237}
Marc Kupietze95108e2019-09-18 13:23:58 +0200238
Marc Kupietz6ae76052021-09-21 10:34:00 +0200239\dontrun{
240
Marc Kupietz05b22772020-02-18 21:58:42 +0100241q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest()
242q@collectedMatches
243}
244
Marc Kupietz6ae76052021-09-21 10:34:00 +0200245\dontrun{
246
Marc Kupietz3f575282019-10-04 14:46:04 +0200247new("KorAPConnection", verbose = TRUE) \%>\%
248 frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
Marc Kupietz05b22772020-02-18 21:58:42 +0100249}
Marc Kupietz3f575282019-10-04 14:46:04 +0200250
Marc Kupietze95108e2019-09-18 13:23:58 +0200251}
252\references{
253\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
Marc Kupietzdbd431a2021-08-29 12:17:45 +0200254
255\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
256}
257\seealso{
Marc Kupietz67edcb52021-09-20 21:54:24 +0200258\code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}}
Marc Kupietze95108e2019-09-18 13:23:58 +0200259}