Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/KorAPQuery.R |
| 3 | \docType{class} |
| 4 | \name{KorAPQuery-class} |
| 5 | \alias{KorAPQuery-class} |
| 6 | \alias{KorAPQuery} |
| 7 | \alias{initialize,KorAPQuery-method} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 8 | \alias{corpusQuery,KorAPConnection-method} |
| 9 | \alias{corpusQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 10 | \alias{fetchNext,KorAPQuery-method} |
| 11 | \alias{fetchNext} |
| 12 | \alias{fetchAll,KorAPQuery-method} |
| 13 | \alias{fetchAll} |
| 14 | \alias{fetchRest,KorAPQuery-method} |
| 15 | \alias{fetchRest} |
Marc Kupietz | 38a9d68 | 2024-12-06 16:17:09 +0100 | [diff] [blame] | 16 | \alias{buildWebUIRequestUrlFromString} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 17 | \alias{buildWebUIRequestUrl} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 18 | \alias{format.KorAPQuery} |
| 19 | \alias{show,KorAPQuery-method} |
| 20 | \title{Class KorAPQuery} |
| 21 | \usage{ |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 22 | \S4method{initialize}{KorAPQuery}( |
| 23 | .Object, |
| 24 | korapConnection = NULL, |
| 25 | request = NULL, |
| 26 | vc = "", |
| 27 | totalResults = 0, |
| 28 | nextStartIndex = 0, |
| 29 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
Marc Kupietz | 2078bde | 2023-08-27 16:46:15 +0200 | [diff] [blame] | 30 | "textClass", "snippet", "tokens"), |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 31 | requestUrl = "", |
| 32 | webUIRequestUrl = "", |
| 33 | apiResponse = NULL, |
| 34 | hasMoreMatches = FALSE, |
| 35 | collectedMatches = NULL |
| 36 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 37 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 38 | \S4method{corpusQuery}{KorAPConnection}( |
| 39 | kco, |
| 40 | query = if (missing(KorAPUrl)) |
| 41 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
Marc Kupietz | f912959 | 2025-01-26 19:17:54 +0100 | [diff] [blame] | 42 | FALSE) else httr2::url_parse(KorAPUrl)$query$q, |
| 43 | vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 44 | KorAPUrl, |
| 45 | metadataOnly = TRUE, |
Marc Kupietz | f912959 | 2025-01-26 19:17:54 +0100 | [diff] [blame] | 46 | ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 47 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
Marc Kupietz | 2078bde | 2023-08-27 16:46:15 +0200 | [diff] [blame] | 48 | "textClass", "snippet", "tokens"), |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 49 | accessRewriteFatal = TRUE, |
| 50 | verbose = kco@verbose, |
| 51 | expand = length(vc) != length(query), |
Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame] | 52 | as.df = FALSE, |
| 53 | context = NULL |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 54 | ) |
| 55 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 56 | \S4method{fetchNext}{KorAPQuery}( |
| 57 | kqo, |
| 58 | offset = kqo@nextStartIndex, |
| 59 | maxFetch = maxResultsPerPage, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 60 | verbose = kqo@korapConnection@verbose, |
| 61 | randomizePageOrder = FALSE |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 62 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 63 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 64 | \S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 65 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 66 | \S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 67 | |
Marc Kupietz | 38a9d68 | 2024-12-06 16:17:09 +0100 | [diff] [blame] | 68 | buildWebUIRequestUrlFromString(KorAPUrl, query, vc = "", ql = "poliqarp") |
| 69 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 70 | buildWebUIRequestUrl( |
| 71 | kco, |
| 72 | query = if (missing(KorAPUrl)) |
| 73 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
Marc Kupietz | f912959 | 2025-01-26 19:17:54 +0100 | [diff] [blame] | 74 | FALSE) else httr2::url_parse(KorAPUrl)$query$q, |
| 75 | vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 76 | KorAPUrl, |
Marc Kupietz | f912959 | 2025-01-26 19:17:54 +0100 | [diff] [blame] | 77 | ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 78 | ) |
| 79 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 80 | \method{format}{KorAPQuery}(x, ...) |
| 81 | |
| 82 | \S4method{show}{KorAPQuery}(object) |
| 83 | } |
| 84 | \arguments{ |
| 85 | \item{.Object}{…} |
| 86 | |
Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 87 | \item{korapConnection}{KorAPConnection object} |
| 88 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 89 | \item{request}{query part of the request URL} |
| 90 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 91 | \item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 92 | |
| 93 | \item{totalResults}{number of hits the query has yielded} |
| 94 | |
| 95 | \item{nextStartIndex}{at what index to start the next fetch of query results} |
| 96 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 97 | \item{fields}{(meta)data fields that will be fetched for every match.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 98 | |
| 99 | \item{requestUrl}{complete URL of the API request} |
| 100 | |
| 101 | \item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request} |
| 102 | |
| 103 | \item{apiResponse}{data-frame representation of the JSON response of the API request} |
| 104 | |
Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 105 | \item{hasMoreMatches}{logical that signals if more query results can be fetched} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 106 | |
| 107 | \item{collectedMatches}{matches already fetched from the KorAP-API-server} |
| 108 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 109 | \item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{KorAPConnection()})} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 110 | |
| 111 | \item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.} |
| 112 | |
| 113 | \item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.} |
| 114 | |
Marc Kupietz | 132f005 | 2023-04-16 14:23:05 +0200 | [diff] [blame] | 115 | \item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE. |
| 116 | If you want your corpus queries to return not only metadata, but also KWICs, you need to authorize |
| 117 | your RKorAPClient application as explained in the |
| 118 | \href{https://github.com/KorAP/RKorAPClient#authorization}{authorization section} |
| 119 | of the RKorAPClient Readme on GitHub and set the \code{metadataOnly} parameter to |
| 120 | \code{FALSE}.} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 121 | |
| 122 | \item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).} |
| 123 | |
| 124 | \item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).} |
| 125 | |
| 126 | \item{verbose}{print progress information if true} |
| 127 | |
Marc Kupietz | ad8d2ed | 2025-04-05 15:37:38 +0200 | [diff] [blame] | 128 | \item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations. Defaults to \code{TRUE}, iff \code{query} and \code{vc} have different lengths} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 129 | |
| 130 | \item{as.df}{return result as data frame instead of as S4 object?} |
| 131 | |
Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame] | 132 | \item{context}{string that specifies the size of the left and the right context returned in \code{snippet} |
| 133 | (provided that \code{metadataOnly} is set to \code{FALSE} and that the necessary access rights are met). |
| 134 | The format of the context size specification (e.g. \verb{3-token,3-token}) is described in the \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET}{Service: Search GET documentation of the Kustvakt Wiki}. |
| 135 | If the parameter is not set, the default context size specification of the KorAP server instance will be used. |
| 136 | Note that you cannot overrule the maximum context size set in the KorAP server instance, |
| 137 | as this is typically legally motivated.} |
| 138 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 139 | \item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 140 | |
| 141 | \item{offset}{start offset for query results to fetch} |
| 142 | |
| 143 | \item{maxFetch}{maximum number of query results to fetch} |
| 144 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 145 | \item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 146 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 147 | \item{...}{further arguments passed to or from other methods} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 148 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 149 | \item{x}{KorAPQuery object} |
| 150 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 151 | \item{object}{KorAPQuery object} |
| 152 | } |
| 153 | \value{ |
Marc Kupietz | ad8d2ed | 2025-04-05 15:37:38 +0200 | [diff] [blame] | 154 | Depending on the \code{as.df} parameter, a tibble or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}). |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 155 | A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}. |
| 156 | Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 157 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 158 | The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 159 | } |
| 160 | \description{ |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 161 | This class provides methods to perform different kinds of queries on the KorAP API server. |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 162 | \code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method, |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 163 | represent the current state of a query to a KorAP server. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 164 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 165 | \strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server. |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 166 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 167 | \strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 168 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 169 | \strong{\code{fetchAll}} fetches all results of a KorAP query. |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 170 | } |
| 171 | \examples{ |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 172 | \dontrun{ |
| 173 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 174 | # Fetch metadata of every query hit for "Ameisenplage" and show a summary |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 175 | KorAPConnection() \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 176 | } |
| 177 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 178 | \dontrun{ |
| 179 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 180 | # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus |
| 181 | # and show the number of query hits (but don't fetch them). |
| 182 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 183 | KorAPConnection(verbose = TRUE) \%>\% |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 184 | corpusQuery(KorAPUrl = |
| 185 | "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 186 | } |
| 187 | |
| 188 | \dontrun{ |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 189 | |
| 190 | # Plot the time/frequency curve of "Ameisenplage" |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 191 | KorAPConnection(verbose=TRUE) \%>\% |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 192 | { . ->> kco } \%>\% |
| 193 | corpusQuery("Ameisenplage") \%>\% |
| 194 | fetchAll() \%>\% |
| 195 | slot("collectedMatches") \%>\% |
| 196 | mutate(year = lubridate::year(pubDate)) \%>\% |
| 197 | dplyr::select(year) \%>\% |
| 198 | group_by(year) \%>\% |
| 199 | summarise(Count = dplyr::n()) \%>\% |
| 200 | mutate(Freq = mapply(function(f, y) |
| 201 | f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\% |
| 202 | dplyr::select(-Count) \%>\% |
| 203 | complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\% |
| 204 | plot(type = "l") |
| 205 | } |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 206 | \dontrun{ |
| 207 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 208 | q <- KorAPConnection() \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 209 | q@collectedMatches |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 210 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 211 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 212 | \dontrun{ |
| 213 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 214 | q <- KorAPConnection() \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 215 | q@collectedMatches |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 216 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 217 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 218 | \dontrun{ |
| 219 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 220 | q <- KorAPConnection() \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest() |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 221 | q@collectedMatches |
| 222 | } |
| 223 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 224 | } |
| 225 | \references{ |
| 226 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 227 | |
| 228 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
| 229 | } |
| 230 | \seealso{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 231 | \code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 232 | } |