Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/KorAPQuery.R |
| 3 | \docType{class} |
| 4 | \name{KorAPQuery-class} |
| 5 | \alias{KorAPQuery-class} |
| 6 | \alias{KorAPQuery} |
| 7 | \alias{initialize,KorAPQuery-method} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 8 | \alias{corpusQuery,KorAPConnection-method} |
| 9 | \alias{corpusQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 10 | \alias{fetchNext,KorAPQuery-method} |
| 11 | \alias{fetchNext} |
| 12 | \alias{fetchAll,KorAPQuery-method} |
| 13 | \alias{fetchAll} |
| 14 | \alias{fetchRest,KorAPQuery-method} |
| 15 | \alias{fetchRest} |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 16 | \alias{frequencyQuery,KorAPConnection-method} |
| 17 | \alias{frequencyQuery} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 18 | \alias{buildWebUIRequestUrl} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 19 | \alias{format.KorAPQuery} |
| 20 | \alias{show,KorAPQuery-method} |
| 21 | \title{Class KorAPQuery} |
| 22 | \usage{ |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 23 | \S4method{initialize}{KorAPQuery}( |
| 24 | .Object, |
| 25 | korapConnection = NULL, |
| 26 | request = NULL, |
| 27 | vc = "", |
| 28 | totalResults = 0, |
| 29 | nextStartIndex = 0, |
| 30 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 31 | "textClass", "snippet"), |
| 32 | requestUrl = "", |
| 33 | webUIRequestUrl = "", |
| 34 | apiResponse = NULL, |
| 35 | hasMoreMatches = FALSE, |
| 36 | collectedMatches = NULL |
| 37 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 38 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 39 | \S4method{corpusQuery}{KorAPConnection}( |
| 40 | kco, |
| 41 | query = if (missing(KorAPUrl)) |
| 42 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 43 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 44 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 45 | KorAPUrl, |
| 46 | metadataOnly = TRUE, |
| 47 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 48 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 49 | "textClass", "snippet"), |
| 50 | accessRewriteFatal = TRUE, |
| 51 | verbose = kco@verbose, |
| 52 | expand = length(vc) != length(query), |
Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame^] | 53 | as.df = FALSE, |
| 54 | context = NULL |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 55 | ) |
| 56 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 57 | \S4method{fetchNext}{KorAPQuery}( |
| 58 | kqo, |
| 59 | offset = kqo@nextStartIndex, |
| 60 | maxFetch = maxResultsPerPage, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 61 | verbose = kqo@korapConnection@verbose, |
| 62 | randomizePageOrder = FALSE |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 63 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 64 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 65 | \S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 66 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 67 | \S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 68 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 69 | \S4method{frequencyQuery}{KorAPConnection}( |
| 70 | kco, |
| 71 | query, |
| 72 | vc = "", |
| 73 | conf.level = 0.95, |
| 74 | as.alternatives = FALSE, |
| 75 | ... |
| 76 | ) |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 77 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 78 | buildWebUIRequestUrl( |
| 79 | kco, |
| 80 | query = if (missing(KorAPUrl)) |
| 81 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 82 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 83 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 84 | KorAPUrl, |
| 85 | metadataOnly = TRUE, |
| 86 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 87 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 88 | "textClass", "snippet"), |
| 89 | accessRewriteFatal = TRUE |
| 90 | ) |
| 91 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 92 | \method{format}{KorAPQuery}(x, ...) |
| 93 | |
| 94 | \S4method{show}{KorAPQuery}(object) |
| 95 | } |
| 96 | \arguments{ |
| 97 | \item{.Object}{…} |
| 98 | |
Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 99 | \item{korapConnection}{KorAPConnection object} |
| 100 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 101 | \item{request}{query part of the request URL} |
| 102 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 103 | \item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 104 | |
| 105 | \item{totalResults}{number of hits the query has yielded} |
| 106 | |
| 107 | \item{nextStartIndex}{at what index to start the next fetch of query results} |
| 108 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 109 | \item{fields}{(meta)data fields that will be fetched for every match.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 110 | |
| 111 | \item{requestUrl}{complete URL of the API request} |
| 112 | |
| 113 | \item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request} |
| 114 | |
| 115 | \item{apiResponse}{data-frame representation of the JSON response of the API request} |
| 116 | |
Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 117 | \item{hasMoreMatches}{logical that signals if more query results can be fetched} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 118 | |
| 119 | \item{collectedMatches}{matches already fetched from the KorAP-API-server} |
| 120 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 121 | \item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{new("KorAPConnection")})} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 122 | |
| 123 | \item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.} |
| 124 | |
| 125 | \item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.} |
| 126 | |
Marc Kupietz | 132f005 | 2023-04-16 14:23:05 +0200 | [diff] [blame] | 127 | \item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE. |
| 128 | If you want your corpus queries to return not only metadata, but also KWICs, you need to authorize |
| 129 | your RKorAPClient application as explained in the |
| 130 | \href{https://github.com/KorAP/RKorAPClient#authorization}{authorization section} |
| 131 | of the RKorAPClient Readme on GitHub and set the \code{metadataOnly} parameter to |
| 132 | \code{FALSE}.} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 133 | |
| 134 | \item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).} |
| 135 | |
| 136 | \item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).} |
| 137 | |
| 138 | \item{verbose}{print progress information if true} |
| 139 | |
| 140 | \item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations} |
| 141 | |
| 142 | \item{as.df}{return result as data frame instead of as S4 object?} |
| 143 | |
Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame^] | 144 | \item{context}{string that specifies the size of the left and the right context returned in \code{snippet} |
| 145 | (provided that \code{metadataOnly} is set to \code{FALSE} and that the necessary access rights are met). |
| 146 | The format of the context size specification (e.g. \verb{3-token,3-token}) is described in the \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET}{Service: Search GET documentation of the Kustvakt Wiki}. |
| 147 | If the parameter is not set, the default context size specification of the KorAP server instance will be used. |
| 148 | Note that you cannot overrule the maximum context size set in the KorAP server instance, |
| 149 | as this is typically legally motivated.} |
| 150 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 151 | \item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 152 | |
| 153 | \item{offset}{start offset for query results to fetch} |
| 154 | |
| 155 | \item{maxFetch}{maximum number of query results to fetch} |
| 156 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 157 | \item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 158 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 159 | \item{...}{further arguments passed to or from other methods} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 160 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 161 | \item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).} |
Marc Kupietz | 0c29cea | 2019-10-09 08:44:36 +0200 | [diff] [blame] | 162 | |
Marc Kupietz | 71d6e05 | 2019-11-22 18:42:10 +0100 | [diff] [blame] | 163 | \item{as.alternatives}{logical that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes, is used as total for the calculation of relative frequencies.} |
| 164 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 165 | \item{x}{KorAPQuery object} |
| 166 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 167 | \item{object}{KorAPQuery object} |
| 168 | } |
| 169 | \value{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 170 | Depending on the \code{as.df} parameter, a table or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}). |
| 171 | A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}. |
| 172 | Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 173 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 174 | The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 175 | } |
| 176 | \description{ |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 177 | This class provides methods to perform different kinds of queries on the KorAP API server. |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 178 | \code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method, |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 179 | represent the current state of a query to a KorAP server. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 180 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 181 | \strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server. |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 182 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 183 | \strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 184 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 185 | \strong{\code{fetchAll}} fetches all results of a KorAP query. |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 186 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 187 | \strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and |
| 188 | \code{\link[=ci]{ci()}} to compute a table with the relative frequencies and |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 189 | confidence intervals of one or multiple search terms across one or multiple |
| 190 | virtual corpora. |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 191 | } |
| 192 | \examples{ |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 193 | \dontrun{ |
| 194 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 195 | # Fetch metadata of every query hit for "Ameisenplage" and show a summary |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 196 | new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
| 197 | } |
| 198 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 199 | \dontrun{ |
| 200 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 201 | # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus |
| 202 | # and show the number of query hits (but don't fetch them). |
| 203 | |
| 204 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 205 | corpusQuery(KorAPUrl = |
| 206 | "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 207 | } |
| 208 | |
| 209 | \dontrun{ |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 210 | |
| 211 | # Plot the time/frequency curve of "Ameisenplage" |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 212 | new("KorAPConnection", verbose=TRUE) \%>\% |
| 213 | { . ->> kco } \%>\% |
| 214 | corpusQuery("Ameisenplage") \%>\% |
| 215 | fetchAll() \%>\% |
| 216 | slot("collectedMatches") \%>\% |
| 217 | mutate(year = lubridate::year(pubDate)) \%>\% |
| 218 | dplyr::select(year) \%>\% |
| 219 | group_by(year) \%>\% |
| 220 | summarise(Count = dplyr::n()) \%>\% |
| 221 | mutate(Freq = mapply(function(f, y) |
| 222 | f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\% |
| 223 | dplyr::select(-Count) \%>\% |
| 224 | complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\% |
| 225 | plot(type = "l") |
| 226 | } |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 227 | \dontrun{ |
| 228 | |
| 229 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 230 | q@collectedMatches |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 231 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 232 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 233 | \dontrun{ |
| 234 | |
Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 235 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 236 | q@collectedMatches |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 237 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 238 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 239 | \dontrun{ |
| 240 | |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 241 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest() |
| 242 | q@collectedMatches |
| 243 | } |
| 244 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 245 | \dontrun{ |
| 246 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 247 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 248 | frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003)) |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 249 | } |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 250 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 251 | } |
| 252 | \references{ |
| 253 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 254 | |
| 255 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
| 256 | } |
| 257 | \seealso{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 258 | \code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 259 | } |