Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/KorAPQuery.R |
| 3 | \docType{class} |
| 4 | \name{KorAPQuery-class} |
| 5 | \alias{KorAPQuery-class} |
| 6 | \alias{KorAPQuery} |
| 7 | \alias{initialize,KorAPQuery-method} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 8 | \alias{corpusQuery,KorAPConnection-method} |
| 9 | \alias{corpusQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 10 | \alias{fetchNext,KorAPQuery-method} |
| 11 | \alias{fetchNext} |
| 12 | \alias{fetchAll,KorAPQuery-method} |
| 13 | \alias{fetchAll} |
| 14 | \alias{fetchRest,KorAPQuery-method} |
| 15 | \alias{fetchRest} |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 16 | \alias{frequencyQuery,KorAPConnection-method} |
| 17 | \alias{frequencyQuery} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 18 | \alias{buildWebUIRequestUrl} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 19 | \alias{format.KorAPQuery} |
| 20 | \alias{show,KorAPQuery-method} |
| 21 | \title{Class KorAPQuery} |
| 22 | \usage{ |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 23 | \S4method{initialize}{KorAPQuery}( |
| 24 | .Object, |
| 25 | korapConnection = NULL, |
| 26 | request = NULL, |
| 27 | vc = "", |
| 28 | totalResults = 0, |
| 29 | nextStartIndex = 0, |
| 30 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 31 | "textClass", "snippet"), |
| 32 | requestUrl = "", |
| 33 | webUIRequestUrl = "", |
| 34 | apiResponse = NULL, |
| 35 | hasMoreMatches = FALSE, |
| 36 | collectedMatches = NULL |
| 37 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 38 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 39 | \S4method{corpusQuery}{KorAPConnection}( |
| 40 | kco, |
| 41 | query = if (missing(KorAPUrl)) |
| 42 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 43 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 44 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 45 | KorAPUrl, |
| 46 | metadataOnly = TRUE, |
| 47 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 48 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 49 | "textClass", "snippet"), |
| 50 | accessRewriteFatal = TRUE, |
| 51 | verbose = kco@verbose, |
| 52 | expand = length(vc) != length(query), |
| 53 | as.df = FALSE |
| 54 | ) |
| 55 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 56 | \S4method{fetchNext}{KorAPQuery}( |
| 57 | kqo, |
| 58 | offset = kqo@nextStartIndex, |
| 59 | maxFetch = maxResultsPerPage, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 60 | verbose = kqo@korapConnection@verbose, |
| 61 | randomizePageOrder = FALSE |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 62 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 63 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 64 | \S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 65 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 66 | \S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 67 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 68 | \S4method{frequencyQuery}{KorAPConnection}( |
| 69 | kco, |
| 70 | query, |
| 71 | vc = "", |
| 72 | conf.level = 0.95, |
| 73 | as.alternatives = FALSE, |
| 74 | ... |
| 75 | ) |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 76 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 77 | buildWebUIRequestUrl( |
| 78 | kco, |
| 79 | query = if (missing(KorAPUrl)) |
| 80 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 81 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 82 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 83 | KorAPUrl, |
| 84 | metadataOnly = TRUE, |
| 85 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 86 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 87 | "textClass", "snippet"), |
| 88 | accessRewriteFatal = TRUE |
| 89 | ) |
| 90 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 91 | \method{format}{KorAPQuery}(x, ...) |
| 92 | |
| 93 | \S4method{show}{KorAPQuery}(object) |
| 94 | } |
| 95 | \arguments{ |
| 96 | \item{.Object}{…} |
| 97 | |
Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 98 | \item{korapConnection}{KorAPConnection object} |
| 99 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 100 | \item{request}{query part of the request URL} |
| 101 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 102 | \item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 103 | |
| 104 | \item{totalResults}{number of hits the query has yielded} |
| 105 | |
| 106 | \item{nextStartIndex}{at what index to start the next fetch of query results} |
| 107 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 108 | \item{fields}{(meta)data fields that will be fetched for every match.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 109 | |
| 110 | \item{requestUrl}{complete URL of the API request} |
| 111 | |
| 112 | \item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request} |
| 113 | |
| 114 | \item{apiResponse}{data-frame representation of the JSON response of the API request} |
| 115 | |
Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 116 | \item{hasMoreMatches}{logical that signals if more query results can be fetched} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 117 | |
| 118 | \item{collectedMatches}{matches already fetched from the KorAP-API-server} |
| 119 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 120 | \item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{new("KorAPConnection")})} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 121 | |
| 122 | \item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.} |
| 123 | |
| 124 | \item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.} |
| 125 | |
Marc Kupietz | 132f005 | 2023-04-16 14:23:05 +0200 | [diff] [blame^] | 126 | \item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE. |
| 127 | If you want your corpus queries to return not only metadata, but also KWICs, you need to authorize |
| 128 | your RKorAPClient application as explained in the |
| 129 | \href{https://github.com/KorAP/RKorAPClient#authorization}{authorization section} |
| 130 | of the RKorAPClient Readme on GitHub and set the \code{metadataOnly} parameter to |
| 131 | \code{FALSE}.} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 132 | |
| 133 | \item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).} |
| 134 | |
| 135 | \item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).} |
| 136 | |
| 137 | \item{verbose}{print progress information if true} |
| 138 | |
| 139 | \item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations} |
| 140 | |
| 141 | \item{as.df}{return result as data frame instead of as S4 object?} |
| 142 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 143 | \item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 144 | |
| 145 | \item{offset}{start offset for query results to fetch} |
| 146 | |
| 147 | \item{maxFetch}{maximum number of query results to fetch} |
| 148 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 149 | \item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 150 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 151 | \item{...}{further arguments passed to or from other methods} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 152 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 153 | \item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).} |
Marc Kupietz | 0c29cea | 2019-10-09 08:44:36 +0200 | [diff] [blame] | 154 | |
Marc Kupietz | 71d6e05 | 2019-11-22 18:42:10 +0100 | [diff] [blame] | 155 | \item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.} |
| 156 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 157 | \item{x}{KorAPQuery object} |
| 158 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 159 | \item{object}{KorAPQuery object} |
| 160 | } |
| 161 | \value{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 162 | Depending on the \code{as.df} parameter, a table or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}). |
| 163 | A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}. |
| 164 | Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 165 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 166 | The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 167 | } |
| 168 | \description{ |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 169 | This class provides methods to perform different kinds of queries on the KorAP API server. |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 170 | \code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method, |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 171 | represent the current state of a query to a KorAP server. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 172 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 173 | \strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server. |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 174 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 175 | \strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 176 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 177 | \strong{\code{fetchAll}} fetches all results of a KorAP query. |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 178 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 179 | \strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and |
| 180 | \code{\link[=ci]{ci()}} to compute a table with the relative frequencies and |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 181 | confidence intervals of one or multiple search terms across one or multiple |
| 182 | virtual corpora. |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 183 | } |
| 184 | \examples{ |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 185 | \dontrun{ |
| 186 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 187 | # Fetch metadata of every query hit for "Ameisenplage" and show a summary |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 188 | new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
| 189 | } |
| 190 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 191 | \dontrun{ |
| 192 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 193 | # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus |
| 194 | # and show the number of query hits (but don't fetch them). |
| 195 | |
| 196 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 197 | corpusQuery(KorAPUrl = |
| 198 | "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 199 | } |
| 200 | |
| 201 | \dontrun{ |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 202 | |
| 203 | # Plot the time/frequency curve of "Ameisenplage" |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 204 | new("KorAPConnection", verbose=TRUE) \%>\% |
| 205 | { . ->> kco } \%>\% |
| 206 | corpusQuery("Ameisenplage") \%>\% |
| 207 | fetchAll() \%>\% |
| 208 | slot("collectedMatches") \%>\% |
| 209 | mutate(year = lubridate::year(pubDate)) \%>\% |
| 210 | dplyr::select(year) \%>\% |
| 211 | group_by(year) \%>\% |
| 212 | summarise(Count = dplyr::n()) \%>\% |
| 213 | mutate(Freq = mapply(function(f, y) |
| 214 | f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\% |
| 215 | dplyr::select(-Count) \%>\% |
| 216 | complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\% |
| 217 | plot(type = "l") |
| 218 | } |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 219 | \dontrun{ |
| 220 | |
| 221 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 222 | q@collectedMatches |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 223 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 224 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 225 | \dontrun{ |
| 226 | |
Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 227 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 228 | q@collectedMatches |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 229 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 230 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 231 | \dontrun{ |
| 232 | |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 233 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest() |
| 234 | q@collectedMatches |
| 235 | } |
| 236 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 237 | \dontrun{ |
| 238 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 239 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 240 | frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003)) |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 241 | } |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 242 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 243 | } |
| 244 | \references{ |
| 245 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 246 | |
| 247 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
| 248 | } |
| 249 | \seealso{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 250 | \code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 251 | } |