Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/KorAPQuery.R |
| 3 | \docType{class} |
| 4 | \name{KorAPQuery-class} |
| 5 | \alias{KorAPQuery-class} |
| 6 | \alias{KorAPQuery} |
| 7 | \alias{initialize,KorAPQuery-method} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 8 | \alias{corpusQuery,KorAPConnection-method} |
| 9 | \alias{corpusQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 10 | \alias{fetchNext,KorAPQuery-method} |
| 11 | \alias{fetchNext} |
| 12 | \alias{fetchAll,KorAPQuery-method} |
| 13 | \alias{fetchAll} |
| 14 | \alias{fetchRest,KorAPQuery-method} |
| 15 | \alias{fetchRest} |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 16 | \alias{frequencyQuery,KorAPConnection-method} |
| 17 | \alias{frequencyQuery} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 18 | \alias{buildWebUIRequestUrl} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 19 | \alias{format.KorAPQuery} |
| 20 | \alias{show,KorAPQuery-method} |
| 21 | \title{Class KorAPQuery} |
| 22 | \usage{ |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 23 | \S4method{initialize}{KorAPQuery}( |
| 24 | .Object, |
| 25 | korapConnection = NULL, |
| 26 | request = NULL, |
| 27 | vc = "", |
| 28 | totalResults = 0, |
| 29 | nextStartIndex = 0, |
| 30 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 31 | "textClass", "snippet"), |
| 32 | requestUrl = "", |
| 33 | webUIRequestUrl = "", |
| 34 | apiResponse = NULL, |
| 35 | hasMoreMatches = FALSE, |
| 36 | collectedMatches = NULL |
| 37 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 38 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 39 | \S4method{corpusQuery}{KorAPConnection}( |
| 40 | kco, |
| 41 | query = if (missing(KorAPUrl)) |
| 42 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 43 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 44 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 45 | KorAPUrl, |
| 46 | metadataOnly = TRUE, |
| 47 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 48 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 49 | "textClass", "snippet"), |
| 50 | accessRewriteFatal = TRUE, |
| 51 | verbose = kco@verbose, |
| 52 | expand = length(vc) != length(query), |
| 53 | as.df = FALSE |
| 54 | ) |
| 55 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 56 | \S4method{fetchNext}{KorAPQuery}( |
| 57 | kqo, |
| 58 | offset = kqo@nextStartIndex, |
| 59 | maxFetch = maxResultsPerPage, |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 60 | verbose = kqo@korapConnection@verbose, |
| 61 | randomizePageOrder = FALSE |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 62 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 63 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 64 | \S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 65 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 66 | \S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 67 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 68 | \S4method{frequencyQuery}{KorAPConnection}( |
| 69 | kco, |
| 70 | query, |
| 71 | vc = "", |
| 72 | conf.level = 0.95, |
| 73 | as.alternatives = FALSE, |
| 74 | ... |
| 75 | ) |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 76 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 77 | buildWebUIRequestUrl( |
| 78 | kco, |
| 79 | query = if (missing(KorAPUrl)) |
| 80 | stop("At least one of the parameters query and KorAPUrl must be specified.", call. = |
| 81 | FALSE) else httr::parse_url(KorAPUrl)$query$q, |
| 82 | vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, |
| 83 | KorAPUrl, |
| 84 | metadataOnly = TRUE, |
| 85 | ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql, |
| 86 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 87 | "textClass", "snippet"), |
| 88 | accessRewriteFatal = TRUE |
| 89 | ) |
| 90 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 91 | \method{format}{KorAPQuery}(x, ...) |
| 92 | |
| 93 | \S4method{show}{KorAPQuery}(object) |
| 94 | } |
| 95 | \arguments{ |
| 96 | \item{.Object}{…} |
| 97 | |
Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 98 | \item{korapConnection}{KorAPConnection object} |
| 99 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 100 | \item{request}{query part of the request URL} |
| 101 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 102 | \item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 103 | |
| 104 | \item{totalResults}{number of hits the query has yielded} |
| 105 | |
| 106 | \item{nextStartIndex}{at what index to start the next fetch of query results} |
| 107 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 108 | \item{fields}{(meta)data fields that will be fetched for every match.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 109 | |
| 110 | \item{requestUrl}{complete URL of the API request} |
| 111 | |
| 112 | \item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request} |
| 113 | |
| 114 | \item{apiResponse}{data-frame representation of the JSON response of the API request} |
| 115 | |
Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 116 | \item{hasMoreMatches}{logical that signals if more query results can be fetched} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 117 | |
| 118 | \item{collectedMatches}{matches already fetched from the KorAP-API-server} |
| 119 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 120 | \item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{new("KorAPConnection")})} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 121 | |
| 122 | \item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.} |
| 123 | |
| 124 | \item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.} |
| 125 | |
| 126 | \item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE, unless the connection is authorized (currently not possible).} |
| 127 | |
| 128 | \item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).} |
| 129 | |
| 130 | \item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).} |
| 131 | |
| 132 | \item{verbose}{print progress information if true} |
| 133 | |
| 134 | \item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations} |
| 135 | |
| 136 | \item{as.df}{return result as data frame instead of as S4 object?} |
| 137 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 138 | \item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 139 | |
| 140 | \item{offset}{start offset for query results to fetch} |
| 141 | |
| 142 | \item{maxFetch}{maximum number of query results to fetch} |
| 143 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 144 | \item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link[=set.seed]{set.seed()}} to set seed for reproducible results.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 145 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 146 | \item{...}{further arguments passed to or from other methods} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 147 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 148 | \item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).} |
Marc Kupietz | 0c29cea | 2019-10-09 08:44:36 +0200 | [diff] [blame] | 149 | |
Marc Kupietz | 71d6e05 | 2019-11-22 18:42:10 +0100 | [diff] [blame] | 150 | \item{as.alternatives}{logical that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes, is used as total for the calculation of relative frequencies.} |
| 151 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 152 | \item{x}{KorAPQuery object} |
| 153 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 154 | \item{object}{KorAPQuery object} |
| 155 | } |
| 156 | \value{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 157 | Depending on the \code{as.df} parameter, a table or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}). |
| 158 | A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}. |
| 159 | Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 160 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 161 | The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 162 | } |
| 163 | \description{ |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 164 | This class provides methods to perform different kinds of queries on the KorAP API server. |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 165 | \code{KorAPQuery} objects, which are typically created by the \code{\link[=corpusQuery]{corpusQuery()}} method, |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 166 | represent the current state of a query to a KorAP server. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 167 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 168 | \strong{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server. |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 169 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 170 | \strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 171 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 172 | \strong{\code{fetchAll}} fetches all results of a KorAP query. |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 173 | |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 174 | \strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and |
| 175 | \code{\link[=ci]{ci()}} to compute a table with the relative frequencies and |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 176 | confidence intervals of one or multiple search terms across one or multiple |
| 177 | virtual corpora. |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 178 | } |
| 179 | \examples{ |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 180 | \dontrun{ |
| 181 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 182 | # Fetch metadata of every query hit for "Ameisenplage" and show a summary |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 183 | new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
| 184 | } |
| 185 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 186 | \dontrun{ |
| 187 | |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 188 | # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus |
| 189 | # and show the number of query hits (but don't fetch them). |
| 190 | |
| 191 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 192 | corpusQuery(KorAPUrl = |
| 193 | "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 194 | } |
| 195 | |
| 196 | \dontrun{ |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 197 | |
| 198 | # Plot the time/frequency curve of "Ameisenplage" |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 199 | new("KorAPConnection", verbose=TRUE) \%>\% |
| 200 | { . ->> kco } \%>\% |
| 201 | corpusQuery("Ameisenplage") \%>\% |
| 202 | fetchAll() \%>\% |
| 203 | slot("collectedMatches") \%>\% |
| 204 | mutate(year = lubridate::year(pubDate)) \%>\% |
| 205 | dplyr::select(year) \%>\% |
| 206 | group_by(year) \%>\% |
| 207 | summarise(Count = dplyr::n()) \%>\% |
| 208 | mutate(Freq = mapply(function(f, y) |
| 209 | f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\% |
| 210 | dplyr::select(-Count) \%>\% |
| 211 | complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\% |
| 212 | plot(type = "l") |
| 213 | } |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 214 | \dontrun{ |
| 215 | |
| 216 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 217 | q@collectedMatches |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 218 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 219 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 220 | \dontrun{ |
| 221 | |
Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 222 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 223 | q@collectedMatches |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 224 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 225 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 226 | \dontrun{ |
| 227 | |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 228 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest() |
| 229 | q@collectedMatches |
| 230 | } |
| 231 | |
Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame^] | 232 | \dontrun{ |
| 233 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 234 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 235 | frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003)) |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 236 | } |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 237 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 238 | } |
| 239 | \references{ |
| 240 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 241 | |
| 242 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
| 243 | } |
| 244 | \seealso{ |
Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 245 | \code{\link[=KorAPConnection]{KorAPConnection()}}, \code{\link[=fetchNext]{fetchNext()}}, \code{\link[=fetchRest]{fetchRest()}}, \code{\link[=fetchAll]{fetchAll()}}, \code{\link[=corpusStats]{corpusStats()}} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 246 | } |