Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/KorAPQuery.R |
| 3 | \docType{class} |
| 4 | \name{KorAPQuery-class} |
| 5 | \alias{KorAPQuery-class} |
| 6 | \alias{KorAPQuery} |
| 7 | \alias{initialize,KorAPQuery-method} |
| 8 | \alias{fetchNext,KorAPQuery-method} |
| 9 | \alias{fetchNext} |
| 10 | \alias{fetchAll,KorAPQuery-method} |
| 11 | \alias{fetchAll} |
| 12 | \alias{fetchRest,KorAPQuery-method} |
| 13 | \alias{fetchRest} |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 14 | \alias{frequencyQuery,KorAPConnection-method} |
| 15 | \alias{frequencyQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 16 | \alias{format.KorAPQuery} |
| 17 | \alias{show,KorAPQuery-method} |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 18 | \alias{collocationScoreQuery,KorAPConnection-method} |
| 19 | \alias{collocationScoreQuery} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 20 | \title{Class KorAPQuery} |
| 21 | \usage{ |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 22 | \S4method{initialize}{KorAPQuery}( |
| 23 | .Object, |
| 24 | korapConnection = NULL, |
| 25 | request = NULL, |
| 26 | vc = "", |
| 27 | totalResults = 0, |
| 28 | nextStartIndex = 0, |
| 29 | fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability", |
| 30 | "textClass", "snippet"), |
| 31 | requestUrl = "", |
| 32 | webUIRequestUrl = "", |
| 33 | apiResponse = NULL, |
| 34 | hasMoreMatches = FALSE, |
| 35 | collectedMatches = NULL |
| 36 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 37 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 38 | \S4method{fetchNext}{KorAPQuery}( |
| 39 | kqo, |
| 40 | offset = kqo@nextStartIndex, |
| 41 | maxFetch = maxResultsPerPage, |
| 42 | verbose = kqo@korapConnection@verbose |
| 43 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 44 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 45 | \S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 46 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 47 | \S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 48 | |
Marc Kupietz | 76685f5 | 2019-11-25 17:46:06 +0100 | [diff] [blame] | 49 | \S4method{frequencyQuery}{KorAPConnection}( |
| 50 | kco, |
| 51 | query, |
| 52 | vc = "", |
| 53 | conf.level = 0.95, |
| 54 | as.alternatives = FALSE, |
| 55 | ... |
| 56 | ) |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 57 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 58 | \method{format}{KorAPQuery}(x, ...) |
| 59 | |
| 60 | \S4method{show}{KorAPQuery}(object) |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 61 | |
| 62 | \S4method{collocationScoreQuery}{KorAPConnection}( |
| 63 | kco, |
| 64 | node, |
| 65 | collocate, |
| 66 | vc = "", |
| 67 | lemmatizeNodeQuery = FALSE, |
| 68 | lemmatizeCollocateQuery = FALSE, |
| 69 | leftContextSize = 5, |
| 70 | rightContextSize = 5, |
| 71 | scoreFunctions = defaultAssociationScoreFunctions(), |
| 72 | smoothingConstant = 0.5 |
| 73 | ) |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 74 | } |
| 75 | \arguments{ |
| 76 | \item{.Object}{…} |
| 77 | |
Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 78 | \item{korapConnection}{KorAPConnection object} |
| 79 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 80 | \item{request}{query part of the request URL} |
| 81 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 82 | \item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 83 | |
| 84 | \item{totalResults}{number of hits the query has yielded} |
| 85 | |
| 86 | \item{nextStartIndex}{at what index to start the next fetch of query results} |
| 87 | |
| 88 | \item{fields}{what data / metadata fields should be collected} |
| 89 | |
| 90 | \item{requestUrl}{complete URL of the API request} |
| 91 | |
| 92 | \item{webUIRequestUrl}{URL of a web frontend request corresponding to the API request} |
| 93 | |
| 94 | \item{apiResponse}{data-frame representation of the JSON response of the API request} |
| 95 | |
Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 96 | \item{hasMoreMatches}{logical that signals if more query results can be fetched} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 97 | |
| 98 | \item{collectedMatches}{matches already fetched from the KorAP-API-server} |
| 99 | |
| 100 | \item{kqo}{object obtained from \code{\link{corpusQuery}}} |
| 101 | |
| 102 | \item{offset}{start offset for query results to fetch} |
| 103 | |
| 104 | \item{maxFetch}{maximum number of query results to fetch} |
| 105 | |
| 106 | \item{verbose}{print progress information if true} |
| 107 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 108 | \item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})} |
| 109 | |
| 110 | \item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} or \code{KorAPUrl} must be provided.} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 111 | |
Marc Kupietz | 43a6ade | 2020-02-18 17:01:44 +0100 | [diff] [blame] | 112 | \item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link{ci}} to \code{\link{prop.test}}).} |
Marc Kupietz | 0c29cea | 2019-10-09 08:44:36 +0200 | [diff] [blame] | 113 | |
Marc Kupietz | 71d6e05 | 2019-11-22 18:42:10 +0100 | [diff] [blame] | 114 | \item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes, is used as total for the calculation of relative frequencies.} |
| 115 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 116 | \item{...}{further arguments passed to or from other methods} |
| 117 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 118 | \item{x}{KorAPQuery object} |
| 119 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 120 | \item{object}{KorAPQuery object} |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 121 | |
| 122 | \item{node}{target word} |
| 123 | |
| 124 | \item{collocate}{collocate of target word} |
| 125 | |
| 126 | \item{lemmatizeNodeQuery}{logical, set to TRUE if node query should be lemmatized, i.e. x -> [tt/l=x]} |
| 127 | |
| 128 | \item{lemmatizeCollocateQuery}{logical, set to TRUE if collocate query should be lemmatized, i.e. x -> [tt/l=x]} |
| 129 | |
| 130 | \item{leftContextSize}{size of the left context window} |
| 131 | |
| 132 | \item{rightContextSize}{size of the right context window} |
| 133 | |
| 134 | \item{scoreFunctions}{named list of score functions of the form function(O1, O2, O, N, E, window_size), see e.g. \link{pmi}} |
| 135 | |
| 136 | \item{smoothingConstant}{smoothing constant will be added to all observed values} |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 137 | } |
| 138 | \value{ |
| 139 | The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches} |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 140 | |
| 141 | tibble with query KorAP web request URL, all observed values and association scores |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 142 | } |
| 143 | \description{ |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 144 | This class provides methods to perform different kinds of queries on the KorAP API server. |
| 145 | \code{KorAPQuery} objects, which are typically created by the \code{\link{corpusQuery}} method, |
| 146 | represent the current state of a query to a KorAP server. |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 147 | |
| 148 | \bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query. |
| 149 | |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 150 | \bold{\code{fetchAll}} fetches all results of a KorAP query. |
| 151 | |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 152 | \bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and |
| 153 | \code{\link{ci}} to compute a table with the relative frequencies and |
| 154 | confidence intervals of one or multiple search terms across one or multiple |
| 155 | virtual corpora. |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 156 | |
| 157 | \bold{\code{collocationScoreQuery}} computes various collocation association scores |
| 158 | based on \code{\link{frequencyQuery}}s for a target word and a collocate. |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 159 | } |
| 160 | \examples{ |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 161 | \donttest{q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 162 | q@collectedMatches |
Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 163 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 164 | |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 165 | \donttest{ |
Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 166 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll() |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 167 | q@collectedMatches |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 168 | } |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 169 | |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 170 | \donttest{ |
| 171 | q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchRest() |
| 172 | q@collectedMatches |
| 173 | } |
| 174 | |
| 175 | \donttest{ |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 176 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 177 | frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003)) |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 178 | } |
Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 179 | |
Marc Kupietz | e203832 | 2021-03-04 18:24:02 +0100 | [diff] [blame] | 180 | \donttest{ |
| 181 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 182 | collocationScoreQuery("Grund", "triftiger") |
| 183 | } |
| 184 | |
| 185 | \donttest{ |
| 186 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 187 | collocationScoreQuery("Grund", c("guter", "triftiger"), |
| 188 | scoreFunctions = list(localMI = function(O1, O2, O, N, E, window_size) { O * log2(O/E) }) ) |
| 189 | } |
| 190 | |
| 191 | \donttest{ |
| 192 | library(highcharter) |
| 193 | library(tidyr) |
| 194 | new("KorAPConnection", verbose = TRUE) \%>\% |
| 195 | collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)), |
| 196 | lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\% |
| 197 | pivot_longer(14:last_col(), names_to = "measure", values_to = "score") \%>\% |
| 198 | hchart(type="spline", hcaes(label, score, group=measure)) \%>\% |
| 199 | hc_add_onclick_korap_search() |
| 200 | } |
| 201 | |
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 202 | } |
| 203 | \references{ |
| 204 | \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026} |
| 205 | } |