blob: 8984967b9f094a5ccaed0f03fcc5a1a5c6527df0 [file] [log] [blame]
# S4 generic for text-metadata retrieval. Dispatch happens on the class of
# `kco`; all further arguments are passed through `...` to the method.
# The roxygen `@include` directive asks for logging.R to be collated before
# this file.
#' @include logging.R
setGeneric("textMetadata", function(kco, ...) standardGeneric("textMetadata"))

#' Retrieve metadata for a text, identified by its sigle (id)
#'
#' @aliases textMetadata
#'
#' @description
#' Retrieves metadata for a text, identified by its sigle (id) using the corresponding KorAP API
#' (see [Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Metadata-Retrieval)).
#'
#'
#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`)
#' @param textSigle unique text id (concatenation of corpus, document and text ids, separated by `/`, e.g. `WUD17/A97/08542`) or vector thereof
#' @param verbose logical. If `TRUE`, additional diagnostics are printed. Defaults to `kco@verbose`.
#'
#' @return Tibble with columns for each metadata property. In case of errors, such as non-existing texts/sigles, the tibble will also contain a column called `errors`.
#' If there are metadata columns you cannot make sense of, please ignore them. The function simply returns all the metadata it gets from the server.
#'
#' @importFrom urltools url_encode
#' @importFrom dplyr across bind_rows relocate mutate where
#' @importFrom tibble as_tibble
#' @importFrom tidyr pivot_wider
#'
#' @examples
#' \dontrun{
#' KorAPConnection() %>% textMetadata(c("WUD17/A97/08542", "WUD17/B96/57558", "WUD17/A97/08541"))
#' }
#'
#' @export
setMethod(
  "textMetadata", "KorAPConnection",
  function(kco, textSigle, verbose = kco@verbose) {
    # Nothing to look up: return an empty tibble rather than issuing a request
    # against a bare "corpus/" URL.
    if (length(textSigle) == 0) {
      return(tibble::tibble())
    }

    # Vector input: fetch every sigle on its own and stack the results.
    # `verbose` is forwarded explicitly so that a caller-supplied value is
    # honoured for each element instead of silently falling back to
    # `kco@verbose`.
    if (length(textSigle) > 1) {
      return(bind_rows(lapply(textSigle, function(atomicSigle) {
        textMetadata(kco, atomicSigle, verbose = verbose)
      })))
    }

    # Single sigle: build the request URL; reserved characters (such as the
    # "/" separators inside the sigle) are percent-encoded.
    url <- paste0(
      kco@apiUrl, "corpus/",
      URLencode(enc2utf8(textSigle), reserved = TRUE)
    )
    log_info(verbose, "Getting metadata for ", textSigle, sep = "")
    res <- apiCall(kco, url)

    # A NULL result or a response carrying an `errors` entry is reported as an
    # error in the log line.
    failed <- is.null(res) || "errors" %in% names(res)
    log_info(verbose, if (failed) " [error]\n" else "\n")

    # A NULL result is treated as a failed request.
    if (is.null(res)) {
      return(tibble::tibble(errors = "API request failed"))
    }

    # Link to the same text in the KorAP web UI. Kept as a helper because both
    # result branches below need it; it is called inside `mutate()` exactly
    # like the original inline expression.
    web_ui_url <- function(sigle) {
      paste0(
        kco@KorAPUrl,
        sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(sigle)))
      )
    }

    # Scalar condition, so use short-circuiting `&&` throughout.
    has_fields <- "document" %in% names(res) &&
      "fields" %in% names(res[["document"]]) &&
      length(res[["document"]][["fields"]]) > 0

    if (has_fields) {
      # Flatten one cell of a list column: entries with fewer than two
      # elements are unlisted, longer ones are joined into a single string.
      flatten_cell <- function(cell) {
        if (length(cell) < 2) unlist(cell) else paste(cell, collapse = "\\t")
      }

      # Key/value field records -> one wide row with one column per key.
      # Columns are addressed by name (strings), so no dummy bindings are
      # needed to keep R CMD check quiet.
      res <- as_tibble(res[["document"]][["fields"]]) %>%
        dplyr::mutate(across(where(is.list), function(col) purrr::map(col, flatten_cell))) %>%
        dplyr::select(dplyr::all_of(c("key", "value"))) %>%
        tidyr::pivot_wider(
          names_from = "key", values_from = "value", names_repair = "unique"
        ) %>%
        mutate(
          textSigle = as.character(textSigle),
          requestUrl = url,
          webUIRequestUrl = web_ui_url(textSigle)
        ) %>%
        mutate(across(dplyr::everything(), as.character)) %>%
        relocate(textSigle)
    } else {
      # No usable field list (e.g. an error response): collapse every
      # top-level entry of the response to a single string and return it as a
      # one-row tibble.
      res <- lapply(res, function(x) paste0(x, collapse = "\\t")) # flatten list
      res <- as_tibble(res) %>%
        head(n = 1) %>%
        mutate(
          requestUrl = url,
          textSigle = textSigle,
          webUIRequestUrl = web_ui_url(textSigle)
        ) %>%
        relocate(textSigle)
    }
    res
  }
)