blob: a0b115af78646cdc94c26a4a43014f9f49b999e3 [file] [log] [blame]
# S4 generic for textMetadata(); methods are selected on the class of `kco`,
# all further arguments are passed through `...`.
#' @include logging.R
setGeneric(
  name = "textMetadata",
  def = function(kco, ...) standardGeneric("textMetadata")
)
Marc Kupietz6ddece42023-12-18 17:02:36 +01003
4#' Retrieve metadata for a text, identified by its sigle (id)
5#'
6#' @aliases textMetadata
7#'
8#' @description
9#' Retrieves metadata for a text, identified by its sigle (id) using the corresponding KorAP API
10#' (see [Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Metadata-Retrieval)).
Marc Kupietza267c422025-06-26 10:16:20 +020011#' To retrieve the metadata for every text in a virtual corpus, use [corpusQuery()]
12#' with `<base/s=t>` as query, instead.
Marc Kupietz6ddece42023-12-18 17:02:36 +010013#'
14#'
Marc Kupietz617266d2025-02-27 10:43:07 +010015#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`)
#' @param textSigle unique text id (concatenation of corpus, document and text ids, separated by `/`, e.g. `WUD17/A97/08542`) or vector thereof
17#' @param verbose logical. If `TRUE`, additional diagnostics are printed. Defaults to `kco@verbose`.
18#'
Marc Kupietz3687a8c2024-01-24 16:18:36 +010019#' @return Tibble with columns for each metadata property. In case of errors, such as non-existing texts/sigles, the tibble will also contain a column called `errors`.
20#' If there are metadata columns you cannot make sense of, please ignore them. The function simply returns all the metadata it gets from the server.
Marc Kupietz6ddece42023-12-18 17:02:36 +010021#'
22#' @importFrom urltools url_encode
Marc Kupietza7a8f1b2024-12-18 15:56:19 +010023#' @importFrom dplyr across bind_rows relocate mutate where
Marc Kupietz2a8ab822024-06-26 21:13:37 +020024#' @importFrom tibble as_tibble
25#' @importFrom tidyr pivot_wider
Marc Kupietz6ddece42023-12-18 17:02:36 +010026#'
27#' @examples
28#' \dontrun{
Marc Kupietza267c422025-06-26 10:16:20 +020029#' KorAPConnection() |> textMetadata(c("WUD17/A97/08542", "WUD17/B96/57558", "WUD17/A97/08541"))
Marc Kupietz6ddece42023-12-18 17:02:36 +010030#' }
31#'
32#' @export
setMethod(
  "textMetadata", "KorAPConnection",
  function(kco, textSigle, verbose = kco@verbose) {
    # Dummy binding for the column name used in the tidy-eval calls below, so
    # that R CMD check does not report a "no visible binding" note.
    # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
    key <- 0

    # Nothing requested: return an empty tibble instead of querying the
    # bare ".../corpus/" endpoint.
    if (length(textSigle) == 0) {
      return(tibble())
    }

    # Vector input: fetch every sigle on its own and stack the results.
    # `verbose` is forwarded explicitly; otherwise the recursive calls would
    # fall back to `kco@verbose` and ignore the caller's setting.
    if (length(textSigle) > 1) {
      perSigle <- lapply(textSigle, function(atomicSigle) {
        textMetadata(kco, atomicSigle, verbose = verbose)
      })
      return(bind_rows(perSigle))
    }

    # Builds the web UI link for a sigle. It is called inside mutate() with
    # `textSigle`, so a `textSigle` column in the data (if present) takes
    # precedence over the function argument, as in the inlined expression.
    buildWebUIRequestUrl <- function(sigle) {
      paste0(
        kco@KorAPUrl,
        sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(sigle)))
      )
    }

    url <-
      paste0(
        kco@apiUrl, "corpus/",
        URLencode(enc2utf8(textSigle), reserved = TRUE)
      )
    log_info(verbose, "Getting metadata for ", textSigle, sep = "")
    res <- apiCall(kco, url)
    requestFailed <- is.null(res) || "errors" %in% names(res)
    log_info(verbose, if (requestFailed) " [error]\n" else "\n")

    if (is.null(res)) {
      return(tibble(errors = "API request failed"))
    }

    # Scalar, short-circuiting test: `res$document` is only inspected when
    # `res` actually has a "document" entry.
    hasFields <- "document" %in% names(res) &&
      "fields" %in% names(res$document) &&
      length(res$document$fields) > 0

    if (hasFields) {
      # Regular response: turn the key/value field list into one wide row.
      # List-valued entries with two or more elements are collapsed into a
      # single string.
      res <- as_tibble(res$document$fields) %>%
        dplyr::mutate(across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = "\\t")))) %>%
        select(key, value) %>%
        tidyr::pivot_wider(names_from = key, values_from = value, names_repair = "unique") %>%
        mutate(
          textSigle = as.character(textSigle),
          requestUrl = url,
          webUIRequestUrl = buildWebUIRequestUrl(textSigle)
        ) %>%
        mutate(across(everything(), as.character)) %>%
        relocate(textSigle)
    } else {
      # Response without document fields (e.g. one carrying `errors`):
      # flatten every top-level element to a single string, keep one row.
      res <- lapply(res, function(x) paste0(x, collapse = "\\t"))
      res <- as_tibble(res) %>%
        head(n = 1) %>%
        mutate(
          requestUrl = url,
          textSigle = textSigle,
          webUIRequestUrl = buildWebUIRequestUrl(textSigle)
        ) %>%
        relocate(textSigle)
    }
    res
  }
)