blob: 2e5372f01b8291713dadf82643ec4863095b3a6e [file] [log] [blame]
# S4 generic `textMetadata`: its formal arguments are `kco` and `...`, so
# methods are selected on the class of `kco` and may declare further
# arguments of their own. The body is a bare standardGeneric() call.
setGeneric(
  "textMetadata",
  function(kco, ...) standardGeneric("textMetadata")
)
2
#' Retrieve metadata for a text, identified by its sigle (id)
#'
#' @aliases textMetadata
#'
#' @description
#' Retrieves metadata for a text, identified by its sigle (id) using the corresponding KorAP API
#' (see [Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Metadata-Retrieval)).
#'
#'
#' @param kco [KorAPConnection()] object (obtained e.g. from `new("KorAPConnection")`)
#' @param textSigle unique text id (concatenation of corpus, document and text ids, separated by `/`, e.g. `WUD17/A97/08542`) or vector thereof
#' @param verbose logical. If `TRUE`, additional diagnostics are printed. Defaults to `kco@verbose`.
#'
#' @return Tibble with one row per requested text and columns for each metadata property,
#' plus the columns `textSigle`, `requestUrl` and `webUIRequestUrl`.
#' In case of errors, such as non-existing texts/sigles or a failed API request, the tibble will also contain a column called `errors`.
#' A zero-length `textSigle` yields an empty tibble.
#' If there are metadata columns you cannot make sense of, please ignore them. The function simply returns all the metadata it gets from the server.
#'
#' @importFrom urltools url_encode
#' @importFrom dplyr across bind_rows relocate mutate where
#' @importFrom tibble as_tibble
#' @importFrom tidyr pivot_wider
#'
#' @examples
#' \dontrun{
#' new("KorAPConnection") %>% textMetadata(c("WUD17/A97/08542", "WUD17/B96/57558", "WUD17/A97/08541"))
#' }
#'
#' @export
setMethod("textMetadata", "KorAPConnection",
  function(kco, textSigle, verbose = kco@verbose) {
    # Nothing requested: return an empty tibble rather than passing a
    # zero-length vector on to URLencode().
    if (length(textSigle) == 0) {
      return(tibble())
    }

    # Several sigles: fetch them one by one and stack the one-row results.
    # `verbose` is forwarded explicitly so that a caller-supplied value is
    # honoured for every single request, not only `kco@verbose`.
    if (length(textSigle) > 1) {
      return(bind_rows(lapply(textSigle, function(atomicSigle) {
        textMetadata(kco, atomicSigle, verbose = verbose)
      })))
    }

    sigle <- as.character(textSigle)
    url <- paste0(
      kco@apiUrl, "corpus/",
      URLencode(enc2utf8(textSigle), reserved = TRUE)
    )
    web_ui_url <- paste0(
      kco@KorAPUrl,
      sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle)))
    )

    # Adds the request bookkeeping columns and moves `textSigle` to the front.
    # The values are injected with `!!` so that a metadata column that happens
    # to be called `url`, `sigle`, ... cannot shadow the local variables
    # through dplyr's data masking.
    add_request_info <- function(tbl) {
      tbl %>%
        mutate(
          textSigle = !!sigle,
          requestUrl = !!url,
          webUIRequestUrl = !!web_ui_url
        ) %>%
        relocate("textSigle")
    }

    log_info(verbose, "Getting metadata for ", textSigle, sep = "")
    res <- apiCall(kco, url)
    failed <- is.null(res) || "errors" %in% names(res)
    log_info(verbose, if (failed) " [error]\n" else "\n")

    # No response at all: still report which sigle/URL the failure belongs to.
    if (is.null(res)) {
      return(add_request_info(tibble(errors = "API request failed")))
    }

    has_fields <- "document" %in% names(res) &&
      "fields" %in% names(res$document) &&
      length(res$document$fields) > 0

    if (has_fields) {
      # Key/value records -> one wide row. Columns are referred to by string,
      # which avoids "no visible binding" notes in R CMD check.
      res <- as_tibble(res$document$fields) %>%
        mutate(across(where(is.list), function(column) {
          purrr::map(column, function(entry) {
            # Multi-valued entries are collapsed into a single string.
            if (length(entry) < 2) unlist(entry) else paste(entry, collapse = "\\t")
          })
        })) %>%
        select("key", "value") %>%
        tidyr::pivot_wider(
          names_from = "key",
          values_from = "value",
          names_repair = "unique"
        ) %>%
        mutate(across(everything(), as.character)) %>%
        add_request_info()
    } else {
      # Response without metadata fields (e.g. an error payload): flatten every
      # top-level element to one string and keep a single row.
      res <- lapply(res, function(x) paste0(x, collapse = "\\t")) %>%
        as_tibble() %>%
        head(n = 1) %>%
        add_request_info()
    }

    res
  }
)
73
74