#' @include logging.R
setGeneric(
  name = "textMetadata",
  # S4 generic: method dispatch happens on `kco`; all further arguments are
  # handed on to the selected method through `...`.
  def = function(kco, ...) standardGeneric("textMetadata")
)
Marc Kupietz | 6ddece4 | 2023-12-18 17:02:36 +0100 | [diff] [blame] | 3 | |
#' Retrieve metadata for a text, identified by its sigle (id)
#'
#' @aliases textMetadata
#'
#' @description
#' Retrieves metadata for a text, identified by its sigle (id) using the corresponding KorAP API
#' (see [Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Metadata-Retrieval)).
#' To retrieve the metadata for every text in a virtual corpus, use [corpusQuery()]
#' with `<base/s=t>` as query, instead.
#'
#'
#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`)
#' @param textSigle unique text id (concatenation of corpus, document and text ids, separated by `/`, e.g. `WUD17/A97/08542`) or vector thereof
#' @param verbose logical. If `TRUE`, additional diagnostics are printed. Defaults to `kco@verbose`.
#'   The setting is also applied to every single request when `textSigle` is a vector.
#'
#' @return Tibble with one row per text and columns for each metadata property, plus the columns
#' `textSigle`, `requestUrl` and `webUIRequestUrl`. In case of errors, such as non-existing texts/sigles
#' or a failed API request, the tibble will also contain a column called `errors`.
#' A zero-length `textSigle` yields an empty tibble.
#' If there are metadata columns you cannot make sense of, please ignore them. The function simply returns all the metadata it gets from the server.
#'
#' @importFrom urltools url_encode
#' @importFrom dplyr across bind_rows relocate mutate where
#' @importFrom tibble as_tibble
#' @importFrom tidyr pivot_wider
#'
#' @examples
#' \dontrun{
#' KorAPConnection() |> textMetadata(c("WUD17/A97/08542", "WUD17/B96/57558", "WUD17/A97/08541"))
#' }
#'
#' @export
setMethod(
  "textMetadata", "KorAPConnection",
  function(kco, textSigle, verbose = kco@verbose) {
    # Dummy bindings for the column names used in tidy-eval expressions below,
    # so that R CMD check does not report "no visible binding" notes, see
    # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
    key <- value <- NULL

    # Nothing to look up: do not send a request for an empty sigle.
    if (length(textSigle) == 0) {
      return(tibble::tibble())
    }

    # Vector input: one request per sigle, results stacked row-wise.
    # `verbose` has to be forwarded explicitly, otherwise the recursive calls
    # would fall back to `kco@verbose`.
    if (length(textSigle) > 1) {
      return(bind_rows(lapply(textSigle, function(atomicSigle) {
        textMetadata(kco, atomicSigle, verbose = verbose)
      })))
    }

    url <-
      paste0(
        kco@apiUrl, "corpus/",
        URLencode(enc2utf8(textSigle), reserved = TRUE)
      )
    log_info(verbose, "Getting metadata for ", textSigle, sep = "")
    res <- apiCall(kco, url)
    failed <- is.null(res) || "errors" %in% names(res)
    log_info(verbose, if (failed) " [error]\n" else "\n")

    if (is.null(res)) {
      # No response at all: still report which sigle/request failed, so that
      # the row can be identified when results for several sigles are bound.
      res <- tibble::tibble(
        textSigle = as.character(textSigle),
        errors = "API request failed",
        requestUrl = url,
        webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))
      )
    } else if ("document" %in% names(res) &&
      "fields" %in% names(res$document) &&
      length(res$document$fields) > 0) {
      # Regular response: `fields` holds key/value records, which are spread
      # into one column per key.
      # NOTE(review): multi-valued fields are joined with the two characters
      # backslash + "t" ("\\t"), not with a tab; kept as is.
      res <- as_tibble(res$document$fields) %>%
        dplyr::mutate(across(
          where(is.list),
          function(column) {
            purrr::map(column, function(cell) {
              if (length(cell) < 2) unlist(cell) else paste(cell, collapse = "\\t")
            })
          }
        )) %>%
        dplyr::select(key, value) %>%
        tidyr::pivot_wider(names_from = key, values_from = value, names_repair = "unique") %>%
        mutate(
          textSigle = as.character(textSigle),
          requestUrl = url,
          webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))
        ) %>%
        mutate(across(dplyr::everything(), as.character)) %>%
        relocate(textSigle)
    } else {
      # Response without metadata fields (e.g. an error report): collapse each
      # top-level element into a single string and keep one row.
      res <- lapply(res, function(x) paste0(x, collapse = "\\t")) # flatten list
      res <- as_tibble(res) %>%
        head(n = 1) %>%
        mutate(
          requestUrl = url,
          textSigle = textSigle,
          webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))
        ) %>%
        relocate(textSigle)
    }
    res
  }
)