blob: fb1dc98d3fd1bc942929eec038840c8c95074ffc [file] [log] [blame]
#' Logging utilities for RKorAPClient
#'
#' This module provides centralized logging functions used throughout the package
#' for progress reporting and ETA calculations.
5
#' Log informational messages with optional coloring
#'
#' Concatenates the message components with `paste0()` and writes the result
#' to standard output via `cat()`, wrapped in the ANSI escape sequences for
#' green text. Nothing is written unless `v` is a single true value.
#'
#' @importFrom stats median
#'
#' @param v logical flag indicating whether to output the message
#' @param ... message components to concatenate and display
#' @return `NULL`, invisibly; the function is called for its side effect
#' @keywords internal
log_info <- function(v, ...) {
  # A scalar `if` replaces the former `ifelse()`: `ifelse(NA, ...)` yields NA,
  # which `cat()` printed as the literal text "NA", and `ifelse()` kept only
  # the first element when a message component was a vector.
  if (isTRUE(as.logical(v))) {
    green <- "\033[32m"
    reset <- "\033[0m"
    cat(paste0(green, ..., reset))
  }
  invisible(NULL)
}
17
#' Format duration in seconds to human-readable format
#'
#' Converts a duration in seconds to a formatted string with days, hours,
#' minutes, and seconds. Leading units that are zero are omitted; once a unit
#' is shown, every smaller unit is shown too (zero-padded to two digits).
#' Fractional seconds are truncated. Used for ETA calculations and progress
#' reporting.
#'
#' @param seconds numeric duration in seconds (a single value)
#' @return character string with formatted duration; `"00s"` when `seconds` is
#'   not exactly one finite, non-negative value (including `NULL`, `NA`,
#'   empty or multi-element input)
#' @keywords internal
#' @examples
#' \dontrun{
#' format_duration(3661) # "01h 01m 01s"
#' format_duration(86461) # "1d 00h 01m 01s"
#' }
format_duration <- function(seconds) {
  # The length check comes first so that NULL / empty / vector input falls
  # back to "00s" instead of failing inside `||` or `if`. `is.finite()` is
  # FALSE for NA, NaN and +/-Inf, so no separate `is.na()` test is needed.
  if (length(seconds) != 1L || !is.finite(seconds) || seconds < 0) {
    return("00s")
  }

  secs_per_day <- 24 * 3600
  days <- floor(seconds / secs_per_day)
  rest <- seconds %% secs_per_day
  hours <- floor(rest / 3600)
  rest <- rest %% 3600
  minutes <- floor(rest / 60)
  secs <- floor(rest %% 60)

  show_days <- days > 0
  show_hours <- show_days || hours > 0
  show_minutes <- show_hours || minutes > 0

  # `c()` drops the NULLs produced by the `if`s without an `else`.
  # "%.0f" keeps large day counts out of scientific notation ("1e+05d").
  parts <- c(
    if (show_days) sprintf("%.0fd", days),
    if (show_hours) sprintf("%02.0fh", hours),
    if (show_minutes) sprintf("%02.0fm", minutes),
    sprintf("%02.0fs", secs)
  )
  paste(parts, collapse = " ")
}
50
#' Calculate and format ETA for batch operations
#'
#' Helper function to calculate estimated time of arrival based on elapsed time
#' and progress through a batch operation. The average time per item is the
#' elapsed time divided by `current_item - 1`, and that average is multiplied
#' by the `total_items - current_item + 1` items still counted as outstanding.
#'
#' @param current_item current item number (1-based)
#' @param total_items total number of items to process
#' @param start_time POSIXct start time of the operation
#' @return character string with formatted ETA and completion time or empty string if not calculable
#' @keywords internal
calculate_eta <- function(current_item, total_items, start_time) {
  # `isTRUE()` makes NA / NULL / vector counters yield "" instead of raising
  # an error inside `if`, so a progress message can never abort the batch.
  if (!isTRUE(current_item > 1) || !isTRUE(total_items > 1)) {
    return("")
  }

  # One clock reading is used for both the elapsed time and the projected
  # completion time so the two values are mutually consistent.
  now <- Sys.time()
  elapsed_time <- as.numeric(difftime(now, start_time, units = "secs"))
  if (!isTRUE(elapsed_time > 0)) {
    return("")
  }

  avg_time_per_item <- elapsed_time / (current_item - 1)
  # Clamp at zero so an overshooting counter cannot produce a completion time
  # in the past.
  remaining_items <- max(total_items - current_item + 1, 0)
  eta_seconds <- avg_time_per_item * remaining_items

  # Delegate to the shared formatter so every ETA suffix has the same layout.
  format_eta_display(eta_seconds, now + eta_seconds)
}
Marc Kupietze8c8e1a2025-06-19 17:37:59 +020079
#' Calculate sophisticated ETA using median of recent non-cached times
#'
#' Advanced ETA calculation that excludes cached responses and uses median
#' of recent timing data for more stable estimates. This is particularly
#' useful for operations where some responses may be cached and much faster.
#' Missing (`NA`) timings are ignored rather than propagated into the estimate.
#'
#' @param individual_times numeric vector of individual item processing times
#' @param current_item current item number (1-based)
#' @param total_items total number of items to process
#' @param cache_threshold minimum time in seconds to consider as non-cached (default: 0.1)
#' @param window_size number of recent non-cached times to use for median calculation (default: 5)
#' @return list with eta_seconds, estimated_completion_time, and is_cached flag.
#'   `eta_seconds` and `estimated_completion_time` are `NA` when
#'   `current_item < 2` or when no non-cached timing is available; `is_cached`
#'   is always `TRUE` or `FALSE`.
#' @keywords internal
calculate_sophisticated_eta <- function(individual_times, current_item, total_items,
                                        cache_threshold = 0.1, window_size = 5) {
  # Shared shape of the "no estimate available" result.
  no_estimate <- function(cached) {
    list(eta_seconds = NA, estimated_completion_time = NA, is_cached = cached)
  }

  if (current_item < 2) {
    return(no_estimate(FALSE))
  }

  # Timings recorded so far; never index past the end of the vector, which
  # would pad the result with NA.
  n_seen <- min(current_item, length(individual_times))
  current_times <- individual_times[seq_len(n_seen)]

  # A missing timing for the current item counts as "not cached" so that the
  # flag stays a plain TRUE/FALSE for downstream `if` tests.
  current_time <- individual_times[current_item]
  is_cached <- isTRUE(current_time < cache_threshold)

  # Very fast responses are treated as cached and excluded. NA is filtered
  # explicitly because a logical subset containing NA yields NA elements,
  # which would turn the median (and the ETA) into NA.
  keep <- !is.na(current_times) & current_times >= cache_threshold
  non_cached_times <- current_times[keep]

  if (length(non_cached_times) < 1) {
    # All responses so far appear cached (or have no timing)
    return(no_estimate(is_cached))
  }

  # Median of the most recent non-cached timings for a stable per-item cost
  recent_window <- min(window_size, length(non_cached_times))
  recent_times <- tail(non_cached_times, recent_window)
  time_per_item <- median(recent_times)

  # Clamp at zero so an overshooting counter cannot produce a negative ETA.
  remaining_items <- max(total_items - current_item, 0)
  eta_seconds <- time_per_item * remaining_items

  list(
    eta_seconds = eta_seconds,
    estimated_completion_time = Sys.time() + eta_seconds,
    is_cached = is_cached
  )
}
128
#' Format ETA information for display
#'
#' Builds the ETA suffix appended to progress messages: the remaining duration
#' followed by the projected completion timestamp in parentheses. Keeps the
#' ETA layout consistent across different methods.
#'
#' @param eta_seconds numeric ETA in seconds (can be NA)
#' @param estimated_completion_time POSIXct estimated completion time (can be NA)
#' @return character string with formatted ETA or empty string if NA
#' @keywords internal
format_eta_display <- function(eta_seconds, estimated_completion_time) {
  eta_unknown <- is.na(eta_seconds) || is.na(estimated_completion_time)

  if (eta_unknown) {
    # Nothing to show when either piece of the estimate is missing
    ""
  } else {
    finish_str <- format(estimated_completion_time, "%Y-%m-%d %H:%M:%S")
    duration_str <- format_duration(eta_seconds)
    paste0(". ETA: ", duration_str, " (", finish_str, ")")
  }
}
145
#' Get cache indicator string
#'
#' Helper function to generate cache indicator for logging.
#'
#' @param is_cached logical indicating if the item was cached; `NA`, `NULL`
#'   and other non-scalar values are treated as "not cached"
#' @param cache_threshold minimum time threshold for non-cached items
#'   (not used by the function body; kept so existing calls keep working)
#' @return character string with cache indicator or empty string
#' @keywords internal
get_cache_indicator <- function(is_cached, cache_threshold = 0.1) {
  # `isTRUE()` guards against NA / zero-length flags, which would make a bare
  # `if (is_cached)` raise an error in the middle of a progress message.
  if (isTRUE(as.logical(is_cached))) " [cached]" else ""
}