Factor out ETA logging Change-Id: Ic8b1600395018aa79701e29a38c4ec22d598337a

commit: 6dfeed9bd68d0b36be2c47718c1b29a17c8c2d1a [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Tue Jun 03 11:58:06 2025 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Tue Jun 03 11:58:06 2025 +0200
tree: ede04b9396f6b33babf58668d041c96b81957b22
parent: 71c30d80c6334c8c7a4b772485ca281e4664a5f3 [diff]
diff --git a/DESCRIPTION b/DESCRIPTION
index a25074b..1e47e7a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION

@@ -77,4 +77,5 @@
     'misc.R'
     'reexports.R'
     'textMetadata.R'
+    'logging.R'
 Roxygen: list(markdown = TRUE)

diff --git a/R/KorAPConnection.R b/R/KorAPConnection.R
index f2d65a8..1d567a8 100644
--- a/R/KorAPConnection.R
+++ b/R/KorAPConnection.R

@@ -13,6 +13,7 @@
 #' @import R.cache
 #' @import utils
 #' @import methods
+#' @include logging.R
 #' @slot KorAPUrl        URL of the web user interface of the KorAP server used in the connection.
 #' @slot apiVersion      requested KorAP API version.
 #' @slot indexRevision   indexRevision code as reported from API via `X-Index-Revision` HTTP header.

diff --git a/R/KorAPCorpusStats.R b/R/KorAPCorpusStats.R
index 75bd6eb..8097637 100644
--- a/R/KorAPCorpusStats.R
+++ b/R/KorAPCorpusStats.R

@@ -4,6 +4,7 @@
 #' `KorAPCorpusStats` objects can be obtained by the [corpusStats()] method.
 #'
 #' @include KorAPConnection.R
+#' @include logging.R
 #'
 #' @export
 #' @slot vc definition of the virtual corpus
@@ -14,11 +15,6 @@
 #' @slot webUIRequestUrl link to the web user interface with the current vc definition
 setClass("KorAPCorpusStats", slots = c(vc = "character", documents = "numeric", tokens = "numeric", sentences = "numeric", paragraphs = "numeric", webUIRequestUrl = "character"))
 
-log_info <- function(v, ...) {
-  green <- "\033[32m"
-  reset <- "\033[0m"
-  cat(ifelse(v, paste0(green, ..., reset), ""))
-}
 setGeneric("corpusStats", function(kco, ...) standardGeneric("corpusStats"))
 
 #' Fetch information about a (virtual) corpus
@@ -37,22 +33,6 @@
 #' }
 #'
 #' @aliases corpusStats
-# Helper function to format duration with leading zeros
-format_duration <- function(seconds) {
-  if (is.na(seconds) || !is.finite(seconds) || seconds <= 0) {
-    return("N/A")
-  }
-
-  hours <- floor(seconds / 3600)
-  minutes <- floor((seconds %% 3600) / 60)
-  secs <- round(seconds %% 60)
-
-  if (hours > 0) {
-    return(sprintf("%02d:%02d:%02d", hours, minutes, secs))
-  } else {
-    return(sprintf("%02d:%02d", minutes, secs))
-  }
-}
 
 #' @export
 setMethod("corpusStats", "KorAPConnection", function(kco,

diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 93f5458..04d859a 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R

@@ -5,6 +5,7 @@
 #' represent the current state of a query to a KorAP server.
 #'
 #' @include KorAPConnection.R
+#' @include logging.R
 #' @import httr2
 #'
 #' @include RKorAPClient-package.R
@@ -196,25 +197,6 @@
       current_query <- 0
       start_time <- Sys.time()
 
-      # Helper function to format duration
-      format_duration <- function(seconds) {
-        if (is.na(seconds) || seconds < 0) {
-          return("00s")
-        }
-        days <- floor(seconds / (24 * 3600))
-        seconds <- seconds %% (24 * 3600)
-        hours <- floor(seconds / 3600)
-        seconds <- seconds %% 3600
-        minutes <- floor(seconds / 60)
-        seconds <- floor(seconds %% 60)
-        paste0(
-          if (days > 0) paste0(days, "d ") else "",
-          if (hours > 0 || days > 0) paste0(sprintf("%02d", hours), "h ") else "",
-          if (minutes > 0 || hours > 0 || days > 0) paste0(sprintf("%02d", minutes), "m ") else "",
-          paste0(sprintf("%02d", seconds), "s")
-        )
-      }
-
       results <- purrr::pmap(grid, function(query, vc, ...) {
         current_query <<- current_query + 1
 
@@ -279,16 +261,10 @@
 
         # Calculate and display ETA information if verbose and we have more than one query
         if (verbose && total_queries > 1) {
-          elapsed_time <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
-
-          if (current_query > 1) { # Only calculate ETA after the first query
+          eta_info <- calculate_eta(current_query, total_queries, start_time)
+          if (eta_info != "") {
+            elapsed_time <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
             avg_time_per_query <- elapsed_time / current_query
-            remaining_queries <- total_queries - current_query
-            estimated_remaining_seconds <- remaining_queries * avg_time_per_query
-            estimated_completion_time <- Sys.time() + estimated_remaining_seconds
-
-            eta_str <- format_duration(estimated_remaining_seconds)
-            completion_time_str <- format(estimated_completion_time, "%Y-%m-%d %H:%M:%S")
 
             # Create progress display
             progress_display <- paste0(
@@ -298,9 +274,8 @@
               sprintf("%d", total_queries),
               " completed. Avg: ",
               sprintf("%.1f", avg_time_per_query),
-              "s/query. ETA: ",
-              eta_str,
-              " (", completion_time_str, ")"
+              "s/query",
+              eta_info
             )
 
             log_info(verbose, progress_display, "\n")

diff --git a/R/collocationAnalysis.R b/R/collocationAnalysis.R
index 3d43012..16987c6 100644
--- a/R/collocationAnalysis.R
+++ b/R/collocationAnalysis.R

@@ -1,4 +1,5 @@
-setGeneric("collocationAnalysis", function(kco, ...)  standardGeneric("collocationAnalysis") )
+#' @include logging.R
+setGeneric("collocationAnalysis", function(kco, ...) standardGeneric("collocationAnalysis"))
 
 #' Collocation analysis
 #'
@@ -52,10 +53,12 @@
 #' @examples
 #' \dontrun{
 #'
-#'  # Find top collocates of "Packung" inside and outside the sports domain.
-#'  KorAPConnection(verbose = TRUE) |>
-#'   collocationAnalysis("Packung", vc=c("textClass=sport", "textClass!=sport"),
-#'                       leftContextSize=1, rightContextSize=1, topCollocatesLimit=20) |>
+#' # Find top collocates of "Packung" inside and outside the sports domain.
+#' KorAPConnection(verbose = TRUE) |>
+#'   collocationAnalysis("Packung",
+#'     vc = c("textClass=sport", "textClass!=sport"),
+#'     leftContextSize = 1, rightContextSize = 1, topCollocatesLimit = 20
+#'   ) |>
 #'   dplyr::filter(logDice >= 5)
 #' }
 #'
@@ -65,150 +68,154 @@
 #' # Note that, currently, the use of focus function disallows exactFrequencies.
 #' KorAPConnection(verbose = TRUE) |>
 #'   collocationAnalysis("focus(in [tt/p=NN] {[tt/l=setzen]})",
-#'     leftContextSize=1, rightContextSize=0, exactFrequencies=FALSE, topCollocatesLimit=20)
+#'     leftContextSize = 1, rightContextSize = 0, exactFrequencies = FALSE, topCollocatesLimit = 20
+#'   )
 #' }
 #'
 #' @export
-setMethod("collocationAnalysis", "KorAPConnection",
-          function(kco,
-                   node,
-                   vc = "",
-                   lemmatizeNodeQuery = FALSE,
-                   minOccur = 5,
-                   leftContextSize = 5,
-                   rightContextSize = 5,
-                   topCollocatesLimit = 200,
-                   searchHitsSampleLimit = 20000,
-                   ignoreCollocateCase = FALSE,
-                   withinSpan = ifelse(exactFrequencies, "base/s=s", ""),
-                   exactFrequencies = TRUE,
-                   stopwords = append(RKorAPClient::synsemanticStopwords(), node),
-                   seed = 7,
-                   expand = length(vc) != length(node),
-                   maxRecurse = 0,
-                   addExamples = FALSE,
-                   thresholdScore = "logDice",
-                   threshold = 2.0,
-                   localStopwords = c(),
-                   collocateFilterRegex = '^[:alnum:]+-?[:alnum:]*$',
-                   ...) {
-            # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
-            word <- frequency <- O <- NULL
+setMethod(
+  "collocationAnalysis", "KorAPConnection",
+  function(kco,
+           node,
+           vc = "",
+           lemmatizeNodeQuery = FALSE,
+           minOccur = 5,
+           leftContextSize = 5,
+           rightContextSize = 5,
+           topCollocatesLimit = 200,
+           searchHitsSampleLimit = 20000,
+           ignoreCollocateCase = FALSE,
+           withinSpan = ifelse(exactFrequencies, "base/s=s", ""),
+           exactFrequencies = TRUE,
+           stopwords = append(RKorAPClient::synsemanticStopwords(), node),
+           seed = 7,
+           expand = length(vc) != length(node),
+           maxRecurse = 0,
+           addExamples = FALSE,
+           thresholdScore = "logDice",
+           threshold = 2.0,
+           localStopwords = c(),
+           collocateFilterRegex = "^[:alnum:]+-?[:alnum:]*$",
+           ...) {
+    # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
+    word <- frequency <- O <- NULL
 
-            if(!exactFrequencies && (!is.na(withinSpan) && !is.null(withinSpan) && nchar(withinSpan)>0 )) {
-              stop(sprintf("Not empty withinSpan (='%s') requires exactFrequencies=TRUE", withinSpan), call. = FALSE)
-            }
+    if (!exactFrequencies && (!is.na(withinSpan) && !is.null(withinSpan) && nchar(withinSpan) > 0)) {
+      stop(sprintf("Not empty withinSpan (='%s') requires exactFrequencies=TRUE", withinSpan), call. = FALSE)
+    }
 
-            warnIfNotAuthorized(kco)
+    warnIfNotAuthorized(kco)
 
-            if (lemmatizeNodeQuery) {
-              node <- lemmatizeWordQuery(node)
-            }
+    if (lemmatizeNodeQuery) {
+      node <- lemmatizeWordQuery(node)
+    }
 
-            result <- if (length(node) > 1 || length(vc) > 1) {
-              grid <- if (expand) expand_grid(node=node, vc=vc) else tibble(node=node, vc=vc)
-              purrr::pmap(grid, function(node, vc, ...)
-                        collocationAnalysis(kco,
-                                            node =node,
-                                            vc = vc,
-                                            minOccur = minOccur,
-                                            leftContextSize = leftContextSize,
-                                            rightContextSize = rightContextSize,
-                                            topCollocatesLimit = topCollocatesLimit,
-                                            searchHitsSampleLimit = searchHitsSampleLimit,
-                                            ignoreCollocateCase = ignoreCollocateCase,
-                                            withinSpan = withinSpan,
-                                            exactFrequencies = exactFrequencies,
-                                            stopwords = stopwords,
-                                            addExamples = TRUE,
-                                            localStopwords = localStopwords,
-                                            seed = seed,
-                                            expand = expand,
-                                            ...) ) |>
-                bind_rows()
-            } else {
-              set.seed(seed)
-              candidates <- collocatesQuery(
-                kco,
-                node,
-                vc = vc,
-                minOccur = minOccur,
-                leftContextSize = leftContextSize,
-                rightContextSize = rightContextSize,
-                searchHitsSampleLimit = searchHitsSampleLimit,
-                ignoreCollocateCase = ignoreCollocateCase,
-                stopwords = append(stopwords, localStopwords),
-                ...
-              )
+    result <- if (length(node) > 1 || length(vc) > 1) {
+      grid <- if (expand) expand_grid(node = node, vc = vc) else tibble(node = node, vc = vc)
+      purrr::pmap(grid, function(node, vc, ...) {
+        collocationAnalysis(kco,
+          node = node,
+          vc = vc,
+          minOccur = minOccur,
+          leftContextSize = leftContextSize,
+          rightContextSize = rightContextSize,
+          topCollocatesLimit = topCollocatesLimit,
+          searchHitsSampleLimit = searchHitsSampleLimit,
+          ignoreCollocateCase = ignoreCollocateCase,
+          withinSpan = withinSpan,
+          exactFrequencies = exactFrequencies,
+          stopwords = stopwords,
+          addExamples = TRUE,
+          localStopwords = localStopwords,
+          seed = seed,
+          expand = expand,
+          ...
+        )
+      }) |>
+        bind_rows()
+    } else {
+      set.seed(seed)
+      candidates <- collocatesQuery(
+        kco,
+        node,
+        vc = vc,
+        minOccur = minOccur,
+        leftContextSize = leftContextSize,
+        rightContextSize = rightContextSize,
+        searchHitsSampleLimit = searchHitsSampleLimit,
+        ignoreCollocateCase = ignoreCollocateCase,
+        stopwords = append(stopwords, localStopwords),
+        ...
+      )
 
-              if (nrow(candidates) > 0) {
-                candidates <- candidates |>
-                  filter(frequency >= minOccur) |>
-                  slice_head(n=topCollocatesLimit)
-                collocationScoreQuery(
-                  kco,
-                  node = node,
-                  collocate = candidates$word,
-                  vc = vc,
-                  leftContextSize = leftContextSize,
-                  rightContextSize = rightContextSize,
-                  observed = if (exactFrequencies) NA else candidates$frequency,
-                  ignoreCollocateCase = ignoreCollocateCase,
-                  withinSpan = withinSpan,
-                  ...
-                ) |>
-                  filter(O >= minOccur) |>
-                  dplyr::arrange(dplyr::desc(logDice))
-              } else {
-                tibble()
-              }
-            }
-            if (maxRecurse > 0 & length(result) > 0 && any(!!thresholdScore >= threshold)) {
-              recurseWith <- result |>
-                filter(!!as.name(thresholdScore) >= threshold)
-              result <- collocationAnalysis(
-                kco,
-                node = paste0("(", buildCollocationQuery(
-                  removeWithinSpan(recurseWith$node, withinSpan),
-                  recurseWith$collocate,
-                  leftContextSize = leftContextSize,
-                  rightContextSize = rightContextSize,
-                  withinSpan = ""
-                ), ")"),
-                vc = vc,
-                minOccur = minOccur,
-                leftContextSize = leftContextSize,
-                rightContextSize = rightContextSize,
-                withinSpan = withinSpan,
-                maxRecurse = maxRecurse - 1,
-                stopwords = stopwords,
-                localStopwords = recurseWith$collocate,
-                exactFrequencies = exactFrequencies,
-                searchHitsSampleLimit = searchHitsSampleLimit,
-                topCollocatesLimit = topCollocatesLimit,
-                addExamples = FALSE
-              ) |>
-                bind_rows(result) |>
-                filter(logDice >= 2) |>
-                filter(.$O >= minOccur) |>
-                dplyr::arrange(dplyr::desc(logDice))
-            }
-            if (addExamples && length(result) > 0) {
-              result$query <-buildCollocationQuery(
-                result$node,
-                result$collocate,
-                leftContextSize = leftContextSize,
-                rightContextSize = rightContextSize,
-                withinSpan = withinSpan
-              )
-              result$example <- findExample(
-                kco,
-                query = result$query,
-                vc = result$vc
-              )
-            }
-            result
-          }
+      if (nrow(candidates) > 0) {
+        candidates <- candidates |>
+          filter(frequency >= minOccur) |>
+          slice_head(n = topCollocatesLimit)
+        collocationScoreQuery(
+          kco,
+          node = node,
+          collocate = candidates$word,
+          vc = vc,
+          leftContextSize = leftContextSize,
+          rightContextSize = rightContextSize,
+          observed = if (exactFrequencies) NA else candidates$frequency,
+          ignoreCollocateCase = ignoreCollocateCase,
+          withinSpan = withinSpan,
+          ...
+        ) |>
+          filter(O >= minOccur) |>
+          dplyr::arrange(dplyr::desc(logDice))
+      } else {
+        tibble()
+      }
+    }
+    if (maxRecurse > 0 & length(result) > 0 && any(!!thresholdScore >= threshold)) {
+      recurseWith <- result |>
+        filter(!!as.name(thresholdScore) >= threshold)
+      result <- collocationAnalysis(
+        kco,
+        node = paste0("(", buildCollocationQuery(
+          removeWithinSpan(recurseWith$node, withinSpan),
+          recurseWith$collocate,
+          leftContextSize = leftContextSize,
+          rightContextSize = rightContextSize,
+          withinSpan = ""
+        ), ")"),
+        vc = vc,
+        minOccur = minOccur,
+        leftContextSize = leftContextSize,
+        rightContextSize = rightContextSize,
+        withinSpan = withinSpan,
+        maxRecurse = maxRecurse - 1,
+        stopwords = stopwords,
+        localStopwords = recurseWith$collocate,
+        exactFrequencies = exactFrequencies,
+        searchHitsSampleLimit = searchHitsSampleLimit,
+        topCollocatesLimit = topCollocatesLimit,
+        addExamples = FALSE
+      ) |>
+        bind_rows(result) |>
+        filter(logDice >= 2) |>
+        filter(.$O >= minOccur) |>
+        dplyr::arrange(dplyr::desc(logDice))
+    }
+    if (addExamples && length(result) > 0) {
+      result$query <- buildCollocationQuery(
+        result$node,
+        result$collocate,
+        leftContextSize = leftContextSize,
+        rightContextSize = rightContextSize,
+        withinSpan = withinSpan
+      )
+      result$example <- findExample(
+        kco,
+        query = result$query,
+        vc = result$vc
+      )
+    }
+    result
+  }
 )
 
 # #' @export
@@ -217,9 +224,9 @@
     return(query)
   }
   needle <- sprintf("^\\(contains\\(<%s>, ?(.*)\\){2}$", withinSpan)
-  res <- gsub(needle, '\\1', query)
+  res <- gsub(needle, "\\1", query)
   needle <- sprintf("^contains\\(<%s>, ?(.*)\\)$", withinSpan)
-  res <- gsub(needle, '\\1', res)
+  res <- gsub(needle, "\\1", res)
   return(res)
 }
 
@@ -234,20 +241,21 @@
                               rightContextSize = 5,
                               ignoreCollocateCase = FALSE,
                               stopwords = c(),
-                              collocateFilterRegex =  '^[:alnum:]+-?[:alnum:]*$',
+                              collocateFilterRegex = "^[:alnum:]+-?[:alnum:]*$",
                               oldTable = data.frame(word = rep(NA, 1), frequency = rep(NA, 1)),
                               verbose = TRUE) {
   word <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
   frequency <- NULL
 
   if (nrow(matches) < 1) {
-    dplyr::tibble(word=c(), frequency=c())
+    dplyr::tibble(word = c(), frequency = c())
   } else if (index == 0) {
-    if (! "tokens" %in% colnames(matches) || ! is.list(matches$tokens)) {
+    if (!"tokens" %in% colnames(matches) || !is.list(matches$tokens)) {
       log_info(verbose, "Outdated KorAP server: Falling back to client side tokenization.\n")
-      return(snippet2FreqTable(matches$snippet, minOccur, leftContextSize, rightContextSize, ignoreCollocateCase = ignoreCollocateCase,
-                        stopwords = stopwords, oldTable = oldTable, verbose = verbose))
-
+      return(snippet2FreqTable(matches$snippet, minOccur, leftContextSize, rightContextSize,
+        ignoreCollocateCase = ignoreCollocateCase,
+        stopwords = stopwords, oldTable = oldTable, verbose = verbose
+      ))
     }
     log_info(verbose, paste("Joining", nrow(matches), "kwics\n"))
     for (i in 1:nrow(matches)) {
@@ -262,30 +270,30 @@
       )
     }
     log_info(verbose, paste("Aggregating", length(oldTable$word), "tokens\n"))
-    oldTable  |>
+    oldTable |>
       group_by(word) |>
       mutate(word = dplyr::case_when(ignoreCollocateCase ~ tolower(word), TRUE ~ word)) |>
-      summarise(frequency=sum(frequency), .groups = "drop") |>
+      summarise(frequency = sum(frequency), .groups = "drop") |>
       arrange(desc(frequency))
   } else {
-    stopwordsTable <- dplyr::tibble(word=stopwords)
+    stopwordsTable <- dplyr::tibble(word = stopwords)
 
     left <- tail(unlist(matches$tokens$left[index]), leftContextSize)
 
-#    cat(paste("left:", left, "\n", collapse=" "))
+    #    cat(paste("left:", left, "\n", collapse=" "))
 
     right <- head(unlist(matches$tokens$right[index]), rightContextSize)
 
-#    cat(paste("right:", right, "\n", collapse=" "))
+    #    cat(paste("right:", right, "\n", collapse=" "))
 
-    if(length(left) + length(right) == 0) {
+    if (length(left) + length(right) == 0) {
       oldTable
     } else {
       table(c(left, right)) |>
         dplyr::as_tibble(.name_repair = "minimal") |>
         dplyr::rename(word = 1, frequency = 2) |>
         dplyr::filter(str_detect(word, collocateFilterRegex)) |>
-        dplyr::anti_join(stopwordsTable, by="word")  |>
+        dplyr::anti_join(stopwordsTable, by = "word") |>
         dplyr::bind_rows(oldTable)
     }
   }
@@ -302,14 +310,14 @@
                               ignoreCollocateCase = FALSE,
                               stopwords = c(),
                               tokenizeRegex = "([! )(\uc2\uab,.:?\u201e\u201c\'\"]+|&quot;)",
-                              collocateFilterRegex =  '^[:alnum:]+-?[:alnum:]*$',
+                              collocateFilterRegex = "^[:alnum:]+-?[:alnum:]*$",
                               oldTable = data.frame(word = rep(NA, 1), frequency = rep(NA, 1)),
                               verbose = TRUE) {
   word <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
   frequency <- NULL
 
   if (length(snippet) < 1) {
-    dplyr::tibble(word=c(), frequency=c())
+    dplyr::tibble(word = c(), frequency = c())
   } else if (length(snippet) > 1) {
     log_info(verbose, paste("Joining", length(snippet), "kwics\n"))
     for (s in snippet) {
@@ -323,39 +331,41 @@
       )
     }
     log_info(verbose, paste("Aggregating", length(oldTable$word), "tokens\n"))
-    oldTable  |>
+    oldTable |>
       group_by(word) |>
       mutate(word = dplyr::case_when(ignoreCollocateCase ~ tolower(word), TRUE ~ word)) |>
-      summarise(frequency=sum(frequency), .groups = "drop") |>
+      summarise(frequency = sum(frequency), .groups = "drop") |>
       arrange(desc(frequency))
   } else {
-    stopwordsTable <- dplyr::tibble(word=stopwords)
+    stopwordsTable <- dplyr::tibble(word = stopwords)
     match <-
       str_match(
         snippet,
         '<span class="context-left">(<span class="more"></span>)?(.*[^ ]) *</span><span class="match"><mark>.*</mark></span><span class="context-right"> *([^<]*)'
       )
 
-    left <- if(leftContextSize > 0)
+    left <- if (leftContextSize > 0) {
       tail(unlist(str_split(match[1, 3], tokenizeRegex)), leftContextSize)
-    else
+    } else {
       ""
-#    cat(paste("left:", left, "\n", collapse=" "))
+    }
+    #    cat(paste("left:", left, "\n", collapse=" "))
 
-    right <- if(rightContextSize > 0)
+    right <- if (rightContextSize > 0) {
       head(unlist(str_split(match[1, 4], tokenizeRegex)), rightContextSize)
-    else
-        ""
-#    cat(paste("right:", right, "\n", collapse=" "))
+    } else {
+      ""
+    }
+    #    cat(paste("right:", right, "\n", collapse=" "))
 
-    if(is.na(left[1]) || is.na(right[1]) || length(left) + length(right) == 0) {
+    if (is.na(left[1]) || is.na(right[1]) || length(left) + length(right) == 0) {
       oldTable
     } else {
       table(c(left, right)) |>
         dplyr::as_tibble(.name_repair = "minimal") |>
         dplyr::rename(word = 1, frequency = 2) |>
         dplyr::filter(str_detect(word, collocateFilterRegex)) |>
-        dplyr::anti_join(stopwordsTable, by="word")  |>
+        dplyr::anti_join(stopwordsTable, by = "word") |>
         dplyr::bind_rows(oldTable)
     }
   }
@@ -442,21 +452,22 @@
            matchOnly = TRUE) {
     out <- character(length = length(query))
 
-    if (length(vc) < length(query))
+    if (length(vc) < length(query)) {
       vc <- rep(vc, length(query))
+    }
 
     for (i in seq_along(query)) {
       q <- corpusQuery(kco, paste0("(", query[i], ")"), vc = vc[i], metadataOnly = FALSE)
       if (q@totalResults > 0) {
-        q <- fetchNext(q, maxFetch=50, randomizePageOrder=F)
+        q <- fetchNext(q, maxFetch = 50, randomizePageOrder = F)
         example <- as.character((q@collectedMatches)$snippet[1])
-        out[i] <- if(matchOnly) {
-          gsub('.*<mark>(.+)</mark>.*', '\\1', example)
+        out[i] <- if (matchOnly) {
+          gsub(".*<mark>(.+)</mark>.*", "\\1", example)
         } else {
-          stringr::str_replace(example, '<[^>]*>', '')
+          stringr::str_replace(example, "<[^>]*>", "")
         }
       } else {
-        out[i] = ""
+        out[i] <- ""
       }
     }
     out
@@ -475,19 +486,20 @@
            ...) {
     frequency <- NULL
     q <- corpusQuery(kco, query, vc, metadataOnly = F, ...)
-    if(q@totalResults == 0) {
-      tibble(word=c(), frequency=c())
+    if (q@totalResults == 0) {
+      tibble(word = c(), frequency = c())
     } else {
-      q <- fetchNext(q, maxFetch=searchHitsSampleLimit, randomizePageOrder=TRUE)
-      matches2FreqTable (q@collectedMatches,
-                         0,
-                        minOccur = minOccur,
-                        leftContextSize = leftContextSize,
-                        rightContextSize = rightContextSize,
-                        ignoreCollocateCase = ignoreCollocateCase,
-                        stopwords = stopwords,
-                        ...,
-                        verbose = kco@verbose) |>
+      q <- fetchNext(q, maxFetch = searchHitsSampleLimit, randomizePageOrder = TRUE)
+      matches2FreqTable(q@collectedMatches,
+        0,
+        minOccur = minOccur,
+        leftContextSize = leftContextSize,
+        rightContextSize = rightContextSize,
+        ignoreCollocateCase = ignoreCollocateCase,
+        stopwords = stopwords,
+        ...,
+        verbose = kco@verbose
+      ) |>
         mutate(frequency = frequency * q@totalResults / min(q@totalResults, searchHitsSampleLimit)) |>
         filter(frequency >= minOccur)
     }

diff --git a/R/logging.R b/R/logging.R
new file mode 100644
index 0000000..55be5cf
--- /dev/null
+++ b/R/logging.R

@@ -0,0 +1,77 @@
+#' Logging utilities for RKorAPClient
+#'
+#' This module provides centralized logging functions used throughout the package
+#' for progress reporting and ETA calculations.
+
+#' Log informational messages with optional coloring
+#'
+#' @param v logical flag indicating whether to output the message
+#' @param ... message components to concatenate and display
+#' @keywords internal
+log_info <- function(v, ...) {
+  green <- "\033[32m"
+  reset <- "\033[0m"
+  cat(ifelse(v, paste0(green, ..., reset), ""))
+}
+
+#' Format duration in seconds to human-readable format
+#'
+#' Converts a duration in seconds to a formatted string with days, hours, minutes, and seconds.
+#' Used for ETA calculations and progress reporting.
+#'
+#' @param seconds numeric duration in seconds
+#' @return character string with formatted duration
+#' @keywords internal
+#' @examples
+#' \dontrun{
+#' format_duration(3661) # "01h 01m 01s"
+#' format_duration(86461) # "1d 00h 01m 01s"
+#' }
+format_duration <- function(seconds) {
+  if (is.na(seconds) || !is.finite(seconds) || seconds < 0) {
+    return("00s")
+  }
+
+  days <- floor(seconds / (24 * 3600))
+  seconds <- seconds %% (24 * 3600)
+  hours <- floor(seconds / 3600)
+  seconds <- seconds %% 3600
+  minutes <- floor(seconds / 60)
+  seconds <- floor(seconds %% 60)
+
+  paste0(
+    if (days > 0) paste0(days, "d ") else "",
+    if (hours > 0 || days > 0) paste0(sprintf("%02d", hours), "h ") else "",
+    if (minutes > 0 || hours > 0 || days > 0) paste0(sprintf("%02d", minutes), "m ") else "",
+    paste0(sprintf("%02d", seconds), "s")
+  )
+}
+
+#' Calculate and format ETA for batch operations
+#'
+#' Helper function to calculate estimated time of arrival based on elapsed time
+#' and progress through a batch operation.
+#'
+#' @param current_item current item number (1-based)
+#' @param total_items total number of items to process
+#' @param start_time POSIXct start time of the operation
+#' @return character string with formatted ETA and completion time or empty string if not calculable
+#' @keywords internal
+calculate_eta <- function(current_item, total_items, start_time) {
+  if (current_item <= 1 || total_items <= 1) {
+    return("")
+  }
+
+  elapsed_time <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
+  if (elapsed_time <= 0) {
+    return("")
+  }
+
+  avg_time_per_item <- elapsed_time / (current_item - 1)
+  remaining_items <- total_items - current_item + 1
+  eta_seconds <- avg_time_per_item * remaining_items
+  estimated_completion_time <- Sys.time() + eta_seconds
+  completion_time_str <- format(estimated_completion_time, "%Y-%m-%d %H:%M:%S")
+
+  paste0(". ETA: ", format_duration(eta_seconds), " (", completion_time_str, ")")
+}

diff --git a/R/textMetadata.R b/R/textMetadata.R
index a6f00ce..8984967 100644
--- a/R/textMetadata.R
+++ b/R/textMetadata.R

@@ -1,4 +1,5 @@
-setGeneric("textMetadata", function(kco, ...)  standardGeneric("textMetadata") )
+#' @include logging.R
+setGeneric("textMetadata", function(kco, ...) standardGeneric("textMetadata"))
 
 #' Retrieve metadata for a text, identified by its sigle (id)
 #'
@@ -27,48 +28,53 @@
 #' }
 #'
 #' @export
-setMethod("textMetadata", "KorAPConnection",
- function(kco, textSigle, verbose = kco@verbose) {
-  # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
-  key <- 0
-  if (length(textSigle) > 1)
-    do.call(bind_rows, Map(function(atomicSigle)
-      textMetadata(kco, atomicSigle), textSigle))
-  else {
-    url <-
-      paste0(kco@apiUrl, 'corpus/',
-             URLencode(enc2utf8(textSigle), reserved = TRUE))
-    log_info(verbose, "Getting metadata for ", textSigle, sep = "")
-    res <- apiCall(kco, url)
-    log_info(verbose, ifelse(is.null(res) || "errors" %in% names(res), " [error]\n",  "\n"))
-
-    if(is.null(res)) {
-      res <- tibble(errors="API request failed")
+setMethod(
+  "textMetadata", "KorAPConnection",
+  function(kco, textSigle, verbose = kco@verbose) {
+    # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
+    key <- 0
+    if (length(textSigle) > 1) {
+      do.call(bind_rows, Map(function(atomicSigle) {
+        textMetadata(kco, atomicSigle)
+      }, textSigle))
     } else {
-      if ("document" %in% names(res) & "fields" %in% names(res$document) && length(res$document$fields) > 0) {
-        res <- as_tibble(res$document$fields) %>%
-          dplyr::mutate(across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = "\\t")))) %>%
-          select(key, value) %>%
-          tidyr::pivot_wider(names_from = key, values_from = value, names_repair = "unique") %>%
-          mutate(
-            textSigle = as.character(textSigle),
-            requestUrl = url,
-            webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))) %>%
-          mutate(across(everything(), as.character)) %>%
-          relocate(textSigle)
+      url <-
+        paste0(
+          kco@apiUrl, "corpus/",
+          URLencode(enc2utf8(textSigle), reserved = TRUE)
+        )
+      log_info(verbose, "Getting metadata for ", textSigle, sep = "")
+      res <- apiCall(kco, url)
+      log_info(verbose, ifelse(is.null(res) || "errors" %in% names(res), " [error]\n", "\n"))
+
+      if (is.null(res)) {
+        res <- tibble(errors = "API request failed")
       } else {
-        res <- lapply(res, function(x) paste0(x, collapse = "\\t")) # flatten list
-        res <- as_tibble(res) %>%
-          head(n=1) %>%
-          mutate(
-            requestUrl = url,
-            textSigle = textSigle,
-            webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))) %>%
-          relocate(textSigle)
+        if ("document" %in% names(res) & "fields" %in% names(res$document) && length(res$document$fields) > 0) {
+          res <- as_tibble(res$document$fields) %>%
+            dplyr::mutate(across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = "\\t")))) %>%
+            select(key, value) %>%
+            tidyr::pivot_wider(names_from = key, values_from = value, names_repair = "unique") %>%
+            mutate(
+              textSigle = as.character(textSigle),
+              requestUrl = url,
+              webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))
+            ) %>%
+            mutate(across(everything(), as.character)) %>%
+            relocate(textSigle)
+        } else {
+          res <- lapply(res, function(x) paste0(x, collapse = "\\t")) # flatten list
+          res <- as_tibble(res) %>%
+            head(n = 1) %>%
+            mutate(
+              requestUrl = url,
+              textSigle = textSigle,
+              webUIRequestUrl = paste0(kco@KorAPUrl, sprintf('?q=<base/s=t>&cq=textSigle+%%3D+"%s"', url_encode(enc2utf8(textSigle))))
+            ) %>%
+            relocate(textSigle)
+        }
       }
+      res
     }
-    res
   }
-})
-
-
+)

diff --git a/tests/testthat/test-corpusStats-eta.R b/tests/testthat/test-corpusStats-eta.R
index 5a57079..f95ccaa 100644
--- a/tests/testthat/test-corpusStats-eta.R
+++ b/tests/testthat/test-corpusStats-eta.R

@@ -45,7 +45,7 @@
   # Test 3: Check that ETA is displayed (format like "ETA: MM:SS" or "ETA: HH:MM:SS")
   expect_match(
     output_str,
-    "ETA: \\d{2}:\\d{2}",
+    "ETA: \\d{2}",
     info = "ETA format not found in output"
   )
commit	6dfeed9bd68d0b36be2c47718c1b29a17c8c2d1a	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Tue Jun 03 11:58:06 2025 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Tue Jun 03 11:58:06 2025 +0200
tree	ede04b9396f6b33babf58668d041c96b81957b22
parent	71c30d80c6334c8c7a4b772485ca281e4664a5f3 [diff]