Add ETA logging also for corpus/frequency query

Change-Id: I6bc9b65db23c1e35a0764053c2e28faa1f442655
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index dc9294b..93f5458 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -190,10 +190,127 @@
            context = NULL) {
     if (length(query) > 1 || length(vc) > 1) {
       grid <- if (expand) expand_grid(query = query, vc = vc) else tibble(query = query, vc = vc)
-      purrr::pmap(grid, function(query, vc, ...) {
-        corpusQuery(kco, query = query, vc = vc, ql = ql, verbose = verbose, as.df = TRUE)
-      }) %>%
-        bind_rows()
+
+      # Initialize timing variables for ETA calculation
+      total_queries <- nrow(grid)
+      current_query <- 0
+      start_time <- Sys.time()
+
+      # Helper function to format duration
+      format_duration <- function(seconds) {
+        if (is.na(seconds) || seconds < 0) {
+          return("00s")
+        }
+        days <- floor(seconds / (24 * 3600))
+        seconds <- seconds %% (24 * 3600)
+        hours <- floor(seconds / 3600)
+        seconds <- seconds %% 3600
+        minutes <- floor(seconds / 60)
+        seconds <- floor(seconds %% 60)
+        paste0(
+          if (days > 0) paste0(days, "d ") else "",
+          if (hours > 0 || days > 0) paste0(sprintf("%02d", hours), "h ") else "",
+          if (minutes > 0 || hours > 0 || days > 0) paste0(sprintf("%02d", minutes), "m ") else "",
+          paste0(sprintf("%02d", seconds), "s")
+        )
+      }
+
+      results <- purrr::pmap(grid, function(query, vc, ...) {
+        current_query <<- current_query + 1
+
+        # Execute the single query directly (avoiding recursive call)
+        contentFields <- c("snippet", "tokens")
+        query_fields <- fields
+        if (metadataOnly) {
+          query_fields <- query_fields[!query_fields %in% contentFields]
+        }
+        if (!"textSigle" %in% query_fields) {
+          query_fields <- c(query_fields, "textSigle")
+        }
+        request <-
+          paste0(
+            "?q=",
+            url_encode(enc2utf8(query)),
+            ifelse(!metadataOnly && !is.null(context) && context != "", paste0("&context=", url_encode(enc2utf8(context))), ""),
+            ifelse(vc != "", paste0("&cq=", url_encode(enc2utf8(vc))), ""),
+            ifelse(!metadataOnly, "&show-tokens=true", ""),
+            "&ql=", ql
+          )
+        webUIRequestUrl <- paste0(kco@KorAPUrl, request)
+        requestUrl <- paste0(
+          kco@apiUrl,
+          "search",
+          request,
+          "&fields=",
+          paste(query_fields, collapse = ","),
+          if (metadataOnly) "&access-rewrite-disabled=true" else ""
+        )
+
+        # Show individual query progress
+        log_info(verbose, "\rSearching \"", query, "\" in \"", vc, "\"", sep = "")
+        res <- apiCall(kco, paste0(requestUrl, "&count=0"))
+        if (is.null(res)) {
+          log_info(verbose, ": API call failed\n")
+          totalResults <- 0
+        } else {
+          totalResults <- as.integer(res$meta$totalResults)
+          log_info(verbose, ": ", totalResults, " hits")
+          if (!is.null(res$meta$cached)) {
+            log_info(verbose, " [cached]")
+          } else if (!is.null(res$meta$benchmark)) {
+            if (is.character(res$meta$benchmark) && grepl("s$", res$meta$benchmark)) {
+              time_value <- as.numeric(sub("s$", "", res$meta$benchmark))
+              formatted_time <- paste0(round(time_value, 2), "s")
+              log_info(verbose, ", took ", formatted_time)
+            } else {
+              log_info(verbose, ", took ", res$meta$benchmark)
+            }
+          }
+          log_info(verbose, "\n")
+        }
+
+        result <- data.frame(
+          query = query,
+          totalResults = totalResults,
+          vc = vc,
+          webUIRequestUrl = webUIRequestUrl,
+          stringsAsFactors = FALSE
+        )
+
+        # Calculate and display ETA information if verbose and we have more than one query
+        if (verbose && total_queries > 1) {
+          elapsed_time <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
+
+          if (current_query > 1) { # Only calculate ETA after the first query
+            avg_time_per_query <- elapsed_time / current_query
+            remaining_queries <- total_queries - current_query
+            estimated_remaining_seconds <- remaining_queries * avg_time_per_query
+            estimated_completion_time <- Sys.time() + estimated_remaining_seconds
+
+            eta_str <- format_duration(estimated_remaining_seconds)
+            completion_time_str <- format(estimated_completion_time, "%Y-%m-%d %H:%M:%S")
+
+            # Create progress display
+            progress_display <- paste0(
+              "Query ",
+              sprintf(paste0("%", nchar(total_queries), "d"), current_query),
+              "/",
+              sprintf("%d", total_queries),
+              " completed. Avg: ",
+              sprintf("%.1f", avg_time_per_query),
+              "s/query. ETA: ",
+              eta_str,
+              " (", completion_time_str, ")"
+            )
+
+            log_info(verbose, progress_display, "\n")
+          }
+        }
+
+        return(result)
+      })
+
+      results %>% bind_rows()
     } else {
       contentFields <- c("snippet", "tokens")
       if (metadataOnly) {
diff --git a/tests/testthat/test-corpusquery-eta.R b/tests/testthat/test-corpusquery-eta.R
new file mode 100644
index 0000000..d3f3ae1
--- /dev/null
+++ b/tests/testthat/test-corpusquery-eta.R
@@ -0,0 +1,191 @@
+test_that("corpusQuery displays ETA with multiple queries", {
+  skip_if_offline()  # Commented out for testing
+  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
+
+  # Use simple queries to ensure they complete quickly
+  query <- c("Test", "der")
+  vc <- c("pubDate in 2020", "pubDate in 2021")
+
+  # Capture output from corpusQuery with multiple queries
+  temp_file <- tempfile()
+  sink(temp_file)
+  result <- corpusQuery(kco, query = query, vc = vc, metadataOnly = TRUE, as.df = TRUE, expand = TRUE)
+  cat("\n")
+  sink()
+
+  # Read the captured output
+  output <- readLines(temp_file)
+  unlink(temp_file)
+
+  # Echo the output to console for debugging
+  cat("\nCaptured output from corpusQuery with multiple queries:\n")
+  cat(paste(output, collapse = "\n"))
+
+  # Combined output string for all tests - strip ANSI color codes
+  output_str <- paste(output, collapse = "\n")
+  # Remove ANSI escape sequences
+  output_str <- gsub("\\033\\[[0-9;]*m", "", output_str)
+
+  # Test 1: Check that query progress is shown (format: "Query X/Y completed")
+  expect_match(
+    output_str,
+    "Query \\d+/\\d+ completed",
+    info = "Query progress counter not found in output"
+  )
+
+  # Test 2: Check that ETA is displayed (should contain digits followed by 's')
+  expect_match(
+    output_str,
+    "ETA: \\d+s",
+    info = "ETA format should show digits followed by 's'"
+  )
+
+  # Test 3: Check that completion time is shown
+  expect_match(
+    output_str,
+    "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
+    info = "Completion time format not found in output"
+  )
+
+  # Test 4: Check that we get results for all query combinations
+  # Note: with expand=TRUE (default), we should get length(query) * length(vc) results
+  expect_equal(nrow(result), length(query) * length(vc),
+               info = paste("Should get results for all query/vc combinations. Got:", nrow(result), "Expected:", length(query) * length(vc)))
+})
+
+test_that("corpusQuery ETA works with frequencyQuery", {
+  skip_if_offline()  # Commented out for testing
+  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
+
+  # Test the exact pattern from the user's example (but smaller)
+  query <- c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn")
+  years <- c(2020:2021)  # Just 2 years for testing
+  as.alternatives <- TRUE
+  vc <- "textType = /Zeit.*/ & pubDate in"
+
+  # Capture output from frequencyQuery which calls corpusQuery internally
+  temp_file <- tempfile()
+  sink(temp_file)
+  result <- frequencyQuery(kco, query, paste(vc, years), as.alternatives = as.alternatives)
+  cat("\n")
+  sink()
+
+  # Read the captured output
+  output <- readLines(temp_file)
+  unlink(temp_file)
+
+  # Echo the output to console for debugging
+  cat("\nCaptured output from frequencyQuery with ETA:\n")
+  cat(paste(output, collapse = "\n"))
+
+  # Combined output string for all tests - strip ANSI color codes
+  output_str <- paste(output, collapse = "\n")
+  # Remove ANSI escape sequences
+  output_str <- gsub("\\033\\[[0-9;]*m", "", output_str)
+
+  # Test 1: Check that multiple queries are processed (format: "Query X/Y completed")
+  expect_match(
+    output_str,
+    "Query \\d+/\\d+ completed",
+    info = "Query progress should be shown for multiple queries"
+  )
+
+  # Test 2: Check that ETA is displayed when processing multiple queries
+  expect_match(
+    output_str,
+    "ETA:",
+    info = "ETA should be displayed when processing multiple queries"
+  )
+
+  # Test 3: Check that we get results
+  expect_true(nrow(result) > 0,
+              info = "Should get frequency query results")
+
+  # Test 4: Check that result has expected columns
+  expect_true(all(c("query", "vc", "totalResults") %in% names(result)),
+              info = "Result should contain expected columns")
+})
+
+test_that("corpusQuery ETA only displays with verbose=TRUE and multiple queries", {
+  skip_if_offline()  # Commented out for testing
+
+  # Test with verbose=FALSE - should not show ETA
+  kco_quiet <- KorAPConnection(verbose = FALSE, cache = FALSE)
+  query <- c("Test", "der")
+  vc <- c("pubDate in 2020", "pubDate in 2021")
+
+  # Capture output with verbose=FALSE
+  temp_file <- tempfile()
+  sink(temp_file)
+  result1 <- corpusQuery(kco_quiet, query = query, vc = vc, metadataOnly = TRUE, as.df = TRUE)
+  cat("\n")
+  sink()
+
+  output <- readLines(temp_file)
+  unlink(temp_file)
+  output_str <- paste(output, collapse = "\n")
+  # Remove ANSI escape sequences
+  output_str <- gsub("\\033\\[[0-9;]*m", "", output_str)
+
+  # Should not contain ETA information when verbose=FALSE
+  expect_false(grepl("ETA:", output_str),
+               info = "ETA should not be displayed when verbose=FALSE")
+
+  # Test with single query - should not show ETA even with verbose=TRUE
+  kco_verbose <- KorAPConnection(verbose = TRUE, cache = FALSE)
+  temp_file2 <- tempfile()
+  sink(temp_file2)
+  result2 <- corpusQuery(kco_verbose, query = "Test", vc = "pubDate in 2020",
+                        metadataOnly = TRUE, as.df = TRUE)
+  cat("\n")
+  sink()
+
+  output2 <- readLines(temp_file2)
+  unlink(temp_file2)
+  output_str2 <- paste(output2, collapse = "\n")
+  # Remove ANSI escape sequences
+  output_str2 <- gsub("\\033\\[[0-9;]*m", "", output_str2)
+
+  # Should not contain ETA for single query
+  expect_false(grepl("ETA:", output_str2),
+               info = "ETA should not be displayed for single queries")
+})
+
+test_that("corpusQuery ETA format_duration function works correctly", {
+  # This tests the internal format_duration function indirectly
+  # by checking that ETA displays reasonable time formats
+  skip_if_offline()  # Commented out for testing
+  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
+
+  # Use multiple queries to trigger ETA display
+  query <- c("Test", "der", "und")
+  vc <- c("pubDate in 2020", "pubDate in 2021")
+
+  # Capture output
+  temp_file <- tempfile()
+  sink(temp_file)
+  result <- corpusQuery(kco, query = query, vc = vc, metadataOnly = TRUE, as.df = TRUE)
+  cat("\n")
+  sink()
+
+  output <- readLines(temp_file)
+  unlink(temp_file)
+  output_str <- paste(output, collapse = "\n")
+  # Remove ANSI escape sequences
+  output_str <- gsub("\\033\\[[0-9;]*m", "", output_str)
+
+  # Check that ETA contains reasonable time format (digits followed by 's')
+  # This indirectly tests that format_duration is working
+  expect_match(
+    output_str,
+    "ETA: \\d+s",
+    info = "ETA should display time in seconds format"
+  )
+
+  # Also check for completion time format which uses the same function
+  expect_match(
+    output_str,
+    "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
+    info = "Completion time should be formatted correctly"
+  )
+})