# Page numbering and ETA tests
2
test_that("page numbering is displayed correctly in sequential mode", {
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE, accessToken = NULL)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Divert console output to a temporary file while fetching. The sink is
  # removed in `finally`, so a failing fetchNext() cannot leave a stale sink
  # behind that would keep diverting the output of everything run afterwards.
  temp_file <- tempfile()
  sink(temp_file)
  tryCatch(
    {
      q <- fetchNext(q, maxFetch = 75)
      cat("\n") # make sure the captured text ends with a line break
    },
    finally = sink()
  )

  # Read the captured output back and discard the temporary file
  output <- readLines(temp_file)
  unlink(temp_file)

  # Combined output string used by all expectations below
  output_str <- paste(output, collapse = "\n")

  # Echo the captured output so it stays visible in the test log
  cat("\nCaptured output from sequential mode:\n")
  cat(output_str)

  # Test 1: Check page numbering format
  expect_match(
    output_str,
    "Retrieved page .+/\\d+ \\(page \\d+ of \\d+ total\\)",
    info = "Page numbering format not found in output"
  )

  # Test 2: Check that an ETA with an actual value (not "N/A") is displayed.
  # A negative lookahead is used because a character-class sequence such as
  # [^N][^/][^A] does not express "not N/A": it also rejects any value that
  # merely has "/" as its second or "A" as its third character.
  expect_match(
    output_str,
    "ETA: (?!N/A)\\S",
    perl = TRUE,
    info = "ETA format is not correct or contains N/A"
  )

  # Test 3: Check that completion time is shown in parentheses
  expect_match(
    output_str,
    "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
    info = "Completion time format not found in output"
  )
})
47
test_that("page numbering and ETA are displayed correctly in randomized mode", {
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE, accessToken = NULL)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Set a fixed seed for reproducible tests
  set.seed(123)

  # Divert console output to a temporary file while fetching. The sink is
  # removed in `finally`, so a failing fetchNext() cannot leave a stale sink
  # behind that would keep diverting the output of everything run afterwards.
  temp_file <- tempfile()
  sink(temp_file)
  tryCatch(
    {
      q <- fetchNext(q, maxFetch = 75, randomizePageOrder = TRUE)
      cat("\n") # make sure the captured text ends with a line break
    },
    finally = sink()
  )

  # Read the captured output back and discard the temporary file
  output <- readLines(temp_file)
  unlink(temp_file)

  # Combined output string used by all expectations below
  output_str <- paste(output, collapse = "\n")

  # Echo the captured output so it stays visible in the test log
  cat("\nCaptured output from randomized mode:\n")
  cat(output_str)

  # Test 1: Check page numbering format in randomized mode
  expect_match(
    output_str,
    "Retrieved page .+/\\d+ \\(actual page \\d+\\)",
    info = "Randomized page numbering format not found in output"
  )

  # Test 2: Check that an ETA with an actual value is displayed, i.e. not
  # "N/A (random order)". A negative lookahead is used because a
  # character-class sequence such as [^N][^/][^A] does not express "not N/A":
  # it also rejects any value that merely has "/" as its second or "A" as its
  # third character.
  expect_match(
    output_str,
    "ETA: (?!N/A)\\S",
    perl = TRUE,
    info = "ETA format is incorrect or contains N/A"
  )

  # Test 3: Check that proper time values and completion time are shown
  expect_match(
    output_str,
    "ETA: \\d+s \\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
    info = "Time format or completion time not found in output"
  )
})
95
test_that("page numbering and ETA are displayed correctly in subsequent calls with randomized mode", {
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE, accessToken = NULL)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Set a fixed seed for reproducible tests
  set.seed(123)

  # First call to fetchNext (its output is not captured or checked)
  q <- fetchNext(q, maxFetch = 75, randomizePageOrder = TRUE)

  # Capture the output of the subsequent call only. The sink is removed in
  # `finally`, so a failing fetchNext() cannot leave a stale sink behind that
  # would keep diverting the output of everything run afterwards.
  temp_file <- tempfile()
  sink(temp_file)
  tryCatch(
    {
      q <- fetchNext(q, maxFetch = 50, randomizePageOrder = TRUE)
      cat("\n") # make sure the captured text ends with a line break
    },
    finally = sink()
  )

  # Read the captured output back and discard the temporary file
  output <- readLines(temp_file)
  unlink(temp_file)

  # Combined output string used by all expectations below
  output_str <- paste(output, collapse = "\n")

  # Echo the captured output so it stays visible in the test log
  cat("\nCaptured output from subsequent call with randomized mode:\n")
  cat(output_str)

  # Test 1: Check that page numbering format is correct and not negative
  # (the page counter must start with a digit from 1 to 9)
  expect_match(
    output_str,
    "Retrieved page [1-9]\\d*/\\d+ \\(actual page \\d+\\)",
    info = "Randomized page numbering format is incorrect or negative in subsequent call"
  )

  # Test 2: Check that either ETA is displayed or no ETA (for single page fetches)
  # ETA may not be shown if there's only one page to fetch
  if (grepl("ETA:", output_str)) {
    expect_match(
      output_str,
      "ETA: \\d+s",
      info = "ETA format should show digits followed by 's' when present"
    )

    # Test 3: Check that completion time is shown in parentheses when ETA is present
    expect_match(
      output_str,
      "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
      info = "Completion time not found when ETA is present"
    )
  } else {
    # If no ETA, just check that timing information is present
    expect_match(
      output_str,
      "in\\s+\\d+\\.\\d+s",
      info = "Timing information should be present even without ETA"
    )
  }
})