# Page numbering and ETA tests
2
test_that("page numbering is displayed correctly in sequential mode", {
  # Skip when no internet connection is available.
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Capture what fetchNext() writes to stdout. capture.output() removes its
  # sink on exit, so the console is restored even if fetchNext() errors; a bare
  # sink(file) ... sink() pair would leave the diversion active (hiding all
  # later output) and leak the temp file. The assignment to `q` is evaluated in
  # this environment, so the updated query object is still available below.
  output <- capture.output(q <- fetchNext(q, maxFetch = 75))

  # Echo the output to console
  cat("\nCaptured output from sequential mode:\n")
  cat(paste(output, collapse = "\n"))

  # Combined output string for all tests
  output_str <- paste(output, collapse = "\n")

  # Test 1: Check page numbering format
  expect_match(
    output_str,
    "Retrieved page .+/\\d+ \\(page \\d+ of \\d+ total\\)",
    info = "Page numbering format not found in output"
  )

  # Test 2: Check that ETA is displayed with time values (not "N/A")
  # NOTE(review): this pattern only proves that at least one "ETA: " is
  # followed by three characters that are not N, /, A position by position;
  # other ETA occurrences reading "N/A" would not make it fail.
  expect_match(
    output_str,
    "ETA: [^N][^/][^A]",
    info = "ETA format is not correct or contains N/A"
  )

  # Test 3: Check that completion time is shown in parentheses
  # (timestamp shaped like "(YYYY-MM-DD HH:MM:SS)")
  expect_match(
    output_str,
    "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
    info = "Completion time format not found in output"
  )
})
47
test_that("page numbering and ETA are displayed correctly in randomized mode", {
  # Skip when no internet connection is available.
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Set a fixed seed for reproducible tests
  set.seed(123)

  # Capture what fetchNext() writes to stdout. capture.output() removes its
  # sink on exit, so the console is restored even if fetchNext() errors; a bare
  # sink(file) ... sink() pair would leave the diversion active (hiding all
  # later output) and leak the temp file. The assignment to `q` is evaluated in
  # this environment, so the updated query object is still available below.
  output <- capture.output(
    q <- fetchNext(q, maxFetch = 75, randomizePageOrder = TRUE)
  )

  # Echo the output to console
  cat("\nCaptured output from randomized mode:\n")
  cat(paste(output, collapse = "\n"))

  # Combined output string for all tests
  output_str <- paste(output, collapse = "\n")

  # Test 1: Check page numbering format in randomized mode
  expect_match(
    output_str,
    "Retrieved page .+/\\d+ \\(actual page \\d+\\)",
    info = "Randomized page numbering format not found in output"
  )

  # Test 2: Check that ETA is displayed and doesn't contain "N/A (random order)"
  # NOTE(review): this pattern only proves that at least one "ETA: " is
  # followed by three characters that are not N, /, A position by position;
  # other ETA occurrences reading "N/A" would not make it fail.
  expect_match(
    output_str,
    "ETA: [^N][^/][^A]",
    info = "ETA format is incorrect or contains N/A"
  )

  # Test 3: Check that proper time values and completion time are shown
  # (whole seconds followed by a "(YYYY-MM-DD HH:MM:SS)" timestamp)
  expect_match(
    output_str,
    "ETA: \\d+s \\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
    info = "Time format or completion time not found in output"
  )
})
95
test_that("page numbering and ETA are displayed correctly in subsequent calls with randomized mode", {
  # Skip when no internet connection is available.
  skip_if_offline()
  kco <- KorAPConnection(verbose = TRUE, cache = FALSE)
  q <- kco %>% corpusQuery("Test", "pubDate since 2014", fields = c("sigle"))

  # Set a fixed seed for reproducible tests
  set.seed(123)

  # First call to fetchNext (we don't need to test this part); its output is
  # deliberately not captured.
  q <- fetchNext(q, maxFetch = 75, randomizePageOrder = TRUE)

  # Capture the output of the subsequent call only. capture.output() removes
  # its sink on exit, so the console is restored even if fetchNext() errors; a
  # bare sink(file) ... sink() pair would leave the diversion active (hiding
  # all later output) and leak the temp file. The assignment to `q` is
  # evaluated in this environment.
  output <- capture.output(
    q <- fetchNext(q, maxFetch = 50, randomizePageOrder = TRUE)
  )

  # Echo the output to console
  cat("\nCaptured output from subsequent call with randomized mode:\n")
  cat(paste(output, collapse = "\n"))

  # Combined output string for all tests
  output_str <- paste(output, collapse = "\n")

  # Test 1: Check that page numbering format is correct and not negative
  # ([1-9]\\d* requires a positive page counter without a leading sign or zero)
  expect_match(
    output_str,
    "Retrieved page [1-9]\\d*/\\d+ \\(actual page \\d+\\)",
    info = "Randomized page numbering format is incorrect or negative in subsequent call"
  )

  # Test 2: Check that ETA is displayed - we're now ensuring it contains digits followed by 's'
  expect_match(
    output_str,
    "ETA: \\d+s",
    info = "ETA format should show digits followed by 's'"
  )

  # Test 3: Check that completion time is shown in parentheses
  # (timestamp shaped like "(YYYY-MM-DD HH:MM:SS)")
  expect_match(
    output_str,
    "\\(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\)",
    info = "Completion time not found in subsequent call output"
  )
})
145})