blob: c980470a5aa3c7b2c07a0f0598bb1240bb839803 [file] [log] [blame]
test_that("ci function works with basic input", {
  # Three rows using the column names ci() is called with by default here;
  # each row has a hit count that is exactly one tenth of its total.
  input <- data.frame(
    totalResults = c(100, 200, 50),
    total = c(1000, 2000, 500),
    query = c("test1", "test2", "test3")
  )

  out <- ci(input)

  # Shape: still a data frame, same number of rows, three computed columns.
  expect_s3_class(out, "data.frame")
  expect_true(hasName(out, "f"))
  expect_true(hasName(out, "conf.low"))
  expect_true(hasName(out, "conf.high"))
  expect_equal(nrow(out), 3)

  # Values: relative frequency is totalResults / total = 0.1 in every row.
  freq <- out$f
  expect_equal(freq[1], 0.1, tolerance = 0.001)
  expect_equal(freq[2], 0.1, tolerance = 0.001)
  expect_equal(freq[3], 0.1, tolerance = 0.001)
})
22
test_that("ci function handles custom column names", {
  # The count and total columns are passed to ci() as bare names through
  # the `x` and `N` arguments.
  counts <- data.frame(
    observed = c(50, 100),
    N_total = c(500, 1000),
    condition = c("A", "B")
  )

  out <- ci(counts, x = observed, N = N_total)

  expect_s3_class(out, "data.frame")
  expect_true(hasName(out, "f"))
  expect_true(hasName(out, "conf.low"))
  expect_true(hasName(out, "conf.high"))
  expect_equal(nrow(out), 2)

  # Both rows are 10% of their respective total.
  freq <- out$f
  expect_equal(freq[1], 0.1, tolerance = 0.001)
  expect_equal(freq[2], 0.1, tolerance = 0.001)
})
41
test_that("ci function handles different confidence levels", {
  # One observation is enough: only the interval widths are compared.
  df <- data.frame(
    totalResults = c(100),
    total = c(1000)
  )

  # 90% confidence level: the result keeps the expected shape.
  result_90 <- ci(df, conf.level = 0.90)
  expect_s3_class(result_90, "data.frame")
  expect_true("f" %in% names(result_90))
  expect_true("conf.low" %in% names(result_90))
  expect_true("conf.high" %in% names(result_90))

  # 99% confidence level
  result_99 <- ci(df, conf.level = 0.99)
  expect_s3_class(result_99, "data.frame")

  # The 99% interval must be strictly wider than the 90% interval.
  # expect_gt() prints both widths on failure, which expect_true(a > b)
  # cannot do.
  ci_width_90 <- result_90$conf.high[1] - result_90$conf.low[1]
  ci_width_99 <- result_99$conf.high[1] - result_99$conf.low[1]
  expect_gt(ci_width_99, ci_width_90)
})
64
test_that("ci function handles zero and negative totals", {
  # Row 1 has a usable total; row 2 has a zero total, row 3 a negative one.
  input <- data.frame(
    totalResults = c(10, 20, 30),
    total = c(100, 0, -10)
  )

  out <- ci(input)

  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 3)

  valid_row <- out[1, ]
  zero_total_row <- out[2, ]
  negative_total_row <- out[3, ]

  # The valid row gets a frequency and both interval bounds.
  expect_false(is.na(valid_row$f))
  expect_false(is.na(valid_row$conf.low))
  expect_false(is.na(valid_row$conf.high))

  # Non-positive totals produce NA in all three computed columns.
  expect_true(is.na(zero_total_row$f))
  expect_true(is.na(zero_total_row$conf.low))
  expect_true(is.na(zero_total_row$conf.high))
  expect_true(is.na(negative_total_row$f))
  expect_true(is.na(negative_total_row$conf.low))
  expect_true(is.na(negative_total_row$conf.high))
})
89
test_that("ci function handles NA values in totals", {
  # Only the middle row has a missing total.
  input <- data.frame(
    totalResults = c(10, 20, 30),
    total = c(100, NA, 300)
  )

  out <- ci(input)

  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 3)

  # Rows 1 and 3 have complete input, so their frequency is computed.
  freq <- out$f
  expect_false(is.na(freq[1]))
  expect_false(is.na(freq[3]))

  # The row with the NA total is NA in every computed column.
  na_total_row <- out[2, ]
  expect_true(is.na(na_total_row$f))
  expect_true(is.na(na_total_row$conf.low))
  expect_true(is.na(na_total_row$conf.high))
})
110
test_that("ci function handles edge cases with very small frequencies", {
  # One hit and zero hits, each out of a million.
  df <- data.frame(
    totalResults = c(1, 0),
    total = c(1000000, 1000000)
  )

  result <- ci(df)

  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 2)

  # A single hit gives a tiny but strictly positive frequency.
  # expect_gt()/expect_lt() show the offending value when they fail.
  expect_gt(result$f[1], 0)
  expect_lt(result$f[1], 0.01)

  # Zero hits give a frequency of exactly zero (not NA).
  expect_equal(result$f[2], 0)
})
127
test_that("ci function handles large numbers correctly", {
  # One million hits out of ten million.
  df <- data.frame(
    totalResults = c(1000000),
    total = c(10000000)
  )

  result <- ci(df)

  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 1)
  expect_equal(result$f[1], 0.1, tolerance = 0.001)

  # Both bounds must lie strictly inside (0, 1).
  # expect_gt()/expect_lt() report the actual bound on failure.
  expect_gt(result$conf.low[1], 0)
  expect_lt(result$conf.high[1], 1)
})
142
test_that("ci function preserves original columns", {
  # Besides the two count columns, the input carries three extra columns
  # (two character, one numeric).
  input <- data.frame(
    totalResults = c(100, 200),
    total = c(1000, 2000),
    query = c("test1", "test2"),
    condition = c("A", "B"),
    year = c(2020, 2021)
  )

  out <- ci(input)

  expect_s3_class(out, "data.frame")

  # All five input columns are still present in the result ...
  expect_true(hasName(out, "query"))
  expect_true(hasName(out, "condition"))
  expect_true(hasName(out, "year"))
  expect_true(hasName(out, "totalResults"))
  expect_true(hasName(out, "total"))

  # ... and the extra columns still hold their original values.
  expect_equal(out$query, c("test1", "test2"))
  expect_equal(out$condition, c("A", "B"))
  expect_equal(out$year, c(2020, 2021))
})
166
test_that("ci function handles empty data frame", {
  # Zero-row input that still has both count columns.
  no_rows <- data.frame(
    totalResults = numeric(0),
    total = numeric(0)
  )

  out <- ci(no_rows)

  # The result stays a zero-row data frame ...
  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 0)

  # ... but the computed columns must exist all the same.
  expect_true(hasName(out, "f"))
  expect_true(hasName(out, "conf.low"))
  expect_true(hasName(out, "conf.high"))
})
181
test_that("ci function handles all zero totals", {
  # No row has a usable total.
  input <- data.frame(
    totalResults = c(10, 20, 30),
    total = c(0, 0, 0)
  )

  out <- ci(input)

  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 3)

  # Every entry of every computed column is NA.
  freq_missing <- is.na(out$f)
  lower_missing <- is.na(out$conf.low)
  upper_missing <- is.na(out$conf.high)
  expect_true(all(freq_missing))
  expect_true(all(lower_missing))
  expect_true(all(upper_missing))
})
198
test_that("ci function validates confidence level parameter", {
  single_row <- data.frame(
    totalResults = c(100),
    total = c(1000)
  )

  # A level above 1, a level of exactly 0, and a negative level must each
  # raise an error.
  expect_error(ci(single_row, conf.level = 1.1))
  expect_error(ci(single_row, conf.level = 0))
  expect_error(ci(single_row, conf.level = -0.1))
})
210
test_that("ci function handles tibble input", {
  # tibble is optional. Record an explicit skip with its reason when it is
  # missing, rather than running a test body with no expectations.
  skip_if_not_installed("tibble")

  df <- tibble::tibble(
    totalResults = c(100, 200),
    total = c(1000, 2000),
    query = c("test1", "test2")
  )

  result <- ci(df)

  # The tibble class must survive the call, and the computed columns must
  # be added without changing the row count.
  expect_s3_class(result, "tbl_df")
  expect_true("f" %in% names(result))
  expect_true("conf.low" %in% names(result))
  expect_true("conf.high" %in% names(result))
  expect_equal(nrow(result), 2)
})
228
test_that("ci function confidence intervals are reasonable", {
  # Known case: 50 out of 100 = 50%.
  df <- data.frame(
    totalResults = c(50),
    total = c(100)
  )

  result <- ci(df, conf.level = 0.95)

  expect_s3_class(result, "data.frame")
  expect_equal(result$f[1], 0.5, tolerance = 0.001)

  # The 95% interval must straddle the point estimate of 0.5.
  # expect_lt()/expect_gt() print the actual bound when they fail.
  expect_lt(result$conf.low[1], 0.5)
  expect_gt(result$conf.high[1], 0.5)

  # Sanity bounds on the interval width for n = 100.
  ci_width <- result$conf.high[1] - result$conf.low[1]
  expect_gt(ci_width, 0.05) # Not too narrow
  expect_lt(ci_width, 0.5) # Not too wide
})
250
test_that("ci function works with mixed valid and invalid data", {
  # Rows 1 and 4 have usable totals; row 2 has a zero total and row 3 an
  # NA total.
  input <- data.frame(
    totalResults = c(100, 200, 50, 75),
    total = c(1000, 0, NA, 500),
    condition = c("A", "B", "C", "D")
  )

  out <- ci(input)

  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 4)

  freq <- out$f

  # Usable rows get a frequency.
  expect_false(is.na(freq[1]))
  expect_false(is.na(freq[4]))

  # Rows with a zero or missing total get NA.
  expect_true(is.na(freq[2]))
  expect_true(is.na(freq[3]))

  # The computed frequencies are 100 / 1000 and 75 / 500.
  expect_equal(freq[1], 0.1, tolerance = 0.001)
  expect_equal(freq[4], 0.15, tolerance = 0.001)
})
275
test_that("ci function preserves row order with mixed valid/invalid data", {
  # Valid and invalid rows alternate. The `query` labels record the
  # original position of each row.
  input <- data.frame(
    totalResults = c(100, 0, 200, NA, 50),
    total = c(1000, 0, 2000, 1500, 500),
    query = c("first", "second", "third", "fourth", "fifth"),
    stringsAsFactors = FALSE
  )

  out <- ci(input)

  # Rows come back in input order.
  expect_equal(out$query, c("first", "second", "third", "fourth", "fifth"))

  freq <- out$f

  # Rows 1, 3 and 5 have complete, positive input and get a frequency.
  expect_false(is.na(freq[1]))
  expect_false(is.na(freq[3]))
  expect_false(is.na(freq[5]))

  # Row 2 has total = 0; row 4 has totalResults = NA. Both yield NA.
  expect_true(is.na(freq[2]))
  expect_true(is.na(freq[4]))

  # The interval bounds of the invalid rows are NA as well.
  second_row <- out[2, ]
  fourth_row <- out[4, ]
  expect_true(is.na(second_row$conf.low))
  expect_true(is.na(second_row$conf.high))
  expect_true(is.na(fourth_row$conf.low))
  expect_true(is.na(fourth_row$conf.high))
})