Blame - tests/testthat/test-ci.R - KorAP/RKorAPClient

blob: c980470a5aa3c7b2c07a0f0598bb1240bb839803 [file] [log] [blame]

test_that("ci function works with basic input", {
  # Three queries whose hit counts are each one tenth of their totals
  counts <- data.frame(
    totalResults = c(100, 200, 50),
    total = c(1000, 2000, 500),
    query = c("test1", "test2", "test3")
  )

  out <- ci(counts)

  # The result is a data frame with one row per input row
  expect_s3_class(out, "data.frame")
  expect_equal(nrow(out), 3)

  # The estimate and both interval bounds must be present as columns
  expect_true(all(c("f", "conf.low", "conf.high") %in% names(out)))

  # Each relative frequency is checked on its own: 100/1000, 200/2000, 50/500
  for (row in seq_len(3)) {
    expect_equal(out$f[row], 0.1, tolerance = 0.001)
  }
})
				22
				test_that("ci function handles custom column names", {
				  # Counts and totals are stored under non-default column names
				  custom <- data.frame(
				    observed = c(50, 100),
				    N_total = c(500, 1000),
				    condition = c("A", "B")
				  )

				  # The x and N arguments tell ci() which columns to use
				  out <- ci(custom, x = observed, N = N_total)

				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 2)
				  expect_true(all(c("f", "conf.low", "conf.high") %in% names(out)))

				  # 50/500 and 100/1000 are both 0.1; each row is checked separately
				  for (row in seq_len(2)) {
				    expect_equal(out$f[row], 0.1, tolerance = 0.001)
				  }
				})
				41
				test_that("ci function handles different confidence levels", {
				  # A single observation: 100 hits out of 1000
				  df <- data.frame(
				    totalResults = c(100),
				    total = c(1000)
				  )

				  # 90% confidence level: result shape and expected columns
				  result_90 <- ci(df, conf.level = 0.90)
				  expect_s3_class(result_90, "data.frame")
				  expect_true("f" %in% names(result_90))
				  expect_true("conf.low" %in% names(result_90))
				  expect_true("conf.high" %in% names(result_90))

				  # 99% confidence level
				  result_99 <- ci(df, conf.level = 0.99)
				  expect_s3_class(result_99, "data.frame")

				  # The 99% interval must be wider than the 90% interval.
				  # expect_gt() is used instead of expect_true(a > b) so that a failure
				  # reports both widths rather than just "is not TRUE".
				  ci_width_90 <- result_90$conf.high[1] - result_90$conf.low[1]
				  ci_width_99 <- result_99$conf.high[1] - result_99$conf.low[1]
				  expect_gt(ci_width_99, ci_width_90)
				})
				64
				test_that("ci function handles zero and negative totals", {
				  # Only the first total is positive; the others are zero and negative
				  counts <- data.frame(
				    totalResults = c(10, 20, 30),
				    total = c(100, 0, -10)
				  )

				  out <- ci(counts)

				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 3)

				  # Row 1 (total = 100) must carry an estimate and both bounds
				  expect_false(is.na(out$f[1]))
				  expect_false(is.na(out$conf.low[1]))
				  expect_false(is.na(out$conf.high[1]))

				  # Rows 2 (total = 0) and 3 (total = -10) must be NA throughout
				  for (row in 2:3) {
				    expect_true(is.na(out$f[row]))
				    expect_true(is.na(out$conf.low[row]))
				    expect_true(is.na(out$conf.high[row]))
				  }
				})
				89
				test_that("ci function handles NA values in totals", {
				  # The middle row has a missing total
				  counts <- data.frame(
				    totalResults = c(10, 20, 30),
				    total = c(100, NA, 300)
				  )

				  out <- ci(counts)

				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 3)

				  # Rows with a known total (1 and 3) must get an estimate
				  for (row in c(1, 3)) {
				    expect_false(is.na(out$f[row]))
				  }

				  # The row with the NA total must be NA in the estimate and both bounds
				  expect_true(is.na(out$f[2]))
				  expect_true(is.na(out$conf.low[2]))
				  expect_true(is.na(out$conf.high[2]))
				})
				110
				test_that("ci function handles edge cases with very small frequencies", {
				  # One hit and zero hits, each out of a million
				  df <- data.frame(
				    totalResults = c(1, 0),
				    total = c(1000000, 1000000)
				  )

				  result <- ci(df)

				  expect_s3_class(result, "data.frame")
				  expect_equal(nrow(result), 2)

				  # A single hit must give a strictly positive but tiny frequency.
				  # expect_gt()/expect_lt() print the offending value on failure, which
				  # expect_true(a > b) does not.
				  expect_gt(result$f[1], 0)
				  expect_lt(result$f[1], 0.01)

				  # Zero hits must give a frequency of zero
				  expect_equal(result$f[2], 0)
				})
				127
				test_that("ci function handles large numbers correctly", {
				  # One million hits out of ten million
				  df <- data.frame(
				    totalResults = c(1000000),
				    total = c(10000000)
				  )

				  result <- ci(df)

				  expect_s3_class(result, "data.frame")
				  expect_equal(nrow(result), 1)
				  expect_equal(result$f[1], 0.1, tolerance = 0.001)

				  # Both bounds must stay strictly inside (0, 1).
				  # expect_gt()/expect_lt() report the actual bound on failure instead
				  # of a bare "is not TRUE".
				  expect_gt(result$conf.low[1], 0)
				  expect_lt(result$conf.high[1], 1)
				})
				142
				test_that("ci function preserves original columns", {
				  # Input carries three extra columns besides the two count columns
				  input <- data.frame(
				    totalResults = c(100, 200),
				    total = c(1000, 2000),
				    query = c("test1", "test2"),
				    condition = c("A", "B"),
				    year = c(2020, 2021)
				  )

				  out <- ci(input)

				  expect_s3_class(out, "data.frame")

				  # Every input column must still be present in the result
				  kept <- c("query", "condition", "year", "totalResults", "total")
				  expect_true(all(kept %in% names(out)))

				  # The extra columns must come through with their values unchanged
				  expect_equal(out$query, c("test1", "test2"))
				  expect_equal(out$condition, c("A", "B"))
				  expect_equal(out$year, c(2020, 2021))
				})
				166
				test_that("ci function handles empty data frame", {
				  # Zero-row input that still has both count columns
				  empty <- data.frame(
				    totalResults = numeric(0),
				    total = numeric(0)
				  )

				  out <- ci(empty)

				  # The result stays a zero-row data frame
				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 0)

				  # The output columns must exist even without any rows
				  expect_true(all(c("f", "conf.low", "conf.high") %in% names(out)))
				})
				181
				test_that("ci function handles all zero totals", {
				  # No row has a usable total
				  zero_totals <- data.frame(
				    totalResults = c(10, 20, 30),
				    total = c(0, 0, 0)
				  )

				  out <- ci(zero_totals)

				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 3)

				  # The estimate and both bounds must be NA in every row
				  expect_true(all(is.na(out$conf.low)))
				  expect_true(all(is.na(out$conf.high)))
				  expect_true(all(is.na(out$f)))
				})
				198
				test_that("ci function validates confidence level parameter", {
				  # A valid single-row input, so any error comes from conf.level alone
				  valid_input <- data.frame(
				    totalResults = c(100),
				    total = c(1000)
				  )

				  # A level above 1, exactly 0, and below 0 must each raise an error
				  expect_error(ci(valid_input, conf.level = 1.1))
				  expect_error(ci(valid_input, conf.level = 0))
				  expect_error(ci(valid_input, conf.level = -0.1))
				})
				210
				test_that("ci function handles tibble input", {
				  # Skip explicitly when tibble is unavailable. The previous
				  # if (requireNamespace(...)) wrapper left the test without any
				  # expectations in that case instead of recording a skip with a reason.
				  skip_if_not_installed("tibble")

				  df <- tibble::tibble(
				    totalResults = c(100, 200),
				    total = c(1000, 2000),
				    query = c("test1", "test2")
				  )

				  result <- ci(df)

				  # The tibble class must be kept and the output columns added
				  expect_s3_class(result, "tbl_df")
				  expect_true("f" %in% names(result))
				  expect_true("conf.low" %in% names(result))
				  expect_true("conf.high" %in% names(result))
				  expect_equal(nrow(result), 2)
				})
				228
				test_that("ci function confidence intervals are reasonable", {
				  # Known case: 50 hits out of 100, i.e. a relative frequency of 0.5
				  df <- data.frame(
				    totalResults = c(50),
				    total = c(100)
				  )

				  result <- ci(df, conf.level = 0.95)

				  expect_s3_class(result, "data.frame")
				  expect_equal(result$f[1], 0.5, tolerance = 0.001)

				  # The 95% interval must straddle the point estimate of 0.5.
				  # expect_lt()/expect_gt() show the actual bound on failure, which
				  # expect_true(a < b) does not.
				  expect_lt(result$conf.low[1], 0.5)
				  expect_gt(result$conf.high[1], 0.5)

				  # The interval width must be plausible: neither degenerate nor huge
				  ci_width <- result$conf.high[1] - result$conf.low[1]
				  expect_gt(ci_width, 0.05) # Not too narrow
				  expect_lt(ci_width, 0.5) # Not too wide
				})
				250
				test_that("ci function works with mixed valid and invalid data", {
				  # Rows 1 and 4 have usable totals; row 2 has total 0, row 3 has total NA
				  mixed <- data.frame(
				    totalResults = c(100, 200, 50, 75),
				    total = c(1000, 0, NA, 500),
				    condition = c("A", "B", "C", "D")
				  )

				  out <- ci(mixed)

				  expect_s3_class(out, "data.frame")
				  expect_equal(nrow(out), 4)

				  # Rows with unusable totals must have an NA estimate
				  for (row in c(2, 3)) {
				    expect_true(is.na(out$f[row]))
				  }

				  # Rows with usable totals must have a non-NA estimate
				  for (row in c(1, 4)) {
				    expect_false(is.na(out$f[row]))
				  }

				  # The usable rows must have the right value: 100/1000 and 75/500
				  expect_equal(out$f[1], 0.1, tolerance = 0.001)
				  expect_equal(out$f[4], 0.15, tolerance = 0.001)
				})
				275
				test_that("ci function preserves row order with mixed valid/invalid data", {
				  # Valid rows (1, 3, 5) alternate with a zero-total row (2) and a row
				  # whose totalResults is NA (4)
				  labels <- c("first", "second", "third", "fourth", "fifth")
				  mixed <- data.frame(
				    totalResults = c(100, 0, 200, NA, 50),
				    total = c(1000, 0, 2000, 1500, 500),
				    query = labels,
				    stringsAsFactors = FALSE
				  )

				  out <- ci(mixed)

				  # Rows must come back in exactly the input order
				  expect_equal(out$query, labels)

				  # Valid rows must have an estimate
				  for (row in c(1, 3, 5)) {
				    expect_false(is.na(out$f[row]))
				  }

				  # Invalid rows must be NA in the estimate and in both bounds
				  for (row in c(2, 4)) {
				    expect_true(is.na(out$f[row]))
				    expect_true(is.na(out$conf.low[row]))
				    expect_true(is.na(out$conf.high[row]))
				  }
				})