Blame - tests/testthat/test-readme-against-llm.R - KorAP/RKorAPClient

blob: 842a0d5509e7fb63b4e55f7c9cb68fcafd985def [file] [log] [blame]

Marc Kupietz	0614370	2025-07-05 17:49:31 +0200	[diff] [blame^]	1	# Helper function to find README.md file in current or parent directories
				find_readme_path <- function() {
				  # Locate Readme.md relative to the working directory.
				  # Probes the current directory, then its parent, then its grandparent, and
				  # returns the first relative path that exists, or NULL when none does.
				  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
				  found <- candidates[file.exists(candidates)]
				  if (length(found) == 0L) {
				    return(NULL)
				  }
				  found[[1L]]
				}
				11
				# Load the text of the README located by find_readme_path().
				# Returns the file's lines joined with "\n" as one string, or NULL when no
				# README could be located.
				read_readme_content <- function() {
				  path <- find_readme_path()
				  if (is.null(path)) {
				    return(NULL)
				  }
				  paste(readLines(path), collapse = "\n")
				}
				21
				# Send one chat-completion request to the OpenAI API and return the reply text.
				#
				# Arguments:
				#   prompt       Text sent as the single "user" message.
				#   max_tokens   Value forwarded as the request's `max_tokens` field.
				#   temperature  Value forwarded as the request's `temperature` field.
				#
				# Returns the `content` field of the first choice in the parsed response.
				# The calling test is skipped when httr2 is not installed or when the API
				# answers with HTTP 429 or 401; any other failure is re-raised as an error
				# whose message starts with "OpenAI API error:".
				call_openai_api <- function(prompt, max_tokens = 500, temperature = 0.1) {
				  # Calls below are namespace-qualified so this helper does not attach
				  # packages to the search path of the whole test session.
				  skip_if_not_installed("httr2")

				  # TRUE when `err` stems from an HTTP response with the given status code.
				  # httr2 tags HTTP errors with a class such as "httr2_http_429"; the message
				  # check is a fallback for errors that no longer carry that class.
				  has_status <- function(err, status) {
				    inherits(err, paste0("httr2_http_", status)) ||
				      grepl(paste0("HTTP ", status), conditionMessage(err), fixed = TRUE)
				  }

				  tryCatch({
				    response <- httr2::request("https://api.openai.com/v1/chat/completions") |>
				      httr2::req_headers(
				        "Authorization" = paste("Bearer", Sys.getenv("OPENAI_API_KEY")),
				        "Content-Type" = "application/json"
				      ) |>
				      httr2::req_body_json(list(
				        model = "gpt-4.1-mini",
				        messages = list(
				          list(role = "user", content = prompt)
				        ),
				        max_tokens = max_tokens,
				        temperature = temperature
				      )) |>
				      httr2::req_retry(max_tries = 3) |>
				      httr2::req_perform()

				    # Parse the JSON body and pull out the text of the first choice
				    result <- httr2::resp_body_json(response)
				    result$choices[[1]]$message$content
				  }, error = function(e) {
				    if (has_status(e, 429)) {
				      skip("OpenAI API rate limit exceeded - please try again later or check your API key/credits")
				    } else if (has_status(e, 401)) {
				      skip("OpenAI API authentication failed - please check your OPENAI_API_KEY")
				    } else {
				      # conditionMessage() avoids nesting an "Error in ..." prefix in the text
				      stop("OpenAI API error: ", conditionMessage(e), call. = FALSE)
				    }
				  })
				}
				57
				# Assemble the prompt sent to the model: an instruction header containing
				# `task_description`, the full README text, the concrete `specific_task`, and
				# a closing request for code-only output.
				# Stops with an error when the README text is unavailable.
				create_readme_prompt <- function(task_description, specific_task) {
				  readme <- read_readme_content()
				  if (is.null(readme)) stop("README.md not found")

				  header <- "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, "
				  footer <- "\n\nProvide only the R code without explanations."

				  paste0(
				    header, task_description, "\n\n",
				    "README Documentation:\n", readme,
				    "\n\nTask: ", specific_task,
				    footer
				  )
				}
				74
				# Extract R code from a model reply that may wrap it in markdown code fences.
				#
				# response_text  Character vector of replies (typically length one).
				#
				# Returns a character vector of the same length. For each reply:
				#   * if it contains fenced blocks (lines starting with ```), only the lines
				#     inside those blocks are kept and joined with "\n", so surrounding prose
				#     and fence info strings such as "r" or "{r}" are dropped; a fence that is
				#     never closed runs to the end of the reply;
				#   * otherwise (no fences, or fences written inline on one line) the fence
				#     markers are stripped from the text as a fallback.
				# Leading and trailing whitespace is removed from the result.
				extract_r_code <- function(response_text) {
				  extract_one <- function(text) {
				    if (is.na(text)) {
				      return(NA_character_)
				    }
				    lines <- strsplit(text, "\n", fixed = TRUE)[[1L]]
				    is_fence <- grepl("^[[:space:]]*```", lines)
				    # A line is code when an odd number of fence lines precede it
				    in_block <- cumsum(is_fence) %% 2L == 1L & !is_fence
				    code <- trimws(paste(lines[in_block], collapse = "\n"))
				    if (nzchar(code)) {
				      return(code)
				    }
				    # Fallback: nothing found between fence lines, so just strip the markers
				    stripped <- gsub("```[rR]?\\n?", "", text)
				    stripped <- gsub("```\\n?$", "", stripped)
				    trimws(stripped)
				  }
				  vapply(as.character(response_text), extract_one, character(1), USE.NAMES = FALSE)
				}
				83
				# Asks the model, with the README as context, for a frequency query over
				# 2010-2015 and checks the returned code for the expected building blocks.
				# Skipped when no API key is set or the README cannot be located.
				test_that("GPT-4.1 mini can solve frequency query task with README guidance", {
				  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
				  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

				  # Create the prompt with README context and task
				  prompt <- create_readme_prompt(
				    "write R code to perform a frequency query for the word 'Deutschland' across multiple years (2010-2015). The code should use the RKorAPClient package and return a data frame with year and frequency columns.",
				    "Write R code to query frequency of 'Deutschland' from 2010-2015 using RKorAPClient."
				  )

				  # Call OpenAI API and strip markdown fences from the reply
				  generated_response <- call_openai_api(prompt, max_tokens = 500)
				  generated_code <- extract_r_code(generated_response)

				  # Basic checks on the generated code
				  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
				  expect_true(grepl("frequencyQuery", generated_code), "Generated code should include frequencyQuery")
				  expect_true(grepl("Deutschland", generated_code), "Generated code should include the search term 'Deutschland'")
				  expect_true(grepl("201[0-5]", generated_code), "Generated code should include years 2010-2015")

				  # Either pipe flavour is accepted: native `|>` or magrittr `%>%`.
				  # The `|` must be escaped in the regex, hence "\\|>".
				  expect_true(
				    grepl("\\|>", generated_code) || grepl("%>%", generated_code),
				    "Generated code should use pipe operators"
				  )

				  # Optional: parse (but do not evaluate) the code to catch syntax errors
				  parsed_successfully <- tryCatch({
				    parse(text = generated_code)
				    TRUE
				  }, error = function(e) {
				    FALSE
				  })

				  expect_true(parsed_successfully, "Generated code should be syntactically valid R code")

				  # Print the generated code for manual inspection
				  cat("Generated code:\n", generated_code, "\n")
				})
				121
				# Asks the model, with the README as context, for a collocation analysis of
				# 'setzen' and checks the returned code for the expected building blocks.
				# Skipped when no API key is set or the README cannot be located.
				test_that("GPT-4.1 mini can solve collocation analysis task with README guidance", {
				  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
				  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

				  # Create the prompt for collocation analysis
				  prompt <- create_readme_prompt(
				    "write R code to perform a collocation analysis for the word 'setzen' (looking for light verb constructions). The code should use the RKorAPClient package's collocationAnalysis function.",
				    "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
				  )

				  # Call OpenAI API and strip markdown fences from the reply
				  generated_response <- call_openai_api(prompt, max_tokens = 500)
				  generated_code <- extract_r_code(generated_response)

				  # Basic checks on the generated code
				  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
				  expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
				  expect_true(grepl("setzen", generated_code), "Generated code should include the search term 'setzen'")
				  expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")

				  # Either context-size parameter satisfies the check (regex alternation)
				  expect_true(
				    grepl("leftContextSize|rightContextSize", generated_code),
				    "Generated code should include context size parameters"
				  )

				  # Print the generated code for manual inspection
				  cat("Generated collocation analysis code:\n", generated_code, "\n")
				})
				149
				# Asks the model, with the README as context, for a simple corpus query that
				# fetches all results and checks the returned code for the expected calls.
				# Skipped when no API key is set or the README cannot be located.
				test_that("GPT-4.1 mini can solve corpus query task with README guidance", {
				  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
				  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

				  # Create the prompt for corpus query
				  prompt <- create_readme_prompt(
				    "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package.",
				    "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
				  )

				  # Call OpenAI API and strip markdown fences from the reply
				  generated_response <- call_openai_api(prompt, max_tokens = 300)
				  generated_code <- extract_r_code(generated_response)

				  # Basic checks on the generated code
				  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
				  expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
				  expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
				  expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")

				  # Either pipe flavour is accepted: native `|>` or magrittr `%>%`.
				  # The `|` must be escaped in the regex, hence "\\|>".
				  expect_true(
				    grepl("\\|>", generated_code) || grepl("%>%", generated_code),
				    "Generated code should use pipe operators"
				  )

				  # Print the generated code for manual inspection
				  cat("Generated corpus query code:\n", generated_code, "\n")
				})