Use English Wikipedia instance for ca doc test
Change-Id: I811d9d503fdc3dc4c69c4e3f34535c8d1b2d1ef0
diff --git a/tests/testthat/test-readme-against-llm.R b/tests/testthat/test-readme-against-llm.R
index fbac04b..b77b93f 100644
--- a/tests/testthat/test-readme-against-llm.R
+++ b/tests/testthat/test-readme-against-llm.R
@@ -2,10 +2,12 @@
# Helper function to skip if no API keys are available
skip_if_no_api_key <- function() {
- skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")) ||
- nzchar(Sys.getenv("ANTHROPIC_API_KEY")) ||
- nzchar(Sys.getenv("GOOGLE_API_KEY")),
- "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
+ skip_if_not(
+ nzchar(Sys.getenv("OPENAI_API_KEY")) ||
+ nzchar(Sys.getenv("ANTHROPIC_API_KEY")) ||
+ nzchar(Sys.getenv("GOOGLE_API_KEY")),
+ "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)"
+ )
}
# Helper function to find README.md file in current or parent directories
@@ -26,55 +28,73 @@
return(NULL)
}
readme_content <- readLines(readme_path)
+
+ # Find the line with "## Installation" and truncate before it
+ installation_line <- grep("^## Installation", readme_content, ignore.case = TRUE)
+ if (length(installation_line) > 0) {
+ readme_content <- readme_content[1:(installation_line[1] - 1)]
+ }
+
paste(readme_content, collapse = "\n")
}
# Helper function to call LLM API using tidyllm
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
- tryCatch({
- # Determine the provider based on model name
- if (grepl("^gpt-", model, ignore.case = TRUE)) {
- provider <- openai()
- } else if (grepl("^claude-", model, ignore.case = TRUE)) {
- provider <- claude()
- } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
- # Debug Gemini API key
- provider <- gemini()
- } else {
- stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
- }
+ cat("Calling LLM API with model:", model, "\n")
+ # Only print prompt up to the beginning of README content
+ readme_start <- regexpr("README Documentation:", prompt, fixed = TRUE)
+ if (readme_start > 0) {
+ prompt_preview <- substr(prompt, 1, readme_start - 1)
+ cat("Prompt (up to README):\n", prompt_preview, "\n")
+ } else {
+ cat("Prompt:\n", prompt, "\n")
+ }
+ tryCatch(
+ {
+ # Determine the provider based on model name
+ if (grepl("^gpt-", model, ignore.case = TRUE)) {
+ provider <- openai()
+ } else if (grepl("^claude-", model, ignore.case = TRUE)) {
+ provider <- claude()
+ } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
+ # Debug Gemini API key
+ provider <- gemini()
+ } else {
+ stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
+ }
- # Use tidyllm unified API
- result <- llm_message(prompt) |>
- chat(
- .provider = provider,
- .model = model,
- .temperature = temperature,
- .max_tries = 3
- )
+ # Use tidyllm unified API
+ result <- llm_message(prompt) |>
+ chat(
+ .provider = provider,
+ .model = model,
+ .temperature = temperature,
+ .max_tries = 3
+ )
- # Extract the reply text
- get_reply(result)
- }, error = function(e) {
- if (grepl("429", as.character(e))) {
- skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
- } else if (grepl("401", as.character(e))) {
- skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
- } else {
- stop(paste("LLM API error:", as.character(e)))
+ # Extract the reply text
+ get_reply(result)
+ },
+ error = function(e) {
+ if (grepl("429", as.character(e))) {
+ skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
+ } else if (grepl("401", as.character(e))) {
+ skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
+ } else {
+ stop(paste("LLM API error:", as.character(e)))
+ }
}
- })
+ )
}
# Configuration variables
-#LLM_MODEL <- "gpt-4o-mini" # OpenAI model option
-LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
-#LLM_MODEL <- "claude-3-7-sonnet-latest" # Claude model option
-#LLM_MODEL <- "claude-sonnet-4-0" # Claude model option
-#LLM_MODEL <- "gemini-2.5-pro" # Google Gemini model option
-#LLM_MODEL <- "gemini-1.5-pro" # Google Gemini model option
-#LLM_MODEL <- "gemini-2.5-flash" # Google Gemini model option (faster)
-KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
+# LLM_MODEL <- "gpt-4o-mini" # OpenAI model option
+# LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
+# LLM_MODEL <- "claude-3-7-sonnet-latest" # Claude model option
+# LLM_MODEL <- "claude-sonnet-4-0" # Claude model option
+LLM_MODEL <- "gemini-2.5-pro" # Google Gemini model option
+# LLM_MODEL <- "gemini-1.5-pro" # Google Gemini model option
+# LLM_MODEL <- "gemini-2.5-flash" # Google Gemini model option (faster)
# Helper function to create README-guided prompt
create_readme_prompt <- function(task_description, specific_task) {
@@ -86,7 +106,6 @@
paste0(
"You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
task_description, "\n\n",
- "IMPORTANT: Use the KorAP URL '", KORAP_URL, "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n",
"README Documentation:\n",
readme_text,
"\n\nTask: ", specific_task,
@@ -105,37 +124,43 @@
# Helper function to test code syntax
test_code_syntax <- function(code) {
- tryCatch({
- parse(text = code)
- TRUE
- }, error = function(e) {
- cat("Syntax error:", as.character(e), "\n")
- FALSE
- })
+ tryCatch(
+ {
+ parse(text = code)
+ TRUE
+ },
+ error = function(e) {
+ cat("Syntax error:", as.character(e), "\n")
+ FALSE
+ }
+ )
}
# Helper function to run code if RUN_LLM_CODE is set
run_code_if_enabled <- function(code, test_name) {
if (nzchar(Sys.getenv("RUN_LLM_CODE")) && Sys.getenv("RUN_LLM_CODE") == "true") {
cat("Running generated code for", test_name, "...\n")
- tryCatch({
- result <- eval(parse(text = code))
- cat("Code executed successfully. Result type:", class(result), "\n")
- if (is.data.frame(result)) {
- cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
- if (nrow(result) > 0) {
- cat("First few rows:\n")
- print(head(result, 3))
+ tryCatch(
+ {
+ result <- eval(parse(text = code))
+ cat("Code executed successfully. Result type:", class(result), "\n")
+ if (is.data.frame(result)) {
+ cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
+ if (nrow(result) > 0) {
+ cat("First few rows:\n")
+ print(head(result, 3))
+ }
+ } else {
+ cat("Result preview:\n")
+ print(result)
}
- } else {
- cat("Result preview:\n")
- print(result)
+ return(TRUE)
+ },
+ error = function(e) {
+ cat("Runtime error:", as.character(e), "\n")
+ return(FALSE)
}
- return(TRUE)
- }, error = function(e) {
- cat("Runtime error:", as.character(e), "\n")
- return(FALSE)
- })
+ )
} else {
cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
return(NA)
@@ -145,10 +170,10 @@
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
# Skip if offline
skip_if_offline()
-
+
# Skip if no API keys are set
skip_if_no_api_key()
-
+
# Check for README file
skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
@@ -168,8 +193,7 @@
expect_true(grepl("Demokratie", generated_code), "Generated code should include the search term 'Demokratie'")
last_year <- as.numeric(format(Sys.Date(), "%Y")) - 1
- expect_true(grepl("Date in ", generated_code), "Generated code should vc restriction on years")
- expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
+ expect_true(grepl("Date in", generated_code), "Generated code should vc restriction on years")
# Check that the generated code contains essential RKorAPClient patterns
# expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")
@@ -192,17 +216,18 @@
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
# Skip if offline
skip_if_offline()
-
+
# Skip if no API keys are set
skip_if_no_api_key()
-
+
# Check for README file
skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
# Create the prompt for collocation analysis
prompt <- create_readme_prompt(
- "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function.",
- "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
+ paste("Write R code to perform a collocation analysis for the lemma 'leverage' based on the current English Wikipedia Corpus using default parameters", "and show the three highest collocates according to their log dice score.
+"),
+ "Write R code to perform collocation analysis for lemma 'leverage' using RKorAPClient."
)
# Call LLM API
@@ -212,9 +237,9 @@
# Basic checks on the generated code
expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
- expect_true(grepl("setzen", generated_code), "Generated code should include the search term 'setzen'")
+ expect_true(grepl("tt/l=leverage", generated_code), "Generated code should include the search the lemma 'leverage'")
# expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")
- expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
+ expect_true(grepl("instance/english", generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
# Test code syntax
syntax_valid <- test_code_syntax(generated_code)
@@ -233,10 +258,10 @@
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
# Skip if offline
skip_if_offline()
-
+
# Skip if no API keys are set
skip_if_no_api_key()
-
+
# Check for README file
skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
@@ -255,11 +280,12 @@
expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")
- expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
# Check that the generated code follows the README example pattern
- expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code),
- "Generated code should use pipe operators")
+ expect_true(
+ grepl("\\|>", generated_code) || grepl("%>%", generated_code),
+ "Generated code should use pipe operators"
+ )
# Test code syntax
syntax_valid <- test_code_syntax(generated_code)