blob: b77b93f658bf08dbd171c86188a7bec93434b386 [file] [log] [blame]
Marc Kupietza28a99a2025-07-06 14:52:47 +02001library(tidyllm)
2
# Helper function to skip the current test if no API keys are available.
#
# The test is skipped unless at least one of OPENAI_API_KEY,
# ANTHROPIC_API_KEY or GOOGLE_API_KEY is set to a non-empty value. The check
# does not verify that the key that is present belongs to the provider of the
# model that will actually be called.
skip_if_no_api_key <- function() {
  key_names <- c("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY")
  skip_if_not(
    any(nzchar(Sys.getenv(key_names))),
    "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)"
  )
}
12
# Helper function to find the Readme.md file in the current or parent directories.
#
# Probes "Readme.md" in the working directory, then in its parent, then in its
# grandparent. Returns the first relative path that exists, or NULL when none
# of the three candidates exists.
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  # Find() yields the first element satisfying the predicate, or NULL.
  Find(file.exists, candidates)
}
23
# Helper function to read README content.
#
# Reads the file located by find_readme_path() and returns its text as a
# single string with lines joined by "\n". Everything from the first
# "## Installation" heading (matched case-insensitively at the start of a
# line) onwards is dropped. Returns NULL when no README file is found.
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  readme_lines <- readLines(readme_path)

  # Keep only the lines that precede the "## Installation" heading.
  heading_idx <- grep("^## Installation", readme_lines, ignore.case = TRUE)
  if (length(heading_idx) > 0) {
    # seq_len() yields an empty index when the heading is on line 1, whereas
    # 1:(n - 1) would count down to c(1, 0) and keep the heading itself.
    readme_lines <- readme_lines[seq_len(heading_idx[1] - 1)]
  }

  paste(readme_lines, collapse = "\n")
}
40
# Helper function to call LLM API using tidyllm.
#
# Arguments:
#   prompt       Prompt text, sent as a single message.
#   max_tokens   Accepted so callers can state a budget, but it is currently
#                NOT forwarded to chat() and has no effect on the request.
#   temperature  Passed to chat() as .temperature.
#   model        Model name; its prefix (gpt-, claude-, gemini-) selects the
#                provider. Defaults to the file-level LLM_MODEL.
#
# Returns the reply extracted with get_reply(). An error whose text contains
# "429" or "401" skips the running test; any other error (including the
# unsupported-model error, which is raised inside the tryCatch) is re-raised
# with an "LLM API error:" prefix.
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
  cat("Calling LLM API with model:", model, "\n")

  # Only print the prompt up to the beginning of the README content.
  readme_start <- regexpr("README Documentation:", prompt, fixed = TRUE)
  if (readme_start > 0) {
    cat("Prompt (up to README):\n", substr(prompt, 1, readme_start - 1), "\n")
  } else {
    cat("Prompt:\n", prompt, "\n")
  }

  tryCatch(
    {
      # Determine the provider based on the model name prefix.
      provider <- if (grepl("^gpt-", model, ignore.case = TRUE)) {
        openai()
      } else if (grepl("^claude-", model, ignore.case = TRUE)) {
        claude()
      } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
        gemini()
      } else {
        stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
      }

      # Use the tidyllm unified API.
      result <- llm_message(prompt) |>
        chat(
          .provider = provider,
          .model = model,
          .temperature = temperature,
          .max_tries = 3
        )

      # Extract the reply text.
      get_reply(result)
    },
    error = function(e) {
      # Render the condition once; the status code is looked for in its text.
      error_text <- as.character(e)
      if (grepl("429", error_text)) {
        skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
      } else if (grepl("401", error_text)) {
        skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
      } else {
        stop(paste("LLM API error:", error_text))
      }
    }
  )
}
89
# Configuration variables
#
# LLM_MODEL is the default model of call_llm_api() and is part of every test
# name in this file. Exactly one assignment should be active; the commented
# lines are alternatives.
# LLM_MODEL <- "gpt-4o-mini" # OpenAI model option
# LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
# LLM_MODEL <- "claude-3-7-sonnet-latest" # Claude model option
# LLM_MODEL <- "claude-sonnet-4-0" # Claude model option
LLM_MODEL <- "gemini-2.5-pro" # Google Gemini model option
# LLM_MODEL <- "gemini-1.5-pro" # Google Gemini model option
# LLM_MODEL <- "gemini-2.5-flash" # Google Gemini model option (faster)
Marc Kupietz10dcfee2025-07-05 19:13:27 +020098
# Helper function to create a README-guided prompt.
#
# Arguments:
#   task_description  Text appended to the opening instruction sentence.
#   specific_task     Text placed after the "Task: " marker.
#
# Returns one string: the instruction, the README text from
# read_readme_content() under a "README Documentation:" marker, the task, and
# a closing request for code only. Stops with an error when
# read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  readme_text <- read_readme_content()
  if (is.null(readme_text)) {
    stop("README.md not found")
  }

  paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description, "\n\n",
    "README Documentation:\n",
    readme_text,
    "\n\nTask: ", specific_task,
    "\n\nProvide only the R code without explanations."
  )
}
115
# Helper function to extract R code from markdown code blocks.
#
# When the text contains one or more complete fenced blocks (an opening
# ``` line, optionally with a language tag, and a closing ```), only the
# contents of those blocks are returned, joined by a blank line; prose
# before, between or after the blocks is dropped. When there is no complete
# fenced block (plain code, or an unterminated fence), the fence markers are
# stripped and the remaining text is returned. The result is trimmed of
# leading and trailing whitespace. Works element-wise on character vectors.
extract_r_code <- function(response_text) {
  # (?s) lets "." span newlines; the lazy group captures one block body.
  fenced_block <- "(?s)```[^\\n`]*\\n(.*?)```"

  extract_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }
    blocks <- regmatches(text, gregexpr(fenced_block, text, perl = TRUE))[[1]]
    if (length(blocks) == 0) {
      # No complete fenced block: strip any fence markers that are present.
      return(trimws(gsub("```[rR]?\\n?", "", text)))
    }
    bodies <- sub(fenced_block, "\\1", blocks, perl = TRUE)
    trimws(paste(trimws(bodies), collapse = "\n\n"))
  }

  vapply(as.character(response_text), extract_one, character(1), USE.NAMES = FALSE)
}
124
# Helper function to test code syntax.
#
# Parses `code` as R source without evaluating it. Returns TRUE when parsing
# succeeds; on a parse error it prints the parser's message and returns
# FALSE.
test_code_syntax <- function(code) {
  tryCatch(
    {
      parse(text = code)
      TRUE
    },
    error = function(e) {
      cat("Syntax error:", conditionMessage(e), "\n")
      FALSE
    }
  )
}
138
# Helper function to run code if RUN_LLM_CODE is set.
#
# Arguments:
#   code       R source text to evaluate.
#   test_name  Label used in the progress output.
#
# Returns NA when the environment variable RUN_LLM_CODE is not exactly
# "true" (nothing is executed), TRUE when the code ran without error, and
# FALSE when it raised an error. A short summary of the result is printed.
#
# NOTE(security): `code` is LLM-generated text and is executed with eval();
# only enable RUN_LLM_CODE in an environment where that is acceptable.
run_code_if_enabled <- function(code, test_name) {
  if (Sys.getenv("RUN_LLM_CODE") != "true") {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")
  tryCatch(
    {
      # Evaluate in a fresh environment so assignments made by the generated
      # code cannot overwrite this function's own variables.
      result <- eval(parse(text = code), envir = new.env(parent = globalenv()))
      cat("Code executed successfully. Result type:", class(result), "\n")
      if (is.data.frame(result)) {
        cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
        if (nrow(result) > 0) {
          cat("First few rows:\n")
          print(head(result, 3))
        }
      } else {
        cat("Result preview:\n")
        print(result)
      }
      TRUE
    },
    error = function(e) {
      cat("Runtime error:", as.character(e), "\n")
      FALSE
    }
  )
}
169
# Test: the configured model produces a frequency query from the README.
# The generated code is checked for the expected function names, the search
# term and a date restriction, parsed for syntax, and optionally executed.
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt with README context and task
  prompt <- create_readme_prompt(
    "write R code to perform a frequency query for the word 'Demokratie' across the past three years. The code should use the RKorAPClient package and return a data frame.",
    "Write R code to query frequency of 'Demokratie' from the past three years using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("frequencyQuery", generated_code), "Generated code should include frequencyQuery")
  expect_true(grepl("Demokratie", generated_code), "Generated code should include the search term 'Demokratie'")

  # The year restriction is expected to appear as a "Date in" clause
  expect_true(grepl("Date in", generated_code), "Generated code should include a vc restriction on years")

  # Check that the generated code contains essential RKorAPClient patterns
  # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "frequency query")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})
214
Marc Kupietz345211a2025-07-06 12:52:24 +0200215
# Test: the configured model produces a collocation analysis from the README.
# The generated code is checked for the expected function names, the lemma
# query and the English instance URL fragment, parsed for syntax, and
# optionally executed.
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for collocation analysis
  prompt <- create_readme_prompt(
    paste(
      "Write R code to perform a collocation analysis for the lemma 'leverage' based on the current English Wikipedia Corpus using default parameters",
      "and show the three highest collocates according to their log dice score.\n"
    ),
    "Write R code to perform collocation analysis for lemma 'leverage' using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
  expect_true(grepl("tt/l=leverage", generated_code), "Generated code should include a search for the lemma 'leverage'")
  # expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")
  expect_true(grepl("instance/english", generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated collocation analysis code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "collocation analysis")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})
257
# Test: the configured model produces a simple corpus query from the README.
# The generated code is checked for the expected function names, the search
# term and a pipe operator, parsed for syntax, and optionally executed.
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for corpus query
  prompt <- create_readme_prompt(
    "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package.",
    "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 300)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
  expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
  expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")

  # Check that the generated code follows the README example pattern
  uses_pipe <- grepl("\\|>", generated_code) || grepl("%>%", generated_code)
  expect_true(uses_pipe, "Generated code should use pipe operators")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated corpus query code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "corpus query")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})