library(tidyllm)

# Helper function to skip if no API keys are available
#
# Skips the current test unless at least one of OPENAI_API_KEY,
# ANTHROPIC_API_KEY or GOOGLE_API_KEY is set to a non-empty value.
# Note: any one key is sufficient; this helper does not check that the key
# belongs to the provider of the model that is actually used.
skip_if_no_api_key <- function() {
  key_vars <- c("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY")
  skip_if_not(
    any(nzchar(Sys.getenv(key_vars))),
    "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)"
  )
}

# Helper function to find the Readme.md file in the current or parent
# directories
#
# Checks "Readme.md", "../Readme.md" and "../../Readme.md" (relative to the
# working directory, in that order) and returns the first path that exists,
# or NULL when none of them does.
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  existing <- candidates[file.exists(candidates)]
  if (length(existing) == 0L) {
    return(NULL)
  }
  existing[[1]]
}

# Helper function to read README content
#
# Returns the text of the file located by find_readme_path() as a single
# string with lines joined by "\n", or NULL when no README file is found.
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a README without a trailing newline is not worth a warning
  # in the test output.
  readme_lines <- readLines(readme_path, warn = FALSE)
  paste(readme_lines, collapse = "\n")
}

# Helper function to call LLM API using tidyllm
#
# Sends `prompt` as a single message to the provider selected from the prefix
# of `model` and returns the reply text extracted with get_reply().
#
# Arguments:
#   prompt       Prompt text passed to llm_message().
#   max_tokens   Accepted for backward compatibility with existing callers.
#                NOTE(review): it is currently not forwarded to chat(), so it
#                has no effect on the request.
#   temperature  Forwarded to chat() as `.temperature`.
#   model        Model identifier; must start with "gpt-", "claude-" or
#                "gemini-" (case-insensitive). Defaults to LLM_MODEL.
#
# Error handling: if the error message contains "429" or "401" the calling
# test is skipped; every other error (including the "Unsupported model" error
# raised below) is re-raised with an "LLM API error:" prefix.
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
  tryCatch({
    # Determine the provider based on model name
    if (grepl("^gpt-", model, ignore.case = TRUE)) {
      provider <- openai()
    } else if (grepl("^claude-", model, ignore.case = TRUE)) {
      provider <- claude()
    } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
      provider <- gemini()
    } else {
      stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
    }

    # Use tidyllm unified API
    result <- llm_message(prompt) |>
      chat(
        .provider = provider,
        .model = model,
        .temperature = temperature,
        .max_tries = 3
      )

    # Extract the reply text
    get_reply(result)
  }, error = function(e) {
    # Match on the condition message only, not on the deparsed call.
    error_text <- conditionMessage(e)
    if (grepl("429", error_text)) {
      skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
    } else if (grepl("401", error_text)) {
      skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
    } else {
      stop(paste("LLM API error:", error_text), call. = FALSE)
    }
  })
}

# Configuration variables
# Exactly one LLM_MODEL assignment should be active; the commented lines are
# alternative models that can be switched in.
#LLM_MODEL <- "gpt-4o-mini" # OpenAI model option
LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
#LLM_MODEL <- "claude-3-7-sonnet-latest" # Claude model option
#LLM_MODEL <- "claude-sonnet-4-0" # Claude model option
#LLM_MODEL <- "gemini-2.5-pro" # Google Gemini model option
#LLM_MODEL <- "gemini-1.5-pro" # Google Gemini model option
#LLM_MODEL <- "gemini-2.5-flash" # Google Gemini model option (faster)
# KorAP instance that the generated code is instructed to connect to
KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"

# Helper function to create README-guided prompt
#
# Builds a single prompt string consisting of a fixed role preamble,
# `task_description`, an instruction to use KORAP_URL as the first parameter
# of KorAPConnection, the full README text, `specific_task`, and a closing
# request for code only.
#
# Arguments:
#   task_description  Text appended directly to the preamble sentence.
#   specific_task     Text placed after the "Task: " marker.
#
# Raises an error when read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  readme_text <- read_readme_content()
  if (is.null(readme_text)) {
    stop("README.md not found")
  }

  paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description, "\n\n",
    "IMPORTANT: Use the KorAP URL '", KORAP_URL, "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n",
    "README Documentation:\n",
    readme_text,
    "\n\nTask: ", specific_task,
    "\n\nProvide only the R code without explanations."
  )
}

# Helper function to extract R code from markdown code blocks
#
# For a single response string that contains complete fenced code blocks,
# returns only the contents of the blocks that are untagged or tagged as R
# (```r, ```R, ```{r ...}), joined by newlines, so that explanatory prose
# around the blocks does not end up in the code. In every other case (no
# complete fenced block, no R/untagged block, or input that is not a single
# non-NA string) it falls back to stripping the fence markers from the text.
# The result is always trimmed of leading/trailing whitespace.
extract_r_code <- function(response_text) {
  # Fallback: remove fence markers only and keep everything else.
  strip_fences <- function(text) {
    stripped <- gsub("```[rR]?\\n?", "", text)
    stripped <- gsub("```\\n?$", "", stripped)
    trimws(stripped)
  }

  if (length(response_text) != 1L || is.na(response_text)) {
    return(strip_fences(response_text))
  }

  # Group 1: info string after the opening fence; group 2: block content.
  # (?s) lets "." match newlines; ".*?" stops at the first closing fence.
  fence_pattern <- "(?s)```([^\\n`]*)\\n(.*?)```"
  blocks <- regmatches(
    response_text,
    gregexpr(fence_pattern, response_text, perl = TRUE)
  )[[1]]
  if (length(blocks) == 0L) {
    return(strip_fences(response_text))
  }

  tags <- trimws(sub(fence_pattern, "\\1", blocks, perl = TRUE))
  bodies <- sub(fence_pattern, "\\2", blocks, perl = TRUE)

  # Keep untagged blocks and blocks tagged r / R / {r ...}; skip e.g. bash.
  is_r_block <- tags == "" |
    grepl("^\\{?r\\b", tags, ignore.case = TRUE, perl = TRUE)
  if (!any(is_r_block)) {
    return(strip_fences(response_text))
  }

  trimws(paste(trimws(bodies[is_r_block]), collapse = "\n"))
}

# Helper function to test code syntax
#
# Returns TRUE when `code` parses as R code. When parsing fails, prints the
# parse error prefixed with "Syntax error:" and returns FALSE. The code is
# only parsed, never evaluated.
test_code_syntax <- function(code) {
  tryCatch({
    parse(text = code)
    TRUE
  }, error = function(e) {
    cat("Syntax error:", as.character(e), "\n")
    FALSE
  })
}

# Helper function to run code if RUN_LLM_CODE is set
#
# Arguments:
#   code       R source text to evaluate.
#   test_name  Label used in the progress output.
#
# Returns NA when the environment variable RUN_LLM_CODE is not exactly
# "true" (nothing is executed), TRUE when the code ran without error, and
# FALSE when it raised an error (the error is printed, not re-raised).
#
# SECURITY NOTE: this evaluates LLM-generated code via eval(parse()); it is
# deliberately opt-in through RUN_LLM_CODE and should only be enabled in an
# environment where running untrusted code is acceptable.
run_code_if_enabled <- function(code, test_name) {
  if (Sys.getenv("RUN_LLM_CODE") != "true") {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")
  tryCatch({
    # Evaluate in a child environment: name lookup behaves as before, but
    # assignments made by the generated code cannot overwrite this helper's
    # own variables or shadow the functions used for reporting below.
    result <- eval(parse(text = code), envir = new.env(parent = environment()))
    cat("Code executed successfully. Result type:", class(result), "\n")
    if (is.data.frame(result)) {
      cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
      if (nrow(result) > 0) {
        cat("First few rows:\n")
        print(head(result, 3))
      }
    } else {
      cat("Result preview:\n")
      print(result)
    }
    TRUE
  }, error = function(e) {
    cat("Runtime error:", as.character(e), "\n")
    FALSE
  })
}

# Test: the configured LLM, given the README, produces plausible R code for a
# frequency query. The generated code is checked for expected identifiers,
# parsed for syntax, and only executed when RUN_LLM_CODE is enabled.
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt with README context and task
  prompt <- create_readme_prompt(
    "write R code to perform a frequency query for the word 'Demokratie' across the past three years. The code should use the RKorAPClient package and return a data frame.",
    "Write R code to query frequency of 'Demokratie' from the past three years using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("frequencyQuery", generated_code), "Generated code should include frequencyQuery")
  expect_true(grepl("Demokratie", generated_code), "Generated code should include the search term 'Demokratie'")

  # The year restriction is expected as a virtual corpus (vc) date clause
  expect_true(grepl("Date in ", generated_code), "Generated code should include a vc restriction on years")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Check that the generated code contains essential RKorAPClient patterns
  # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "frequency query")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})


# Test: the configured LLM, given the README, produces plausible R code for a
# collocation analysis. The generated code is checked for expected
# identifiers, parsed for syntax, and only executed when RUN_LLM_CODE is
# enabled.
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for collocation analysis
  prompt <- create_readme_prompt(
    "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function.",
    "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
  expect_true(grepl("setzen", generated_code), "Generated code should include the search term 'setzen'")
  # expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated collocation analysis code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "collocation analysis")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})

# Test: the configured LLM, given the README, produces plausible R code for a
# simple corpus query that fetches all results. The generated code is checked
# for expected identifiers and pipe usage, parsed for syntax, and only
# executed when RUN_LLM_CODE is enabled.
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
  # Skip if offline
  skip_if_offline()

  # Skip if no API keys are set
  skip_if_no_api_key()

  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for corpus query
  prompt <- create_readme_prompt(
    "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package.",
    "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 300)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
  expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
  expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Check that the generated code follows the README example pattern
  expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code),
    "Generated code should use pipe operators")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated corpus query code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "corpus query")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})