blob: b3da59ff2c46ee7899542c837641a075afc83a79 [file] [log] [blame]
Marc Kupietza28a99a2025-07-06 14:52:47 +02001library(tidyllm)
2
# Helper function to find the README file in the current or parent directories.
#
# file_names: README file names to accept, in order of preference within one
#             directory. Several casings are tried so that the lookup also
#             works on case-sensitive file systems.
# max_depth:  how many parent levels above the working directory are searched
#             (0 = working directory only).
#
# Returns the relative path of the first match (nearest directory first), or
# NULL when no candidate exists.
find_readme_path <- function(file_names = c("Readme.md", "README.md", "readme.md"),
                             max_depth = 2L) {
  for (depth in seq_len(max_depth + 1L) - 1L) {
    # "" for the working directory, "../" for its parent, and so on.
    prefix <- strrep("../", depth)
    for (file_name in file_names) {
      candidate <- paste0(prefix, file_name)
      if (file.exists(candidate)) {
        return(candidate)
      }
    }
  }
  NULL
}
13
# Helper function to read README content.
#
# Returns the README text as a single string with lines joined by "\n", or
# NULL when find_readme_path() does not locate a README file.
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a missing final newline is harmless here and should not
  # surface as a warning in the test report. The content is declared as UTF-8
  # so non-ASCII characters are not misread in non-UTF-8 locales.
  readme_lines <- readLines(readme_path, warn = FALSE, encoding = "UTF-8")
  paste(readme_lines, collapse = "\n")
}
23
# Helper function to call an LLM API using tidyllm.
#
# prompt:      text sent as the single message of the conversation.
# max_tokens:  kept for backward compatibility with existing callers.
#              NOTE(review): this value is not forwarded to the chat() call
#              below - confirm whether a provider-specific token limit should
#              be wired in.
# temperature: passed to chat() as .temperature.
# model:       model name; its prefix (gpt-, claude-, gemini-) selects the
#              provider. Defaults to the file-level LLM_MODEL.
#
# Returns the reply extracted with get_reply(). An unsupported model prefix
# raises an error immediately. Errors raised during the API call whose text
# contains "429" or "401" skip the running test; any other error is re-raised
# with an "LLM API error:" prefix.
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
  # Validate the model outside of tryCatch(): a configuration mistake must not
  # be reported as an API failure, and a model name that happens to contain
  # "401" or "429" must not be mistaken for an HTTP status and skipped.
  if (grepl("^gpt-", model, ignore.case = TRUE)) {
    provider_name <- "openai"
  } else if (grepl("^claude-", model, ignore.case = TRUE)) {
    provider_name <- "claude"
  } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
    provider_name <- "gemini"
  } else {
    stop(
      paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"),
      call. = FALSE
    )
  }

  tryCatch({
    # Build the provider inside tryCatch() so that its errors are handled
    # together with errors from the request itself.
    provider <- switch(provider_name,
      openai = openai(),
      claude = claude(),
      gemini = gemini()
    )

    # Use tidyllm unified API
    result <- llm_message(prompt) |>
      chat(
        .provider = provider,
        .model = model,
        .temperature = temperature,
        .max_tries = 3
      )

    # Extract the reply text
    get_reply(result)
  }, error = function(e) {
    # as.character() is used rather than only the top-level message so the
    # status code is searched in the full printed form of the error.
    error_text <- as.character(e)
    if (grepl("429", error_text)) {
      skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
    } else if (grepl("401", error_text)) {
      skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
    } else {
      stop(paste("LLM API error:", error_text))
    }
  })
}
60
Marc Kupietz345211a2025-07-06 12:52:24 +020061
# Configuration variables
#
# LLM_MODEL is the model under test; call_llm_api() picks the provider from
# its prefix (gpt-, claude-, gemini-). Set the LLM_MODEL environment variable
# to try another model without editing this file, for example:
#   gpt-4o-mini                # OpenAI model option
#   claude-3-7-sonnet-latest   # Claude model option
#   claude-sonnet-4-0          # Claude model option
#   gemini-2.5-pro             # Google Gemini model option
#   gemini-1.5-pro             # Google Gemini model option
#   gemini-2.5-flash           # Google Gemini model option (faster)
LLM_MODEL <- Sys.getenv("LLM_MODEL", unset = "")
if (!nzchar(LLM_MODEL)) {
  # Default when the environment variable is unset or empty.
  LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
}
KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
71
# Helper function to create a README-guided prompt.
#
# task_description: text completing the opening sentence of the prompt.
# specific_task:    text placed after the "Task:" marker.
#
# Returns one prompt string made of the opening instruction, the KorAP URL
# requirement (built from KORAP_URL), the README text, and the task.
# Stops with an error when read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  readme_doc <- read_readme_content()
  if (is.null(readme_doc)) {
    stop("README.md not found")
  }

  intro <- paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description, "\n\n"
  )
  url_rule <- paste0(
    "IMPORTANT: Use the KorAP URL '", KORAP_URL,
    "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n"
  )
  docs_section <- paste0("README Documentation:\n", readme_doc)
  task_section <- paste0(
    "\n\nTask: ", specific_task,
    "\n\nProvide only the R code without explanations."
  )

  paste0(intro, url_rule, docs_section, task_section)
}
89
# Helper function to extract R code from markdown code blocks.
#
# response_text: character vector of LLM replies (normally a single string).
#
# For each element:
#   * if it contains complete fenced blocks (``` ... ```), only the contents
#     of those blocks are returned, joined by newlines. Blocks that are
#     untagged or tagged as R are preferred; blocks in other languages are
#     used only when no such block exists. Prose around the blocks is
#     dropped so it cannot break parsing of the code.
#   * otherwise any stray fence markers are stripped and the text is returned.
# Leading and trailing whitespace is removed in both cases.
extract_r_code <- function(response_text) {
  extract_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }

    # A complete fence: opening marker with optional info string, a newline,
    # the shortest possible body, and the closing marker.
    fence_pattern <- "(?s)```[^\\n`]*\\n.*?```"
    fenced <- regmatches(text, gregexpr(fence_pattern, text, perl = TRUE))[[1]]

    if (length(fenced) == 0L) {
      # No complete fenced block (plain code, inline fences, or a truncated
      # reply): fall back to removing the markers only.
      stripped <- gsub("```[rR]?\\n?", "", text)
      stripped <- gsub("```\\n?$", "", stripped)
      return(trimws(stripped))
    }

    # Info string of each block, e.g. "r", "{r}", "bash" or "".
    info <- tolower(trimws(sub("(?s)^```([^\\n]*)\\n.*$", "\\1", fenced, perl = TRUE)))
    is_r <- info %in% c("", "r", "{r}") | grepl("^\\{r[ ,}]", info)
    if (any(is_r)) {
      fenced <- fenced[is_r]
    }

    bodies <- sub("(?s)^```[^\\n]*\\n(.*?)```$", "\\1", fenced, perl = TRUE)
    trimws(paste(trimws(bodies), collapse = "\n"))
  }

  vapply(as.character(response_text), extract_one, character(1), USE.NAMES = FALSE)
}
98
# Helper function to test code syntax.
#
# code: character string (or vector) of R source text.
#
# Returns TRUE when parse() accepts the text. On a parse error the error is
# printed with a "Syntax error:" prefix and FALSE is returned.
test_code_syntax <- function(code) {
  outcome <- tryCatch(parse(text = code), error = function(parse_error) parse_error)
  if (inherits(outcome, "error")) {
    cat("Syntax error:", as.character(outcome), "\n")
    return(FALSE)
  }
  TRUE
}
109
# Helper function to run code if RUN_LLM_CODE is set.
#
# code:      character string with the R code to execute.
# test_name: label used in the progress output.
#
# Returns NA when execution is disabled, TRUE when the code ran without an
# error, and FALSE when it raised one (the error is printed).
# Execution is enabled only when the RUN_LLM_CODE environment variable equals
# "true" (compared case-insensitively).
run_code_if_enabled <- function(code, test_name) {
  run_enabled <- identical(tolower(Sys.getenv("RUN_LLM_CODE")), "true")
  if (!run_enabled) {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")

  # SECURITY NOTE: this evaluates LLM-generated (untrusted) code; it is only
  # reached after an explicit opt-in through RUN_LLM_CODE.
  # The code runs in a child environment of this function so that names it
  # defines (e.g. `result`, `is.data.frame`) cannot shadow what the reporting
  # code below uses, while name lookup still follows the same chain as before.
  eval_env <- new.env(parent = environment())

  tryCatch({
    result <- eval(parse(text = code), envir = eval_env)
    cat("Code executed successfully. Result type:", class(result), "\n")
    if (is.data.frame(result)) {
      cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
      if (nrow(result) > 0) {
        cat("First few rows:\n")
        print(head(result, 3))
      }
    } else {
      cat("Result preview:\n")
      print(result)
    }
    TRUE
  }, error = function(e) {
    cat("Runtime error:", as.character(e), "\n")
    FALSE
  })
}
137
# Asks the configured model for a frequency query on 'Demokratie' restricted
# to recent years, then checks the returned code for the expected function
# names, the search term, a date restriction, the configured KorAP URL and
# valid syntax. The code is only executed when run_code_if_enabled() allows it.
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
  # Check for README file
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Note: tidyllm will handle API key checking and give appropriate errors

  # Create the prompt with README context and task
  prompt <- create_readme_prompt(
    "write R code to perform a frequency query for the word 'Demokratie' across the past three years. The code should use the RKorAPClient package and return a data frame.",
    "Write R code to query frequency of 'Demokratie' from the past three years using RKorAPClient."
  )

  # Call LLM API
  generated_response <- call_llm_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("frequencyQuery", generated_code), "Generated code should include frequencyQuery")
  expect_true(grepl("Demokratie", generated_code), "Generated code should include the search term 'Demokratie'")

  # The year restriction is only checked for its presence, not for the
  # concrete years used.
  expect_true(grepl("Date in ", generated_code), "Generated code should include a vc restriction on years")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Check that the generated code contains essential RKorAPClient patterns
  # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")

  # Test code syntax
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated code:\n", generated_code, "\n")

  # Run the code if RUN_LLM_CODE is set
  execution_result <- run_code_if_enabled(generated_code, "frequency query")
  if (!is.na(execution_result)) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})
179
Marc Kupietz345211a2025-07-06 12:52:24 +0200180
# Asks the configured model for a collocation analysis of the lemma 'setzen'
# and checks the returned code for the expected function names, the search
# term, the configured KorAP URL and valid syntax. Execution of the code is
# left to run_code_if_enabled().
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
  # Nothing to test without the README that guides the model.
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # API keys are not checked here; tidyllm reports missing or invalid keys.

  # Prompt for the collocation analysis task.
  task_description <- "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function."
  specific_task <- "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  prompt <- create_readme_prompt(task_description, specific_task)

  # Query the model and reduce the reply to plain code.
  generated_code <- call_llm_api(prompt, max_tokens = 500) |>
    extract_r_code()

  # Content checks.
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
  expect_true(grepl("setzen", generated_code), "Generated code should include the search term 'setzen'")
  # expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # The code must at least parse.
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated collocation analysis code:\n", generated_code, "\n")

  # Optional execution; NA means execution was not enabled.
  execution_result <- run_code_if_enabled(generated_code, "collocation analysis")
  if (isFALSE(is.na(execution_result))) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})
217
# Asks the configured model for a simple corpus query for 'Hello world' that
# fetches all results, and checks the returned code for the expected function
# names, the search term, the configured KorAP URL, a pipe operator and valid
# syntax. Execution of the code is left to run_code_if_enabled().
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
  # Nothing to test without the README that guides the model.
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # API keys are not checked here; tidyllm reports missing or invalid keys.

  # Prompt for the corpus query task.
  task_description <- "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package."
  specific_task <- "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  prompt <- create_readme_prompt(task_description, specific_task)

  # Query the model and reduce the reply to plain code.
  generated_code <- call_llm_api(prompt, max_tokens = 300) |>
    extract_r_code()

  # Content checks.
  expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
  expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
  expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
  expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Either the native pipe or the magrittr pipe is accepted.
  expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code),
              "Generated code should use pipe operators")

  # The code must at least parse.
  syntax_valid <- test_code_syntax(generated_code)
  expect_true(syntax_valid, "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated corpus query code:\n", generated_code, "\n")

  # Optional execution; NA means execution was not enabled.
  execution_result <- run_code_if_enabled(generated_code, "corpus query")
  if (isFALSE(is.na(execution_result))) {
    expect_true(execution_result, "Generated code should execute without runtime errors")
  }
})