blob: 2cfcd78eac96313d024e04fcc5a9d6cb692b0e3b [file] [log] [blame]
# Locate the package README relative to the current working directory.
#
# The working directory is checked first, then its parent, then its
# grandparent. Returns the first candidate path that exists (exactly as listed
# below), or NULL when none of the three locations holds a "Readme.md".
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  # Find() stops at the first existing candidate and yields NULL otherwise.
  Find(file.exists, candidates)
}
11
# Read the README located by find_readme_path() into a single string.
#
# Returns NULL when find_readme_path() finds no file; otherwise the file's
# lines joined with "\n" (so a trailing newline in the file is not preserved).
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a README whose last line lacks a newline is still complete
  # input and should not surface as a warning inside a test.
  # encoding = "UTF-8" marks the strings as UTF-8 so non-ASCII text is not
  # misread on other locales -- assumes the README is UTF-8; TODO confirm.
  readme_lines <- readLines(readme_path, warn = FALSE, encoding = "UTF-8")
  paste(readme_lines, collapse = "\n")
}
21
# Send one prompt to an LLM through tidyllm and return the reply text.
#
# Arguments:
#   prompt       Prompt text, handed to llm_message().
#   max_tokens   Accepted for interface compatibility.
#                NOTE(review): this value is not forwarded to the provider,
#                so the provider's own output limit applies -- confirm whether
#                it should be passed on.
#   temperature  Passed to chat() as .temperature.
#   model        Model name; its prefix (gpt-, claude-, gemini-, matched
#                case-insensitively) selects the provider. Defaults to
#                LLM_MODEL.
#
# Returns whatever get_reply() extracts from the chat result.
#
# The running test is skipped when tidyllm is not installed, or when the
# request fails with an error message containing "429" or "401". An
# unsupported model name raises an error immediately; any other failure is
# re-raised with an "LLM API error:" prefix.
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
  # Validate the model before loading tidyllm or touching the network, so a
  # configuration mistake is reported as such instead of being wrapped as an
  # API error by the handler below.
  provider_name <- if (grepl("^gpt-", model, ignore.case = TRUE)) {
    "openai"
  } else if (grepl("^claude-", model, ignore.case = TRUE)) {
    "claude"
  } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
    "gemini"
  } else {
    stop(
      paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"),
      call. = FALSE
    )
  }

  # A missing optional dependency is a reason to skip, not a test failure.
  skip_if_not_installed("tidyllm")
  library(tidyllm)

  tryCatch({
    provider <- switch(provider_name,
      openai = openai(),
      claude = claude(),
      gemini = gemini()
    )

    # Use the tidyllm unified API
    result <- llm_message(prompt) |>
      chat(
        .provider = provider,
        .model = model,
        .temperature = temperature,
        .max_tries = 3
      )

    # Extract the reply text
    get_reply(result)
  }, error = function(e) {
    # Match on the message only; as.character(e) would also include the
    # deparsed call.
    error_text <- conditionMessage(e)
    if (grepl("429", error_text)) {
      skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
    } else if (grepl("401", error_text)) {
      skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
    } else {
      stop(paste("LLM API error:", error_text), call. = FALSE)
    }
  })
}
60
Marc Kupietz345211a2025-07-06 12:52:24 +020061
# Configuration variables
#
# LLM_MODEL is the model under test; call_llm_api() picks the provider from
# its prefix. Set the LLM_MODEL environment variable to try another model
# without editing this file. Model names that have been used here:
#   "gpt-4o-mini"               OpenAI model option
#   "claude-3-5-sonnet-latest"  Claude model option
#   "claude-3-7-sonnet-latest"  Claude model option (default)
#   "claude-sonnet-4-0"         Claude model option
#   "gemini-2.5-pro"            Google Gemini model option
#   "gemini-1.5-pro"            Google Gemini model option
#   "gemini-2.5-flash"          Google Gemini model option (faster)
LLM_MODEL <- local({
  default_model <- "claude-3-7-sonnet-latest"
  requested_model <- Sys.getenv("LLM_MODEL", unset = "")
  # An unset or empty variable falls back to the default model.
  if (nzchar(requested_model)) requested_model else default_model
})
KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
71
# Build the prompt sent to the LLM, grounded in the README text.
#
# Arguments:
#   task_description  Text completing the opening sentence of the prompt.
#   specific_task     Text placed after the "Task: " label.
#
# Returns one string made of, in order: the role/task preamble, the
# instruction to use KORAP_URL, the README text, the task, and the request
# for code only. Stops with an error when read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  documentation <- read_readme_content()
  if (is.null(documentation)) {
    stop("README.md not found")
  }

  preamble <- paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description, "\n\n"
  )
  url_instruction <- paste0(
    "IMPORTANT: Use the KorAP URL '", KORAP_URL, "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n"
  )
  readme_section <- paste0("README Documentation:\n", documentation)
  task_section <- paste0(
    "\n\nTask: ", specific_task,
    "\n\nProvide only the R code without explanations."
  )

  paste0(preamble, url_instruction, readme_section, task_section)
}
89
# Extract R code from an LLM reply that may wrap it in markdown code fences.
#
# Arguments:
#   response_text  The reply text (normally a single string).
#
# Returns the trimmed code:
#   * When the reply contains complete fenced blocks, only their contents are
#     returned (joined with "\n"), so explanatory prose around the code is
#     dropped. Blocks tagged r / R / {r} or untagged are preferred; if no
#     such block exists, all fenced blocks are used.
#   * Otherwise (no fences, an unterminated fence, or non-scalar input) the
#     fence markers are simply stripped from the text.
extract_r_code <- function(response_text) {
  strip_fence_markers <- function(text) {
    trimws(gsub("```[rR]?\\n?", "", text))
  }

  if (length(response_text) != 1L || is.na(response_text)) {
    return(strip_fence_markers(response_text))
  }

  # (?s) lets "." span newlines; group 1 is the info string after the opening
  # fence, group 2 the block body up to the next closing fence.
  fence_pattern <- "(?s)```([^\\n`]*)\\n(.*?)```"
  blocks <- regmatches(
    response_text,
    gregexpr(fence_pattern, response_text, perl = TRUE)
  )[[1]]

  if (length(blocks) == 0L) {
    return(strip_fence_markers(response_text))
  }

  tags <- tolower(trimws(sub(fence_pattern, "\\1", blocks, perl = TRUE)))
  bodies <- sub(fence_pattern, "\\2", blocks, perl = TRUE)

  is_r_block <- tags %in% c("", "r", "{r}")
  if (any(is_r_block)) {
    bodies <- bodies[is_r_block]
  }

  trimws(paste(bodies, collapse = "\n"))
}
98
# Check whether a string parses as R code.
#
# Returns TRUE when parse() accepts the text. On a parse error the error is
# printed after a "Syntax error:" label and FALSE is returned.
test_code_syntax <- function(code) {
  outcome <- try(parse(text = code), silent = TRUE)
  if (!inherits(outcome, "try-error")) {
    return(TRUE)
  }
  # try() keeps the original condition object in the "condition" attribute.
  cat("Syntax error:", as.character(attr(outcome, "condition")), "\n")
  FALSE
}
109
# Execute generated code, but only when RUN_LLM_CODE is set to "true".
#
# Arguments:
#   code       R source text to parse and evaluate.
#   test_name  Label used in the progress message.
#
# Returns NA when execution is disabled, TRUE when the code ran without an
# error, and FALSE when it raised one. Progress, a preview of the result and
# any runtime error are written with cat()/print().
run_code_if_enabled <- function(code, test_name) {
  if (!identical(Sys.getenv("RUN_LLM_CODE"), "true")) {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")

  # SECURITY: this evaluates code written by an LLM. It is opt-in through
  # RUN_LLM_CODE and should only be enabled in an environment where running
  # untrusted code is acceptable.
  #
  # The code runs in a child environment so that its assignments (e.g. to
  # `result`, `code` or even `cat`) cannot overwrite this helper's own
  # variables; name lookup still falls through to the same scopes as before.
  eval_env <- new.env(parent = environment())

  tryCatch({
    result <- eval(parse(text = code), envir = eval_env)
    cat("Code executed successfully. Result type:", class(result), "\n")
    if (is.data.frame(result)) {
      cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
      if (nrow(result) > 0) {
        cat("First few rows:\n")
        print(head(result, 3))
      }
    } else {
      cat("Result preview:\n")
      print(result)
    }
    TRUE
  }, error = function(e) {
    cat("Runtime error:", as.character(e), "\n")
    FALSE
  })
}
137
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located.
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # No explicit API key check here; call_llm_api() skips on authentication
  # and rate-limit errors.

  # Ask the model for code, with the README as context.
  llm_prompt <- create_readme_prompt(
    "write R code to perform a frequency query for the word 'Deutschland' across multiple years (2022-2024). The code should use the RKorAPClient package and return a data frame.",
    "Write R code to query frequency of 'Deutschland' from 2022-2024 using RKorAPClient."
  )
  generated_code <- extract_r_code(call_llm_api(llm_prompt, max_tokens = 500))

  # Regular expressions the generated code must match, each with the message
  # shown when it does not.
  required_patterns <- c(
    "KorAPConnection" = "Generated code should include KorAPConnection",
    "frequencyQuery" = "Generated code should include frequencyQuery",
    "Deutschland" = "Generated code should include the search term 'Deutschland'",
    "202[2-4]" = "Generated code should include years 2022-2024"
  )
  for (pattern in names(required_patterns)) {
    expect_true(grepl(pattern, generated_code), required_patterns[[pattern]])
  }
  # The URL is matched literally, not as a regular expression.
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Pipe usage is deliberately not required for this task:
  # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")

  # The code must at least parse.
  expect_true(test_code_syntax(generated_code), "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated code:\n", generated_code, "\n")

  # Optionally execute it; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(generated_code, "frequency query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})
177
Marc Kupietz345211a2025-07-06 12:52:24 +0200178
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located.
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # No explicit API key check here; call_llm_api() skips on authentication
  # and rate-limit errors.

  # Ask the model for collocation analysis code, with the README as context.
  llm_prompt <- create_readme_prompt(
    "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function.",
    "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  )
  generated_code <- extract_r_code(call_llm_api(llm_prompt, max_tokens = 500))

  # Regular expressions the generated code must match, each with the message
  # shown when it does not.
  required_patterns <- c(
    "KorAPConnection" = "Generated code should include KorAPConnection",
    "collocationAnalysis" = "Generated code should include collocationAnalysis",
    "setzen" = "Generated code should include the search term 'setzen'",
    "auth" = "Generated code should include auth() for collocation analysis"
  )
  for (pattern in names(required_patterns)) {
    expect_true(grepl(pattern, generated_code), required_patterns[[pattern]])
  }
  # The URL is matched literally, not as a regular expression.
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # The code must at least parse.
  expect_true(test_code_syntax(generated_code), "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated collocation analysis code:\n", generated_code, "\n")

  # Optionally execute it; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(generated_code, "collocation analysis")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})
215
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located.
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # No explicit API key check here; call_llm_api() skips on authentication
  # and rate-limit errors.

  # Ask the model for corpus query code, with the README as context.
  llm_prompt <- create_readme_prompt(
    "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package.",
    "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  )
  generated_code <- extract_r_code(call_llm_api(llm_prompt, max_tokens = 300))

  # Regular expressions the generated code must match, each with the message
  # shown when it does not.
  required_patterns <- c(
    "KorAPConnection" = "Generated code should include KorAPConnection",
    "corpusQuery" = "Generated code should include corpusQuery",
    "Hello world" = "Generated code should include the search term 'Hello world'",
    "fetchAll" = "Generated code should include fetchAll"
  )
  for (pattern in names(required_patterns)) {
    expect_true(grepl(pattern, generated_code), required_patterns[[pattern]])
  }
  # The URL is matched literally, not as a regular expression.
  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # The README examples chain calls with a pipe; accept either pipe operator.
  uses_pipe <- grepl("\\|>", generated_code) || grepl("%>%", generated_code)
  expect_true(uses_pipe, "Generated code should use pipe operators")

  # The code must at least parse.
  expect_true(test_code_syntax(generated_code), "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated corpus query code:\n", generated_code, "\n")

  # Optionally execute it; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(generated_code, "corpus query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})
Marc Kupietz06143702025-07-05 17:49:31 +0200255})