blob: 302a55e578b85f36299c94c79cb868e7dae000fc [file] [log] [blame]
# Helper function to locate the package Readme.md.
# Candidates are checked in order: the current directory, its parent, and its
# grandparent. Returns the first existing relative path, or NULL if none exists.
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  # Find() yields the first element satisfying the predicate, NULL otherwise
  Find(file.exists, candidates)
}
11
# Helper function to read README content.
# Returns the file located by find_readme_path() as one string with lines
# joined by "\n", or NULL when no README is found.
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a README without a trailing newline would otherwise raise an
  # "incomplete final line" warning that testthat reports as a test warning.
  readme_lines <- readLines(readme_path, warn = FALSE)
  paste(readme_lines, collapse = "\n")
}
21
# Helper function to call LLM API using tidyllm
#
# Arguments:
#   prompt      - text sent as the single message of the conversation.
#   max_tokens  - accepted for interface compatibility, but not forwarded to
#                 the chat call.
#                 NOTE(review): decide whether to pass this on as a
#                 provider-specific token limit or to drop the parameter.
#   temperature - passed to chat() as .temperature.
#   model       - model name; a "gpt-" prefix selects the OpenAI provider and a
#                 "claude-" prefix the Claude provider (case-insensitive).
#
# Returns the reply text obtained via get_reply().
# Skips the calling test when tidyllm is not installed or when the error text
# contains "429" or "401"; any other API failure is re-raised as
# "LLM API error: ...". An unsupported model name stops immediately.
call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
  # Validate the model up front so a configuration mistake is reported as such
  # instead of being wrapped as an "LLM API error" by the handler below.
  if (grepl("^gpt-", model, ignore.case = TRUE)) {
    make_provider <- function() tidyllm::openai()
  } else if (grepl("^claude-", model, ignore.case = TRUE)) {
    make_provider <- function() tidyllm::claude()
  } else {
    stop(paste("Unsupported model:", model), call. = FALSE)
  }

  # tidyllm is an optional dependency: skip rather than fail when it is absent.
  # Namespace-qualified calls avoid attaching the package as a side effect.
  skip_if_not_installed("tidyllm")

  tryCatch({
    provider <- make_provider()

    # Use tidyllm unified API
    result <- tidyllm::llm_message(prompt) |>
      tidyllm::chat(
        .provider = provider,
        .model = model,
        .temperature = temperature,
        .max_tries = 3
      )

    # Extract the reply text
    tidyllm::get_reply(result)
  }, error = function(e) {
    details <- as.character(e)
    if (grepl("429", details)) {
      skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
    } else if (grepl("401", details)) {
      skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY or ANTHROPIC_API_KEY)")
    } else {
      stop(paste("LLM API error:", details), call. = FALSE)
    }
  })
}
57
Marc Kupietz345211a2025-07-06 12:52:24 +020058
# Configuration variables
#
# LLM_MODEL defaults to "claude-3-5-sonnet-latest". To try another model
# without editing this file, set the LLM_MODEL environment variable, e.g. to
# "gpt-4o-mini", "claude-3-7-sonnet-latest" or "claude-sonnet-4-0".
# An unset or empty variable falls back to the default.
LLM_MODEL <- local({
  override <- Sys.getenv("LLM_MODEL", unset = "")
  if (nzchar(override)) override else "claude-3-5-sonnet-latest"
})
# KorAP instance the generated code is told to connect to
KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
65
# Helper function to create README-guided prompt.
# Combines a fixed instruction preamble, the KorAP URL requirement, the full
# README text and the concrete task into one prompt string.
# Stops with "README.md not found" when read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  readme_text <- read_readme_content()
  if (is.null(readme_text)) {
    stop("README.md not found")
  }

  preamble <- paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description, "\n\n"
  )
  url_requirement <- paste0(
    "IMPORTANT: Use the KorAP URL '", KORAP_URL, "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n"
  )
  readme_section <- paste0("README Documentation:\n", readme_text)
  task_section <- paste0(
    "\n\nTask: ", specific_task,
    "\n\nProvide only the R code without explanations."
  )

  paste0(preamble, url_requirement, readme_section, task_section)
}
83
# Helper function to extract R code from markdown code blocks.
#
# For each element of response_text:
#   * if it contains one or more complete fenced blocks (``` ... ```), only the
#     contents of those blocks are returned, joined by a blank line, so any
#     explanatory prose around the code is dropped;
#   * otherwise (no fence, or an unterminated one) the fence markers are
#     stripped and the remaining text is returned.
# Leading/trailing whitespace is removed. NA elements stay NA.
extract_r_code <- function(response_text) {
  extract_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }
    # Normalise Windows line endings so the fence patterns match
    text <- gsub("\r\n", "\n", text, fixed = TRUE)

    # Opening fence with optional info string, lazily up to the closing fence;
    # (?s) lets "." span newlines.
    fence_pattern <- "(?s)```[^\\n`]*\\n.*?```"
    fenced <- regmatches(text, gregexpr(fence_pattern, text, perl = TRUE))[[1]]

    if (length(fenced) > 0) {
      bodies <- sub("^```[^\\n`]*\\n", "", fenced, perl = TRUE)
      bodies <- sub("```$", "", bodies, perl = TRUE)
      return(trimws(paste(trimws(bodies), collapse = "\n\n")))
    }

    # No complete fenced block: just drop any stray fence markers
    trimws(gsub("```[rR]?\\n?", "", text))
  }

  vapply(as.character(response_text), extract_one, character(1), USE.NAMES = FALSE)
}
92
# Helper function to test code syntax.
# Returns TRUE when `code` parses as R; otherwise prints the parser error
# prefixed with "Syntax error:" and returns FALSE. The code is never evaluated.
test_code_syntax <- function(code) {
  report_failure <- function(err) {
    cat("Syntax error:", as.character(err), "\n")
    FALSE
  }

  tryCatch(
    {
      parse(text = code)
      TRUE
    },
    error = report_failure
  )
}
103
# Helper function to run code if RUN_LLM_CODE is set.
# Execution happens only when the RUN_LLM_CODE environment variable is exactly
# "true". Returns TRUE if the code ran (and its result printed) without error,
# FALSE on any runtime error, and NA when execution is disabled.
# NOTE: this evaluates LLM-generated code via eval(parse()); enable it only in
# an environment where running untrusted code is acceptable.
run_code_if_enabled <- function(code, test_name) {
  execution_enabled <- identical(Sys.getenv("RUN_LLM_CODE"), "true")
  if (!execution_enabled) {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")
  succeeded <- tryCatch(
    {
      result <- eval(parse(text = code))
      cat("Code executed successfully. Result type:", class(result), "\n")
      if (!is.data.frame(result)) {
        cat("Result preview:\n")
        print(result)
      } else {
        cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
        if (nrow(result) > 0) {
          cat("First few rows:\n")
          print(head(result, 3))
        }
      }
      TRUE
    },
    error = function(e) {
      cat("Runtime error:", as.character(e), "\n")
      FALSE
    }
  )
  succeeded
}
131
test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Note: tidyllm will handle API key checking and give appropriate errors

  # Build the README-grounded prompt for the frequency query task
  task_context <- "write R code to perform a frequency query for the word 'Deutschland' across multiple years (2022-2024). The code should use the RKorAPClient package and return a data frame."
  task_request <- "Write R code to query frequency of 'Deutschland' from 2022-2024 using RKorAPClient."
  prompt <- create_readme_prompt(task_context, task_request)

  # Ask the model and reduce its reply to bare R code
  llm_reply <- call_llm_api(prompt, max_tokens = 500)
  r_code <- extract_r_code(llm_reply)

  # The answer must mention the key API elements, the query term, the years
  # and the configured KorAP instance
  expect_true(grepl("KorAPConnection", r_code), info = "Generated code should include KorAPConnection")
  expect_true(grepl("frequencyQuery", r_code), info = "Generated code should include frequencyQuery")
  expect_true(grepl("Deutschland", r_code), info = "Generated code should include the search term 'Deutschland'")
  expect_true(grepl("202[2-4]", r_code), info = "Generated code should include years 2022-2024")
  expect_true(grepl(KORAP_URL, r_code, fixed = TRUE), info = "Generated code should include the specified KorAP URL")

  # A pipe-operator expectation is intentionally disabled for this task:
  # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")

  # The code must at least parse
  parses_cleanly <- test_code_syntax(r_code)
  expect_true(parses_cleanly, info = "Generated code should be syntactically valid R code")

  # Show the code for manual inspection
  cat("Generated code:\n", r_code, "\n")

  # Optionally execute it (controlled by RUN_LLM_CODE); NA means "not run"
  ran_ok <- run_code_if_enabled(r_code, "frequency query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, info = "Generated code should execute without runtime errors")
  }
})
171
Marc Kupietz345211a2025-07-06 12:52:24 +0200172
test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Note: tidyllm will handle API key checking and give appropriate errors

  # Build the README-grounded prompt for the collocation analysis task
  task_context <- "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function."
  task_request <- "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  prompt <- create_readme_prompt(task_context, task_request)

  # Ask the model and reduce its reply to bare R code
  llm_reply <- call_llm_api(prompt, max_tokens = 500)
  r_code <- extract_r_code(llm_reply)

  # The answer must mention the key API elements, the lemma, authorization
  # and the configured KorAP instance
  expect_true(grepl("KorAPConnection", r_code), info = "Generated code should include KorAPConnection")
  expect_true(grepl("collocationAnalysis", r_code), info = "Generated code should include collocationAnalysis")
  expect_true(grepl("setzen", r_code), info = "Generated code should include the search term 'setzen'")
  expect_true(grepl("auth", r_code), info = "Generated code should include auth() for collocation analysis")
  expect_true(grepl(KORAP_URL, r_code, fixed = TRUE), info = "Generated code should include the specified KorAP URL")

  # The code must at least parse
  parses_cleanly <- test_code_syntax(r_code)
  expect_true(parses_cleanly, info = "Generated code should be syntactically valid R code")

  # Show the code for manual inspection
  cat("Generated collocation analysis code:\n", r_code, "\n")

  # Optionally execute it (controlled by RUN_LLM_CODE); NA means "not run"
  ran_ok <- run_code_if_enabled(r_code, "collocation analysis")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, info = "Generated code should execute without runtime errors")
  }
})
209
test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
  # The prompt is built from the README, so skip when it cannot be located
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Note: tidyllm will handle API key checking and give appropriate errors

  # Build the README-grounded prompt for the corpus query task
  task_context <- "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package."
  task_request <- "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  prompt <- create_readme_prompt(task_context, task_request)

  # Ask the model and reduce its reply to bare R code
  llm_reply <- call_llm_api(prompt, max_tokens = 300)
  r_code <- extract_r_code(llm_reply)

  # The answer must mention the key API elements, the query string
  # and the configured KorAP instance
  expect_true(grepl("KorAPConnection", r_code), info = "Generated code should include KorAPConnection")
  expect_true(grepl("corpusQuery", r_code), info = "Generated code should include corpusQuery")
  expect_true(grepl("Hello world", r_code), info = "Generated code should include the search term 'Hello world'")
  expect_true(grepl("fetchAll", r_code), info = "Generated code should include fetchAll")
  expect_true(grepl(KORAP_URL, r_code, fixed = TRUE), info = "Generated code should include the specified KorAP URL")

  # The README examples chain calls with a pipe; either pipe flavour counts
  has_native_pipe <- grepl("\\|>", r_code)
  has_magrittr_pipe <- grepl("%>%", r_code)
  expect_true(has_native_pipe || has_magrittr_pipe,
    info = "Generated code should use pipe operators"
  )

  # The code must at least parse
  parses_cleanly <- test_code_syntax(r_code)
  expect_true(parses_cleanly, info = "Generated code should be syntactically valid R code")

  # Show the code for manual inspection
  cat("Generated corpus query code:\n", r_code, "\n")

  # Optionally execute it (controlled by RUN_LLM_CODE); NA means "not run"
  ran_ok <- run_code_if_enabled(r_code, "corpus query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, info = "Generated code should execute without runtime errors")
  }
})
Marc Kupietz06143702025-07-05 17:49:31 +0200249})