blob: 93aed658f407ba11d48d588ea1059a252e690d66 [file] [log] [blame]
# Locate the package's Readme.md relative to the working directory.
#
# The current directory is checked first, then one and two levels up.
# Returns the first candidate path that exists, or NULL when none does.
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  existing <- candidates[file.exists(candidates)]
  if (length(existing) == 0) {
    return(NULL)
  }
  existing[[1]]
}
11
# Read the README located by find_readme_path() into a single string.
#
# Returns NULL when no README is found; otherwise the file's lines joined
# with "\n".
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a README without a trailing newline is fine and must not
  # surface as a warning in the test run. encoding = "UTF-8" marks the lines
  # as UTF-8 so non-ASCII text is not misinterpreted in non-UTF-8 locales.
  readme_lines <- readLines(readme_path, warn = FALSE, encoding = "UTF-8")
  paste(readme_lines, collapse = "\n")
}
21
# Send a single-turn chat completion request to the OpenAI API.
#
# prompt      - text sent as the only "user" message
# max_tokens  - passed through as the request's `max_tokens`
# temperature - passed through as the request's `temperature`
#
# Returns the `content` of the first choice in the parsed JSON response.
#
# Rate-limit (HTTP 429) and authentication (HTTP 401) failures skip the
# calling test instead of failing it; any other error is re-raised with an
# "OpenAI API error:" prefix.
call_openai_api <- function(prompt, max_tokens = 500, temperature = 0.1) {
  library(httr2)
  # NOTE(review): jsonlite is not called directly here; presumably it is
  # attached because httr2's JSON helpers need it installed - confirm.
  library(jsonlite)

  tryCatch({
    response <- request("https://api.openai.com/v1/chat/completions") |>
      req_headers(
        "Authorization" = paste("Bearer", Sys.getenv("OPENAI_API_KEY")),
        "Content-Type" = "application/json"
      ) |>
      req_body_json(list(
        model = "gpt-4.1-mini",
        messages = list(
          list(role = "user", content = prompt)
        ),
        max_tokens = max_tokens,
        temperature = temperature
      )) |>
      req_retry(max_tries = 3) |>
      req_perform()

    # Parse the response and return the text of the first choice.
    result <- response |> resp_body_json()
    if (length(result$choices) == 0) {
      stop("response contained no choices")
    }
    result$choices[[1]]$message$content
  }, error = function(e) {
    # httr2 signals HTTP failures with a status-specific condition class.
    # Checking the class avoids treating any error whose text merely
    # contains the digits "429" or "401" as a rate-limit/auth problem.
    if (inherits(e, "httr2_http_429")) {
      skip("OpenAI API rate limit exceeded - please try again later or check your API key/credits")
    } else if (inherits(e, "httr2_http_401")) {
      skip("OpenAI API authentication failed - please check your OPENAI_API_KEY")
    } else {
      stop(paste("OpenAI API error:", conditionMessage(e)), call. = FALSE)
    }
  })
}
57
# KorAP URL for testing: create_readme_prompt() instructs the model to use it
# as the KorAPUrl parameter, and each test checks that this exact URL appears
# in the generated code.
KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
60
# Build the prompt sent to the model: a role preamble, the task description,
# the instruction to use KORAP_URL, the full README text and the concrete
# task, followed by a request for code only.
#
# task_description - continuation of the preamble sentence describing the task
# specific_task    - short task statement placed after the README text
#
# Stops with an error when read_readme_content() finds no README.
create_readme_prompt <- function(task_description, specific_task) {
  documentation <- read_readme_content()
  if (is.null(documentation)) {
    stop("README.md not found")
  }

  # Fixed fragments that do not depend on the arguments.
  preamble <- "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, "
  url_instruction <- paste0(
    "IMPORTANT: Use the KorAP URL '", KORAP_URL,
    "' as the KorAPUrl parameter in KorAPConnection.\n\n"
  )
  closing <- "\n\nProvide only the R code without explanations."

  paste0(
    preamble, task_description, "\n\n",
    url_instruction,
    "README Documentation:\n", documentation,
    "\n\nTask: ", specific_task,
    closing
  )
}
78
# Extract R code from a model response that may wrap it in markdown fences.
#
# response_text - character vector of response texts (typically length one)
#
# For each element:
#   * if it contains one or more complete fenced blocks (```tag ... ```),
#     only the contents of those blocks are returned, joined by newlines, so
#     explanatory prose around the code is dropped;
#   * otherwise (no fences, or an unterminated fence from a truncated
#     response) stray fence markers are stripped and the rest is returned.
# Leading/trailing whitespace is trimmed. NA stays NA; empty input yields
# character(0).
extract_r_code <- function(response_text) {
  extract_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }
    # Normalise Windows line endings so the fence patterns can rely on "\n".
    text <- gsub("\r\n", "\n", text, fixed = TRUE)

    # A complete fenced block: opening fence with an optional info string
    # (r, R, {r}, ...), a newline, then lazily everything up to the next fence.
    fenced <- regmatches(
      text,
      gregexpr("(?s)```[^\\n`]*\\n.*?```", text, perl = TRUE)
    )[[1]]

    if (length(fenced) > 0) {
      bodies <- sub("^```[^\n]*\n", "", fenced)
      bodies <- sub("```$", "", bodies)
      return(trimws(paste(trimws(bodies), collapse = "\n")))
    }

    # Fallback: drop bare fence markers. The language tag is only removed
    # when it is a standalone "r"/"R", so code that directly follows a fence
    # and happens to start with "r" keeps its first letter.
    trimws(gsub("```(?:[rR]\\b)?\\n?", "", text, perl = TRUE))
  }

  vapply(
    as.character(response_text),
    extract_one,
    character(1),
    USE.NAMES = FALSE
  )
}
87
# Check whether a string parses as R code.
#
# code - character string with R source
#
# Returns TRUE when parse() succeeds. On a parse error the error is printed
# with a "Syntax error:" prefix and FALSE is returned.
test_code_syntax <- function(code) {
  report_parse_failure <- function(err) {
    cat("Syntax error:", as.character(err), "\n")
    FALSE
  }

  tryCatch(
    {
      parse(text = code)
      TRUE
    },
    error = report_parse_failure
  )
}
98
# Optionally execute generated code and report the outcome on stdout.
#
# Execution is opt-in: it only happens when the RUN_LLM_CODE environment
# variable is exactly "true".
#
# code      - character string with R source to run
# test_name - label used in the progress message
#
# Returns TRUE when the code ran without error, FALSE when it raised an
# error, and NA when execution is disabled.
#
# SECURITY NOTE: `code` is model-generated and is evaluated as-is with the
# full privileges of the R session (files, network, installed packages).
# Only set RUN_LLM_CODE=true where that is acceptable.
run_code_if_enabled <- function(code, test_name) {
  if (!identical(Sys.getenv("RUN_LLM_CODE"), "true")) {
    cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
    return(NA)
  }

  cat("Running generated code for", test_name, "...\n")

  # Evaluate in a child environment rather than in this function's own frame:
  # names the generated code defines (e.g. `cat`, `print`, `result`) then
  # cannot shadow the functions and locals used for reporting below. Lookups
  # still fall through to this frame and its parents, so everything that was
  # visible to the generated code before remains visible.
  sandbox <- new.env(parent = environment())

  tryCatch({
    result <- eval(parse(text = code), envir = sandbox)
    cat("Code executed successfully. Result type:", class(result), "\n")
    if (is.data.frame(result)) {
      cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
      if (nrow(result) > 0) {
        cat("First few rows:\n")
        print(head(result, 3))
      }
    } else {
      cat("Result preview:\n")
      print(result)
    }
    TRUE
  }, error = function(e) {
    cat("Runtime error:", as.character(e), "\n")
    FALSE
  })
}
126
# End-to-end check: with the README as context, the model is asked for a
# frequency query for 'Deutschland' over 2010-2015. Skipped when no API key
# is set or no Readme.md can be found.
test_that("GPT-4.1 mini can solve frequency query task with README guidance", {
  has_api_key <- nzchar(Sys.getenv("OPENAI_API_KEY"))
  skip_if_not(has_api_key, "OPENAI_API_KEY not set")
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Assemble the README-backed prompt for this task.
  task_description <- paste0(
    "write R code to perform a frequency query for the word 'Deutschland' ",
    "across multiple years (2010-2015). The code should use the RKorAPClient ",
    "package and return a data frame with year and frequency columns."
  )
  specific_task <- "Write R code to query frequency of 'Deutschland' from 2010-2015 using RKorAPClient."
  llm_prompt <- create_readme_prompt(task_description, specific_task)

  # Ask the model and strip any markdown wrapping from its answer.
  llm_code <- extract_r_code(call_openai_api(llm_prompt, max_tokens = 500))

  # The answer must mention the key API pieces, the search term, a year in
  # range and the prescribed KorAP URL.
  expect_true(grepl("KorAPConnection", llm_code), "Generated code should include KorAPConnection")
  expect_true(grepl("frequencyQuery", llm_code), "Generated code should include frequencyQuery")
  expect_true(grepl("Deutschland", llm_code), "Generated code should include the search term 'Deutschland'")
  expect_true(grepl("201[0-5]", llm_code), "Generated code should include years 2010-2015")
  expect_true(grepl(KORAP_URL, llm_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Either the native or the magrittr pipe counts.
  uses_pipe <- grepl("\\|>", llm_code) || grepl("%>%", llm_code)
  expect_true(uses_pipe, "Generated code should use pipe operators")

  # The answer must at least parse.
  parses <- test_code_syntax(llm_code)
  expect_true(parses, "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated code:\n", llm_code, "\n")

  # Optional execution; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(llm_code, "frequency query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})
165
# End-to-end check: with the README as context, the model is asked for a
# collocation analysis of the lemma 'setzen'. Skipped when no API key is set
# or no Readme.md can be found.
test_that("GPT-4.1 mini can solve collocation analysis task with README guidance", {
  has_api_key <- nzchar(Sys.getenv("OPENAI_API_KEY"))
  skip_if_not(has_api_key, "OPENAI_API_KEY not set")
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Assemble the README-backed prompt for this task.
  task_description <- paste0(
    "write R code to perform a collocation analysis for the lemma 'setzen'. ",
    "The code should use the RKorAPClient package's collocationAnalysis function."
  )
  specific_task <- "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  llm_prompt <- create_readme_prompt(task_description, specific_task)

  # Ask the model and strip any markdown wrapping from its answer.
  llm_code <- extract_r_code(call_openai_api(llm_prompt, max_tokens = 500))

  # The answer must mention the key API pieces, the search term, an auth
  # step and the prescribed KorAP URL.
  expect_true(grepl("KorAPConnection", llm_code), "Generated code should include KorAPConnection")
  expect_true(grepl("collocationAnalysis", llm_code), "Generated code should include collocationAnalysis")
  expect_true(grepl("setzen", llm_code), "Generated code should include the search term 'setzen'")
  expect_true(grepl("auth", llm_code), "Generated code should include auth() for collocation analysis")
  expect_true(grepl(KORAP_URL, llm_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # At least one context-size parameter has to be present.
  has_context_size <- grepl("leftContextSize|rightContextSize", llm_code)
  expect_true(has_context_size, "Generated code should include context size parameters")

  # The answer must at least parse.
  parses <- test_code_syntax(llm_code)
  expect_true(parses, "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated collocation analysis code:\n", llm_code, "\n")

  # Optional execution; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(llm_code, "collocation analysis")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})
204
# End-to-end check: with the README as context, the model is asked for a
# simple corpus query for 'Hello world' that fetches all results. Skipped
# when no API key is set or no Readme.md can be found.
test_that("GPT-4.1 mini can solve corpus query task with README guidance", {
  has_api_key <- nzchar(Sys.getenv("OPENAI_API_KEY"))
  skip_if_not(has_api_key, "OPENAI_API_KEY not set")
  skip_if(is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Assemble the README-backed prompt for this task.
  task_description <- paste0(
    "write R code to perform a simple corpus query for 'Hello world' and fetch ",
    "all results. The code should use the RKorAPClient package."
  )
  specific_task <- "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  llm_prompt <- create_readme_prompt(task_description, specific_task)

  # Ask the model and strip any markdown wrapping from its answer.
  llm_code <- extract_r_code(call_openai_api(llm_prompt, max_tokens = 300))

  # The answer must mention the key API pieces, the search term and the
  # prescribed KorAP URL.
  expect_true(grepl("KorAPConnection", llm_code), "Generated code should include KorAPConnection")
  expect_true(grepl("corpusQuery", llm_code), "Generated code should include corpusQuery")
  expect_true(grepl("Hello world", llm_code), "Generated code should include the search term 'Hello world'")
  expect_true(grepl("fetchAll", llm_code), "Generated code should include fetchAll")
  expect_true(grepl(KORAP_URL, llm_code, fixed = TRUE), "Generated code should include the specified KorAP URL")

  # Either the native or the magrittr pipe counts.
  uses_pipe <- grepl("\\|>", llm_code) || grepl("%>%", llm_code)
  expect_true(uses_pipe, "Generated code should use pipe operators")

  # The answer must at least parse.
  parses <- test_code_syntax(llm_code)
  expect_true(parses, "Generated code should be syntactically valid R code")

  # Show the code for manual inspection.
  cat("Generated corpus query code:\n", llm_code, "\n")

  # Optional execution; NA means execution was not enabled.
  ran_ok <- run_code_if_enabled(llm_code, "corpus query")
  if (!is.na(ran_ok)) {
    expect_true(ran_ok, "Generated code should execute without runtime errors")
  }
})