blob: 842a0d5509e7fb63b4e55f7c9cb68fcafd985def [file] [log] [blame]
# Locate the project's Readme.md relative to the current working directory.
#
# The working directory is checked first, then its parent, then its
# grandparent. Returns the first candidate path that exists (as the relative
# path string listed below), or NULL when none of them exists.
find_readme_path <- function() {
  candidates <- c("Readme.md", "../Readme.md", "../../Readme.md")
  existing <- candidates[file.exists(candidates)]
  if (length(existing) == 0) {
    return(NULL)
  }
  existing[[1]]
}
11
# Read the project's Readme.md and return its content as one string.
#
# Returns NULL when find_readme_path() cannot locate the file; otherwise the
# lines of the file joined with "\n".
read_readme_content <- function() {
  readme_path <- find_readme_path()
  if (is.null(readme_path)) {
    return(NULL)
  }
  # warn = FALSE: a Readme without a trailing newline must not raise an
  # "incomplete final line" warning in the middle of a test run.
  # encoding = "UTF-8": marks the lines as UTF-8 so non-ASCII characters are
  # not misinterpreted on platforms with a different native encoding.
  # NOTE(review): assumes the Readme is stored as UTF-8 - confirm.
  readme_lines <- readLines(readme_path, warn = FALSE, encoding = "UTF-8")
  paste(readme_lines, collapse = "\n")
}
21
# Send a single-turn chat completion request to the OpenAI API.
#
# Arguments:
#   prompt       Text sent as the only "user" message.
#   max_tokens   Value of the request's `max_tokens` field (default 500).
#   temperature  Value of the request's `temperature` field (default 0.1).
#
# Returns the message content of the first choice in the response.
#
# The calling test is skipped (not failed) when httr2/jsonlite are not
# installed, when the API answers 429 (rate limit) or 401 (authentication).
# Any other failure is re-raised as an error prefixed with "OpenAI API error:".
call_openai_api <- function(prompt, max_tokens = 500, temperature = 0.1) {
  # Namespace-qualified calls are used instead of library() so that calling
  # this helper does not attach packages as a side effect; a missing package
  # skips the test instead of erroring. jsonlite is needed by req_body_json().
  skip_if_not_installed("httr2")
  skip_if_not_installed("jsonlite")

  tryCatch(
    {
      response <- httr2::request("https://api.openai.com/v1/chat/completions") |>
        httr2::req_headers(
          "Authorization" = paste("Bearer", Sys.getenv("OPENAI_API_KEY")),
          "Content-Type" = "application/json"
        ) |>
        httr2::req_body_json(list(
          model = "gpt-4.1-mini",
          messages = list(
            list(role = "user", content = prompt)
          ),
          max_tokens = max_tokens,
          temperature = temperature
        )) |>
        httr2::req_retry(max_tries = 3) |>
        httr2::req_perform()

      # Parse the response and pull out the generated text
      result <- httr2::resp_body_json(response)
      result$choices[[1]]$message$content
    },
    # httr2 signals HTTP failures as conditions classed httr2_http_<status>;
    # matching on the class avoids false positives from "429"/"401" appearing
    # elsewhere in an error message.
    httr2_http_429 = function(e) {
      skip("OpenAI API rate limit exceeded - please try again later or check your API key/credits")
    },
    httr2_http_401 = function(e) {
      skip("OpenAI API authentication failed - please check your OPENAI_API_KEY")
    },
    error = function(e) {
      stop("OpenAI API error: ", conditionMessage(e), call. = FALSE)
    }
  )
}
57
# Assemble the prompt sent to the model: an instruction preamble, the full
# Readme text, the concrete task, and a request for code-only output.
#
# Arguments:
#   task_description  Text appended to the preamble sentence.
#   specific_task     Text placed after the "Task: " label.
#
# Stops with an error when read_readme_content() returns NULL.
create_readme_prompt <- function(task_description, specific_task) {
  readme_body <- read_readme_content()
  if (is.null(readme_body)) {
    stop("README.md not found")
  }

  preamble <- paste0(
    "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
    task_description
  )
  documentation <- paste0("README Documentation:\n", readme_body)
  task <- paste0("Task: ", specific_task)
  closing <- "Provide only the R code without explanations."

  # Sections are separated by one blank line each.
  paste0(preamble, "\n\n", documentation, "\n\n", task, "\n\n", closing)
}
74
# Extract R code from a model response that may wrap it in markdown fences.
#
# When the response contains one or more complete fenced blocks
# (```lang ... ```), only the text inside the fences is returned (several
# blocks are joined with "\n"), so explanatory prose around the code does not
# end up in the result. When no complete fenced block is found - plain code,
# or an opening fence whose closing fence is missing - stray fence
# markers are removed and the remaining text is returned.
#
# Accepts a character vector and returns a character vector of the same
# length with leading/trailing whitespace removed; NA stays NA.
extract_r_code <- function(response_text) {
  # Opening fence with optional info string, newline, lazily matched body,
  # closing fence. (?s) lets "." span newlines.
  fence_pattern <- "(?s)```[^\\n`]*\\n.*?```"

  extract_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }
    fenced <- regmatches(text, gregexpr(fence_pattern, text, perl = TRUE))[[1]]
    if (length(fenced) == 0) {
      # No complete fenced block: just drop any stray fence markers.
      return(trimws(gsub("```[rR]?\\n?", "", text)))
    }
    # Strip the opening fence line and the closing fence from each block.
    bodies <- sub("^```[^\\n]*\\n", "", fenced, perl = TRUE)
    bodies <- sub("```$", "", bodies, perl = TRUE)
    trimws(paste(trimws(bodies), collapse = "\n"))
  }

  vapply(as.character(response_text), extract_one, character(1), USE.NAMES = FALSE)
}
83
# Live test: the model receives the Readme as its only documentation and is
# asked for frequency-query code; the answer is checked for the expected
# RKorAPClient calls and for syntactic validity. Skipped when no API key is
# set or the Readme cannot be located.
test_that("GPT-4.1 mini can solve frequency query task with README guidance", {
  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt with README context and task
  prompt <- create_readme_prompt(
    "write R code to perform a frequency query for the word 'Deutschland' across multiple years (2010-2015). The code should use the RKorAPClient package and return a data frame with year and frequency columns.",
    "Write R code to query frequency of 'Deutschland' from 2010-2015 using RKorAPClient."
  )

  # Call OpenAI API
  generated_response <- call_openai_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code. expect_match() shows the generated
  # text when it fails; fixed = TRUE keeps literal terms from being read as
  # regular expressions.
  expect_match(generated_code, "KorAPConnection", fixed = TRUE,
    info = "Generated code should include KorAPConnection")
  expect_match(generated_code, "frequencyQuery", fixed = TRUE,
    info = "Generated code should include frequencyQuery")
  expect_match(generated_code, "Deutschland", fixed = TRUE,
    info = "Generated code should include the search term 'Deutschland'")
  expect_match(generated_code, "201[0-5]",
    info = "Generated code should include years 2010-2015")

  # Check that the generated code uses either the native or the magrittr pipe
  expect_match(generated_code, "\\|>|%>%",
    info = "Generated code should use pipe operators")

  # The code must at least parse; regexp = NA asserts that no error is raised
  # and reports the parser message if one is.
  expect_error(parse(text = generated_code), NA,
    info = "Generated code should be syntactically valid R code")

  # Print the generated code for manual inspection
  cat("Generated code:\n", generated_code, "\n")
})
121
# Live test: the model receives the Readme as its only documentation and is
# asked for collocation-analysis code; the answer is checked for the expected
# RKorAPClient calls and parameters. Skipped when no API key is set or the
# Readme cannot be located.
test_that("GPT-4.1 mini can solve collocation analysis task with README guidance", {
  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for collocation analysis
  prompt <- create_readme_prompt(
    "write R code to perform a collocation analysis for the word 'setzen' (looking for light verb constructions). The code should use the RKorAPClient package's collocationAnalysis function.",
    "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
  )

  # Call OpenAI API
  generated_response <- call_openai_api(prompt, max_tokens = 500)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code. expect_match() shows the generated
  # text when it fails; fixed = TRUE keeps literal terms from being read as
  # regular expressions.
  expect_match(generated_code, "KorAPConnection", fixed = TRUE,
    info = "Generated code should include KorAPConnection")
  expect_match(generated_code, "collocationAnalysis", fixed = TRUE,
    info = "Generated code should include collocationAnalysis")
  expect_match(generated_code, "setzen", fixed = TRUE,
    info = "Generated code should include the search term 'setzen'")
  expect_match(generated_code, "auth", fixed = TRUE,
    info = "Generated code should include auth() for collocation analysis")

  # Check for collocation analysis parameters (either context size suffices)
  expect_match(generated_code, "leftContextSize|rightContextSize",
    info = "Generated code should include context size parameters")

  # Print the generated code for manual inspection
  cat("Generated collocation analysis code:\n", generated_code, "\n")
})
149
# Live test: the model receives the Readme as its only documentation and is
# asked for a simple corpus query that fetches all results; the answer is
# checked for the expected RKorAPClient calls. Skipped when no API key is set
# or the Readme cannot be located.
test_that("GPT-4.1 mini can solve corpus query task with README guidance", {
  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")), "OPENAI_API_KEY not set")
  skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")

  # Create the prompt for corpus query
  prompt <- create_readme_prompt(
    "write R code to perform a simple corpus query for 'Hello world' and fetch all results. The code should use the RKorAPClient package.",
    "Write R code to query 'Hello world' and fetch all results using RKorAPClient."
  )

  # Call OpenAI API
  generated_response <- call_openai_api(prompt, max_tokens = 300)
  generated_code <- extract_r_code(generated_response)

  # Basic checks on the generated code. expect_match() shows the generated
  # text when it fails; fixed = TRUE keeps literal terms from being read as
  # regular expressions.
  expect_match(generated_code, "KorAPConnection", fixed = TRUE,
    info = "Generated code should include KorAPConnection")
  expect_match(generated_code, "corpusQuery", fixed = TRUE,
    info = "Generated code should include corpusQuery")
  expect_match(generated_code, "Hello world", fixed = TRUE,
    info = "Generated code should include the search term 'Hello world'")
  expect_match(generated_code, "fetchAll", fixed = TRUE,
    info = "Generated code should include fetchAll")

  # Check that the generated code uses either the native or the magrittr pipe
  expect_match(generated_code, "\\|>|%>%",
    info = "Generated code should use pipe operators")

  # Print the generated code for manual inspection
  cat("Generated corpus query code:\n", generated_code, "\n")
})