Use English Wikipedia instance for ca doc test Change-Id: I811d9d503fdc3dc4c69c4e3f34535c8d1b2d1ef0

commit: 2deadd8a2d40689eb599ce2ecef1c7a222943a5e [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Wed Jul 09 08:53:33 2025 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Mon Jul 21 11:39:29 2025 +0200
tree: 015c47992834b4e4cb83caddd6aaeb4b1a49045c
parent: ef9082852c0580638df6ba1e83e21b8ae76fd5da [diff]
diff --git a/tests/testthat/test-readme-against-llm.R b/tests/testthat/test-readme-against-llm.R
index fbac04b..b77b93f 100644
--- a/tests/testthat/test-readme-against-llm.R
+++ b/tests/testthat/test-readme-against-llm.R

@@ -2,10 +2,12 @@
 
 # Helper function to skip if no API keys are available
 skip_if_no_api_key <- function() {
-  skip_if_not(nzchar(Sys.getenv("OPENAI_API_KEY")) || 
-              nzchar(Sys.getenv("ANTHROPIC_API_KEY")) || 
-              nzchar(Sys.getenv("GOOGLE_API_KEY")), 
-              "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
+  skip_if_not(
+    nzchar(Sys.getenv("OPENAI_API_KEY")) ||
+      nzchar(Sys.getenv("ANTHROPIC_API_KEY")) ||
+      nzchar(Sys.getenv("GOOGLE_API_KEY")),
+    "No API keys found (need OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)"
+  )
 }
 
 # Helper function to find README.md file in current or parent directories
@@ -26,55 +28,73 @@
     return(NULL)
   }
   readme_content <- readLines(readme_path)
+
+  # Find the line with "## Installation" and truncate before it
+  installation_line <- grep("^## Installation", readme_content, ignore.case = TRUE)
+  if (length(installation_line) > 0) {
+    readme_content <- readme_content[1:(installation_line[1] - 1)]
+  }
+
   paste(readme_content, collapse = "\n")
 }
 
 # Helper function to call LLM API using tidyllm
 call_llm_api <- function(prompt, max_tokens = 500, temperature = 0.1, model = LLM_MODEL) {
-  tryCatch({
-    # Determine the provider based on model name
-    if (grepl("^gpt-", model, ignore.case = TRUE)) {
-      provider <- openai()
-    } else if (grepl("^claude-", model, ignore.case = TRUE)) {
-      provider <- claude()
-    } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
-      # Debug Gemini API key
-      provider <- gemini()
-    } else {
-      stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
-    }
+  cat("Calling LLM API with model:", model, "\n")
+  # Only print prompt up to the beginning of README content
+  readme_start <- regexpr("README Documentation:", prompt, fixed = TRUE)
+  if (readme_start > 0) {
+    prompt_preview <- substr(prompt, 1, readme_start - 1)
+    cat("Prompt (up to README):\n", prompt_preview, "\n")
+  } else {
+    cat("Prompt:\n", prompt, "\n")
+  }
+  tryCatch(
+    {
+      # Determine the provider based on model name
+      if (grepl("^gpt-", model, ignore.case = TRUE)) {
+        provider <- openai()
+      } else if (grepl("^claude-", model, ignore.case = TRUE)) {
+        provider <- claude()
+      } else if (grepl("^gemini-", model, ignore.case = TRUE)) {
+        # Debug Gemini API key
+        provider <- gemini()
+      } else {
+        stop(paste("Unsupported model:", model, "- supported prefixes: gpt-, claude-, gemini-"))
+      }
 
-    # Use tidyllm unified API
-    result <- llm_message(prompt) |>
-      chat(
-        .provider = provider,
-        .model = model,
-        .temperature = temperature,
-        .max_tries = 3
-      )
+      # Use tidyllm unified API
+      result <- llm_message(prompt) |>
+        chat(
+          .provider = provider,
+          .model = model,
+          .temperature = temperature,
+          .max_tries = 3
+        )
 
-    # Extract the reply text
-    get_reply(result)
-  }, error = function(e) {
-    if (grepl("429", as.character(e))) {
-      skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
-    } else if (grepl("401", as.character(e))) {
-      skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
-    } else {
-      stop(paste("LLM API error:", as.character(e)))
+      # Extract the reply text
+      get_reply(result)
+    },
+    error = function(e) {
+      if (grepl("429", as.character(e))) {
+        skip("LLM API rate limit exceeded - please try again later or check your API key/credits")
+      } else if (grepl("401", as.character(e))) {
+        skip("LLM API authentication failed - please check your API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY)")
+      } else {
+        stop(paste("LLM API error:", as.character(e)))
+      }
     }
-  })
+  )
 }
 
 # Configuration variables
-#LLM_MODEL <- "gpt-4o-mini"                  # OpenAI model option
-LLM_MODEL <- "claude-3-5-sonnet-latest"      # Claude model option
-#LLM_MODEL <- "claude-3-7-sonnet-latest"     # Claude model option
-#LLM_MODEL <- "claude-sonnet-4-0"            # Claude model option
-#LLM_MODEL <- "gemini-2.5-pro"               # Google Gemini model option
-#LLM_MODEL <- "gemini-1.5-pro"               # Google Gemini model option
-#LLM_MODEL <- "gemini-2.5-flash"             # Google Gemini model option (faster)
-KORAP_URL <- "https://korap.ids-mannheim.de/instance/wiki"
+# LLM_MODEL <- "gpt-4o-mini"                  # OpenAI model option
+# LLM_MODEL <- "claude-3-5-sonnet-latest" # Claude model option
+# LLM_MODEL <- "claude-3-7-sonnet-latest"     # Claude model option
+# LLM_MODEL <- "claude-sonnet-4-0"            # Claude model option
+LLM_MODEL <- "gemini-2.5-pro"               # Google Gemini model option
+# LLM_MODEL <- "gemini-1.5-pro"               # Google Gemini model option
+# LLM_MODEL <- "gemini-2.5-flash"             # Google Gemini model option (faster)
 
 # Helper function to create README-guided prompt
 create_readme_prompt <- function(task_description, specific_task) {
@@ -86,7 +106,6 @@
   paste0(
     "You are an expert R programmer. Based on the following README documentation for the RKorAPClient package, ",
     task_description, "\n\n",
-    "IMPORTANT: Use the KorAP URL '", KORAP_URL, "' as the 1st parameter (KorAPUrl) in KorAPConnection.\n\n",
     "README Documentation:\n",
     readme_text,
     "\n\nTask: ", specific_task,
@@ -105,37 +124,43 @@
 
 # Helper function to test code syntax
 test_code_syntax <- function(code) {
-  tryCatch({
-    parse(text = code)
-    TRUE
-  }, error = function(e) {
-    cat("Syntax error:", as.character(e), "\n")
-    FALSE
-  })
+  tryCatch(
+    {
+      parse(text = code)
+      TRUE
+    },
+    error = function(e) {
+      cat("Syntax error:", as.character(e), "\n")
+      FALSE
+    }
+  )
 }
 
 # Helper function to run code if RUN_LLM_CODE is set
 run_code_if_enabled <- function(code, test_name) {
   if (nzchar(Sys.getenv("RUN_LLM_CODE")) && Sys.getenv("RUN_LLM_CODE") == "true") {
     cat("Running generated code for", test_name, "...\n")
-    tryCatch({
-      result <- eval(parse(text = code))
-      cat("Code executed successfully. Result type:", class(result), "\n")
-      if (is.data.frame(result)) {
-        cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
-        if (nrow(result) > 0) {
-          cat("First few rows:\n")
-          print(head(result, 3))
+    tryCatch(
+      {
+        result <- eval(parse(text = code))
+        cat("Code executed successfully. Result type:", class(result), "\n")
+        if (is.data.frame(result)) {
+          cat("Result dimensions:", nrow(result), "rows,", ncol(result), "columns\n")
+          if (nrow(result) > 0) {
+            cat("First few rows:\n")
+            print(head(result, 3))
+          }
+        } else {
+          cat("Result preview:\n")
+          print(result)
         }
-      } else {
-        cat("Result preview:\n")
-        print(result)
+        return(TRUE)
+      },
+      error = function(e) {
+        cat("Runtime error:", as.character(e), "\n")
+        return(FALSE)
       }
-      return(TRUE)
-    }, error = function(e) {
-      cat("Runtime error:", as.character(e), "\n")
-      return(FALSE)
-    })
+    )
   } else {
     cat("Skipping code execution (set RUN_LLM_CODE=true to enable)\n")
     return(NA)
@@ -145,10 +170,10 @@
 test_that(paste(LLM_MODEL, "can solve frequency query task with README guidance"), {
   # Skip if offline
   skip_if_offline()
-  
+
   # Skip if no API keys are set
   skip_if_no_api_key()
-  
+
   # Check for README file
   skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
 
@@ -168,8 +193,7 @@
   expect_true(grepl("Demokratie", generated_code), "Generated code should include the search term 'Demokratie'")
   last_year <- as.numeric(format(Sys.Date(), "%Y")) - 1
 
-  expect_true(grepl("Date in ", generated_code), "Generated code should vc restriction on years")
-  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
+  expect_true(grepl("Date in", generated_code), "Generated code should vc restriction on years")
 
   # Check that the generated code contains essential RKorAPClient patterns
   # expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code), "Generated code should use pipe operators")
@@ -192,17 +216,18 @@
 test_that(paste(LLM_MODEL, "can solve collocation analysis task with README guidance"), {
   # Skip if offline
   skip_if_offline()
-  
+
   # Skip if no API keys are set
   skip_if_no_api_key()
-  
+
   # Check for README file
   skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
 
   # Create the prompt for collocation analysis
   prompt <- create_readme_prompt(
-    "write R code to perform a collocation analysis for the lemma 'setzen'. The code should use the RKorAPClient package's collocationAnalysis function.",
-    "Write R code to perform collocation analysis for 'setzen' using RKorAPClient."
+    paste("Write R code to perform a collocation analysis for the lemma 'leverage' based on the current English Wikipedia Corpus using default parameters", "and show the three highest collocates according to their log dice score.
+"),
+    "Write R code to perform collocation analysis for lemma 'leverage' using RKorAPClient."
   )
 
   # Call LLM API
@@ -212,9 +237,9 @@
   # Basic checks on the generated code
   expect_true(grepl("KorAPConnection", generated_code), "Generated code should include KorAPConnection")
   expect_true(grepl("collocationAnalysis", generated_code), "Generated code should include collocationAnalysis")
-  expect_true(grepl("setzen", generated_code), "Generated code should include the search term 'setzen'")
+  expect_true(grepl("tt/l=leverage", generated_code), "Generated code should include the search the lemma 'leverage'")
   # expect_true(grepl("auth", generated_code), "Generated code should include auth() for collocation analysis")
-  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
+  expect_true(grepl("instance/english", generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
 
   # Test code syntax
   syntax_valid <- test_code_syntax(generated_code)
@@ -233,10 +258,10 @@
 test_that(paste(LLM_MODEL, "can solve corpus query task with README guidance"), {
   # Skip if offline
   skip_if_offline()
-  
+
   # Skip if no API keys are set
   skip_if_no_api_key()
-  
+
   # Check for README file
   skip_if_not(!is.null(find_readme_path()), "Readme.md not found in current or parent directories")
 
@@ -255,11 +280,12 @@
   expect_true(grepl("corpusQuery", generated_code), "Generated code should include corpusQuery")
   expect_true(grepl("Hello world", generated_code), "Generated code should include the search term 'Hello world'")
   expect_true(grepl("fetchAll", generated_code), "Generated code should include fetchAll")
-  expect_true(grepl(KORAP_URL, generated_code, fixed = TRUE), "Generated code should include the specified KorAP URL")
 
   # Check that the generated code follows the README example pattern
-  expect_true(grepl("\\|>", generated_code) || grepl("%>%", generated_code),
-              "Generated code should use pipe operators")
+  expect_true(
+    grepl("\\|>", generated_code) || grepl("%>%", generated_code),
+    "Generated code should use pipe operators"
+  )
 
   # Test code syntax
   syntax_valid <- test_code_syntax(generated_code)
commit	2deadd8a2d40689eb599ce2ecef1c7a222943a5e	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Jul 09 08:53:33 2025 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Mon Jul 21 11:39:29 2025 +0200
tree	015c47992834b4e4cb83caddd6aaeb4b1a49045c
parent	ef9082852c0580638df6ba1e83e21b8ae76fd5da [diff]