embeddings functions
diff --git a/R/bunch_embedding.R b/R/bunch_embedding.R
index 29e3dc6..d22a3ac 100644
--- a/R/bunch_embedding.R
+++ b/R/bunch_embedding.R
@@ -1,11 +1,48 @@
-gpt3.bunch_embedding = function(data
- , text_var
+#' Retrieves text embeddings for character input from a vector from the GPT-3 API
+#'
+#' @description
+#' `gpt3_bunch_embedding()` extends the single embeddings function `gpt3_make_embedding()` to allow for the processing of a whole vector
+#' @details The returned data.table contains the column `id` which indicates the text id (or its generic alternative if not specified) and the columns `dim_1` ... `dim_{max}`, where `max` is the length of the text embeddings vector that the four different models return. For the default "Ada" model, these are 1024 dimensions (i.e., `dim_1`... `dim_1024`).
+#'
+#' The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+#' - Ada (1024 dimensions)
+#' - Babbage (2048 dimensions)
+#' - Curie (4096 dimensions)
+#' - Davinci (12288 dimensions)
+#'
+#' Note that the dimension size (= vector length), speed and [associated costs](https://openai.com/api/pricing/) differ considerably.
+#'
+#' These vectors can be used for downstream tasks such as (vector) similarity calculations.
+#' @param input_var character vector that contains the texts for which you want to obtain text embeddings from the GPT-3 model
+#' @param id_var (optional) character vector that contains the user-defined ids of the prompts. See details.
+#' @param param_model a character vector that indicates the [similarity embedding model](https://beta.openai.com/docs/guides/embeddings/similarity-embeddings); one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"
+#' @return A data.table with the embeddings as separate columns; one row represents one input text. See details.
+#' @examples
+#' # First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+#'
+#' # Use example data:
+#' ## The data below were generated with the `gpt3_make_request()` function as follows:
+#' ##### DO NOT RUN #####
+#' # travel_blog_data = gpt3_make_request(prompt_input = "Write a travel blog about a dog's journey through the UK:", temperature = 0.8, n = 10, max_tokens = 200)[[1]]
+#' ##### END DO NOT RUN #####
+#'
+#' # You can load these data with:
+#' data("travel_blog_data") # the dataset contains 10 completions for the above request
+#'
+#' ## Obtain text embeddings for the completion texts:
+#' emb_travelblogs = gpt3_bunch_embedding(input_var = travel_blog_data$gpt3)
+#' dim(emb_travelblogs)
+#' @export
+gpt3_bunch_embedding = function(input_var
, id_var
, param_model = 'text-similarity-ada-001'){
- data_ = data
-
- data_length = data_[, .N]
+ data_length = length(input_var)
+ if(missing(id_var)){
+ data_id = paste0('prompt_', 1:data_length)
+ } else {
+ data_id = id_var
+ }
empty_list = list()
@@ -13,12 +50,12 @@
print(paste0('Embedding: ', i, '/', data_length))
- row_outcome = gpt3.make_embedding(model_ = param_model
- , input_ = as.character(unname(data_[i, ..text_var])))
+ row_outcome = gpt3_make_embedding(model = param_model
+ , input = input_var[i])
empty_df = data.frame(t(row_outcome))
names(empty_df) = paste0('dim_', 1:length(row_outcome))
- empty_df$id_full = as.character(unname(data_[i, ..id_var]))
+ empty_df$id = data_id[i]
empty_list[[i]] = empty_df
diff --git a/R/bunch_request.R b/R/bunch_request.R
index 398a7aa..dc9a7f2 100644
--- a/R/bunch_request.R
+++ b/R/bunch_request.R
@@ -1,12 +1,12 @@
#' Makes bunch completion requests to the GPT-3 API
#'
#' @description
-#' `gpt3_bunch_request()` is the package's main function for rquests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the `gpt3_simple_request()` function to allow for bunch processing of requests to the Open AI GPT-3 API.
+#' `gpt3_bunch_request()` is the package's main function for requests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the `gpt3_make_request()` function to allow for bunch processing of requests to the Open AI GPT-3 API.
#' @details
#' The easiest (and intended) use case for this function is to create a data.frame or data.table with variables that contain the prompts to be requested from GPT-3 and a prompt id (see examples below).
#' For a general guide on the completion requests, see [https://beta.openai.com/docs/guides/completion](https://beta.openai.com/docs/guides/completion). This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on [https://beta.openai.com/docs/api-reference/completions](https://beta.openai.com/docs/api-reference/completions) and reproduced below.
#'
-#' For the `best_of` parameter: The `gpt3_simple_request()` (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting if(best_of <= n){ best_of = n}.
+#' For the `best_of` parameter: The `gpt3_make_request()` (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting `if(best_of <= n){ best_of = n}`.
#'
#' If `id_var` is not provided, the function will use `prompt_1` ... `prompt_n` as id variable.
#'
@@ -93,7 +93,7 @@
print(paste0('Request: ', i, '/', data_length))
- row_outcome = gpt3_simple_request(prompt_input = prompt_var[i]
+ row_outcome = gpt3_make_request(prompt_input = prompt_var[i]
, model = param_model
, output_type = 'complete'
, suffix = param_suffix
diff --git a/R/make_embedding.R b/R/make_embedding.R
index afdb610..a916ac6 100644
--- a/R/make_embedding.R
+++ b/R/make_embedding.R
@@ -1,8 +1,38 @@
-gpt3.make_embedding = function(model_ = 'text-similarity-ada-001'
- , input_){
+#' Obtains text embeddings for a single character (string) from the GPT-3 API
+#'
+#' @description
+#' `gpt3_make_embedding()` sends a single [embedding request](https://beta.openai.com/docs/guides/embeddings) to the Open AI GPT-3 API.
+#' @details The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+#' - Ada (1024 dimensions)
+#' - Babbage (2048 dimensions)
+#' - Curie (4096 dimensions)
+#' - Davinci (12288 dimensions)
+#'
+#' Note that the dimension size (= vector length), speed and [associated costs](https://openai.com/api/pricing/) differ considerably.
+#'
+#' These vectors can be used for downstream tasks such as (vector) similarity calculations.
+#' @param input character that contains the text for which you want to obtain text embeddings from the GPT-3 model
+#' @param model a character vector that indicates the [similarity embedding model](https://beta.openai.com/docs/guides/embeddings/similarity-embeddings); one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"
+#' @return A numeric vector (= the embedding vector)
+#' @examples
+#' # First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+#'
+#' # Once authenticated:
+#'
+#' ## Simple request with defaults:
+#' sample_string = "London is one of the most liveable cities in the world. The city is always full of energy and people. It's always a great place to explore and have fun."
+#' gpt3_make_embedding(input = sample_string)
+#'
+#' ## Change the model:
+#' gpt3_make_embedding(input = sample_string
+#' , model = 'text-similarity-curie-001')
+#' @export
+gpt3_make_embedding = function(input
+ , model = 'text-similarity-ada-001'
+ ){
- parameter_list = list(model = model_
- , input = input_)
+ parameter_list = list(model = model
+ , input = input)
request_base = httr::POST(url = url.embeddings
, body = parameter_list
@@ -12,7 +42,7 @@
output_base = httr::content(request_base)
- embedding_raw = toNumeric(unlist(output_base$data[[1]]$embedding))
+ embedding_raw = to_numeric(unlist(output_base$data[[1]]$embedding))
return(embedding_raw)
diff --git a/R/make_request.R b/R/make_request.R
index 3fb02e0..7fa653a 100644
--- a/R/make_request.R
+++ b/R/make_request.R
@@ -1,7 +1,7 @@
#' Makes a single completion request to the GPT-3 API
#'
#' @description
-#' `gpt3_simple_request()` sends a single [completion request](https://beta.openai.com/docs/api-reference/completions) to the Open AI GPT-3 API.
+#' `gpt3_make_request()` sends a single [completion request](https://beta.openai.com/docs/api-reference/completions) to the Open AI GPT-3 API.
#' @details For a general guide on the completion requests, see [https://beta.openai.com/docs/guides/completion](https://beta.openai.com/docs/guides/completion). This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on [https://beta.openai.com/docs/api-reference/completions](https://beta.openai.com/docs/api-reference/completions) and reproduced below.
#'
#' For the `best_of` parameter: When used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n. Note that this is handled by the wrapper automatically if(best_of <= n){ best_of = n}.
@@ -36,26 +36,26 @@
#' # Once authenticated:
#'
#' ## Simple request with defaults:
-#' gpt3_simple_request(prompt_input = 'How old are you?')
+#' gpt3_make_request(prompt_input = 'How old are you?')
#'
#' ## Instruct GPT-3 to write ten research ideas of max. 150 tokens with some controls:
-#'gpt3_simple_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
+#'gpt3_make_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
#' , temperature = 0.8
#' , n = 10
#' , max_tokens = 150)
#'
#' ## For fully reproducible results, we need `temperature = 0`, e.g.:
-#' gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+#' gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
#' , temperature = 0.0
#' , max_tokens = 50)
#'
#' ## The same example with a different GPT-3 model:
-#' gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+#' gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
#' , model = 'text-babbage-001'
#' , temperature = 0.0
#' , max_tokens = 50)
#' @export
-gpt3_simple_request = function(prompt_input
+gpt3_make_request = function(prompt_input
, model = 'text-davinci-002'
, output_type = 'complete'
, suffix = NULL
diff --git a/R/test_request.R b/R/test_request.R
index dbc3160..7b8c7e4 100644
--- a/R/test_request.R
+++ b/R/test_request.R
@@ -12,7 +12,7 @@
check_apikey_form()
test_prompt = 'Write a story about R Studio:'
- test_outout = gpt3_simple_request(prompt_ = test_prompt
+ test_outout = gpt3_make_request(prompt_input = test_prompt
, max_tokens = 100)
print(paste0('.. test successful ..'))
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..5c771d2
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,14 @@
+#' Convert character vector of numeric values into a numeric vector
+#'
+#' @description
+#' Converts a character vector of numeric values into a numeric vector
+#' @param x a character vector of numeric values
+#'
+#' @return A numeric vector
+#' @examples
+#' to_numeric('12312')
+#' @export
+to_numeric = function(x){
+ proxy = as.numeric(as.character(x))
+ return(proxy)
+}