Embeddings functions
diff --git a/NAMESPACE b/NAMESPACE
index 81ac3c8..9a80e08 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,8 @@
export(gpt3_authenticate)
export(gpt3_bunch_request)
-export(gpt3_simple_request)
+export(gpt3_make_embedding)
+export(gpt3_make_request)
export(gpt3_test_request)
+export(to_numeric)
export(url.completions)
diff --git a/R/bunch_embedding.R b/R/bunch_embedding.R
index 29e3dc6..d22a3ac 100644
--- a/R/bunch_embedding.R
+++ b/R/bunch_embedding.R
@@ -1,11 +1,48 @@
-gpt3.bunch_embedding = function(data
- , text_var
+#' Retrieves text embeddings from the GPT-3 API for a vector of character inputs
+#'
+#' @description
+#' `gpt3_bunch_embedding()` extends the single embeddings function `gpt3_make_embedding()` to allow for the processing of a whole vector of texts.
+#' @details The returned data.table contains the column `id` which indicates the text id (or its generic alternative if not specified) and the columns `dim_1` ... `dim_{max}`, where `max` is the length of the text embeddings vector that the four different models return. For the default "Ada" model, these are 1024 dimensions (i.e., `dim_1`... `dim_1024`).
+#'
+#' The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+#' - Ada (1024 dimensions)
+#' - Babbage (2048 dimensions)
+#' - Curie (4096 dimensions)
+#' - Davinci (12288 dimensions)
+#'
+#' Note that the dimension size (= vector length), speed and [associated costs](https://openai.com/api/pricing/) differ considerably.
+#'
+#' These vectors can be used for downstream tasks such as (vector) similarity calculations.
+#' @param input_var character vector that contains the texts for which you want to obtain text embeddings from the GPT-3 model
+#' @param id_var (optional) character vector that contains the user-defined ids of the prompts. See details.
+#' @param param_model a character vector that indicates the [similarity embedding model](https://beta.openai.com/docs/guides/embeddings/similarity-embeddings); one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"
+#' @return A data.table with the embeddings as separate columns; one row represents one input text. See details.
+#' @examples
+#' # First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+#'
+#' # Use example data:
+#' ## The data below were generated with the `gpt3_make_request()` function as follows:
+#' ##### DO NOT RUN #####
+#' # travel_blog_data = gpt3_make_request(prompt_input = "Write a travel blog about a dog's journey through the UK:", temperature = 0.8, n = 10, max_tokens = 200)[[1]]
+#' ##### END DO NOT RUN #####
+#'
+#' # You can load these data with:
+#' data("travel_blog_data") # the dataset contains 10 completions for the above request
+#'
+#' ## Obtain text embeddings for the completion texts:
+#' emb_travelblogs = gpt3_bunch_embedding(input_var = travel_blog_data$gpt3)
+#' dim(emb_travelblogs)
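+#'
+#' ## A sketch (not run) of one possible downstream use: pairwise cosine similarities
+#' ## between the returned embeddings. This assumes the output holds only the `id`
+#' ## column plus the numeric `dim_*` columns, as described in the details.
+#' # emb_mat = as.matrix(emb_travelblogs[, grep('^dim_', names(emb_travelblogs)), with = FALSE])
+#' # norms = sqrt(rowSums(emb_mat^2))
+#' # cosine_sim = (emb_mat %*% t(emb_mat)) / (norms %o% norms)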
+#' @export
+gpt3_bunch_embedding = function(input_var
, id_var
, param_model = 'text-similarity-ada-001'){
- data_ = data
-
- data_length = data_[, .N]
+ data_length = length(input_var)
+ if(missing(id_var)){
+ data_id = paste0('prompt_', 1:data_length)
+ } else {
+ data_id = id_var
+ }
empty_list = list()
@@ -13,12 +50,12 @@
print(paste0('Embedding: ', i, '/', data_length))
- row_outcome = gpt3.make_embedding(model_ = param_model
- , input_ = as.character(unname(data_[i, ..text_var])))
+ row_outcome = gpt3_make_embedding(model = param_model
+ , input = input_var[i])
empty_df = data.frame(t(row_outcome))
names(empty_df) = paste0('dim_', 1:length(row_outcome))
- empty_df$id_full = as.character(unname(data_[i, ..id_var]))
+ empty_df$id = data_id[i]
empty_list[[i]] = empty_df
diff --git a/R/bunch_request.R b/R/bunch_request.R
index 398a7aa..dc9a7f2 100644
--- a/R/bunch_request.R
+++ b/R/bunch_request.R
@@ -1,12 +1,12 @@
#' Makes bunch completion requests to the GPT-3 API
#'
#' @description
-#' `gpt3_bunch_request()` is the package's main function for rquests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the `gpt3_simple_request()` function to allow for bunch processing of requests to the Open AI GPT-3 API.
+#' `gpt3_bunch_request()` is the package's main function for requests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the `gpt3_make_request()` function to allow for bunch processing of requests to the Open AI GPT-3 API.
#' @details
#' The easiest (and intended) use case for this function is to create a data.frame or data.table with variables that contain the prompts to be requested from GPT-3 and a prompt id (see examples below).
#' For a general guide on the completion requests, see [https://beta.openai.com/docs/guides/completion](https://beta.openai.com/docs/guides/completion). This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on [https://beta.openai.com/docs/api-reference/completions](https://beta.openai.com/docs/api-reference/completions) and reproduced below.
#'
-#' For the `best_of` parameter: The `gpt3_simple_request()` (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting if(best_of <= n){ best_of = n}.
+#' For the `best_of` parameter: The `gpt3_make_request()` (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting `if(best_of <= n){ best_of = n}`.
#'
#' If `id_var` is not provided, the function will use `prompt_1` ... `prompt_n` as id variable.
#'
@@ -93,7 +93,7 @@
print(paste0('Request: ', i, '/', data_length))
- row_outcome = gpt3_simple_request(prompt_input = prompt_var[i]
+ row_outcome = gpt3_make_request(prompt_input = prompt_var[i]
, model = param_model
, output_type = 'complete'
, suffix = param_suffix
diff --git a/R/make_embedding.R b/R/make_embedding.R
index afdb610..a916ac6 100644
--- a/R/make_embedding.R
+++ b/R/make_embedding.R
@@ -1,8 +1,38 @@
-gpt3.make_embedding = function(model_ = 'text-similarity-ada-001'
- , input_){
+#' Obtains text embeddings for a single character (string) from the GPT-3 API
+#'
+#' @description
+#' `gpt3_make_embedding()` sends a single [embedding request](https://beta.openai.com/docs/guides/embeddings) to the Open AI GPT-3 API.
+#' @details The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+#' - Ada (1024 dimensions)
+#' - Babbage (2048 dimensions)
+#' - Curie (4096 dimensions)
+#' - Davinci (12288 dimensions)
+#'
+#' Note that the dimension size (= vector length), speed and [associated costs](https://openai.com/api/pricing/) differ considerably.
+#'
+#' These vectors can be used for downstream tasks such as (vector) similarity calculations.
+#' @param input character string that contains the text for which you want to obtain text embeddings from the GPT-3 model
+#' @param model a character vector that indicates the [similarity embedding model](https://beta.openai.com/docs/guides/embeddings/similarity-embeddings); one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"
+#' @return A numeric vector (= the embedding vector)
+#' @examples
+#' # First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+#'
+#' # Once authenticated:
+#'
+#' ## Simple request with defaults:
+#' sample_string = "London is one of the most liveable cities in the world. The city is always full of energy and people. It's always a great place to explore and have fun."
+#' gpt3_make_embedding(input = sample_string)
+#'
+#' ## Change the model:
+#' gpt3_make_embedding(input = sample_string
+#' , model = 'text-similarity-curie-001')
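+#'
+#' ## A sketch (not run) of a downstream use: cosine similarity between the
+#' ## embeddings of two texts (the second string here is just an illustration).
+#' # second_string = "Munich is a city in the south of Germany."
+#' # emb_1 = gpt3_make_embedding(input = sample_string)
+#' # emb_2 = gpt3_make_embedding(input = second_string)
+#' # sum(emb_1 * emb_2) / (sqrt(sum(emb_1^2)) * sqrt(sum(emb_2^2)))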
+#' @export
+gpt3_make_embedding = function(input
+ , model = 'text-similarity-ada-001'
+ ){
- parameter_list = list(model = model_
- , input = input_)
+ parameter_list = list(model = model
+ , input = input)
request_base = httr::POST(url = url.embeddings
, body = parameter_list
@@ -12,7 +42,7 @@
output_base = httr::content(request_base)
- embedding_raw = toNumeric(unlist(output_base$data[[1]]$embedding))
+ embedding_raw = to_numeric(unlist(output_base$data[[1]]$embedding))
return(embedding_raw)
diff --git a/R/make_request.R b/R/make_request.R
index 3fb02e0..7fa653a 100644
--- a/R/make_request.R
+++ b/R/make_request.R
@@ -1,7 +1,7 @@
#' Makes a single completion request to the GPT-3 API
#'
#' @description
-#' `gpt3_simple_request()` sends a single [completion request](https://beta.openai.com/docs/api-reference/completions) to the Open AI GPT-3 API.
+#' `gpt3_make_request()` sends a single [completion request](https://beta.openai.com/docs/api-reference/completions) to the Open AI GPT-3 API.
#' @details For a general guide on the completion requests, see [https://beta.openai.com/docs/guides/completion](https://beta.openai.com/docs/guides/completion). This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on [https://beta.openai.com/docs/api-reference/completions](https://beta.openai.com/docs/api-reference/completions) and reproduced below.
#'
#' For the `best_of` parameter: When used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n. Note that this is handled by the wrapper automatically if(best_of <= n){ best_of = n}.
@@ -36,26 +36,26 @@
#' # Once authenticated:
#'
#' ## Simple request with defaults:
-#' gpt3_simple_request(prompt_input = 'How old are you?')
+#' gpt3_make_request(prompt_input = 'How old are you?')
#'
#' ## Instruct GPT-3 to write ten research ideas of max. 150 tokens with some controls:
-#'gpt3_simple_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
+#' gpt3_make_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
#' , temperature = 0.8
#' , n = 10
#' , max_tokens = 150)
#'
#' ## For fully reproducible results, we need `temperature = 0`, e.g.:
-#' gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+#' gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
#' , temperature = 0.0
#' , max_tokens = 50)
#'
#' ## The same example with a different GPT-3 model:
-#' gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+#' gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
#' , model = 'text-babbage-001'
#' , temperature = 0.0
#' , max_tokens = 50)
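+#'
+#' ## A sketch (not run): the returned list keeps the text completions in slot [[1]]
+#' ## and the meta information in slot [[2]] (see the return value).
+#' # output = gpt3_make_request(prompt_input = 'How old are you?')
+#' # output[[1]]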
#' @export
-gpt3_simple_request = function(prompt_input
+gpt3_make_request = function(prompt_input
, model = 'text-davinci-002'
, output_type = 'complete'
, suffix = NULL
diff --git a/R/test_request.R b/R/test_request.R
index dbc3160..7b8c7e4 100644
--- a/R/test_request.R
+++ b/R/test_request.R
@@ -12,7 +12,7 @@
check_apikey_form()
test_prompt = 'Write a story about R Studio:'
- test_outout = gpt3_simple_request(prompt_ = test_prompt
+ test_outout = gpt3_make_request(prompt_input = test_prompt
, max_tokens = 100)
print(paste0('.. test successful ..'))
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..5c771d2
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,14 @@
+#' Convert character vector of numeric values into a numeric vector
+#'
+#' @description
+#' Converts a character vector of numeric values into a numeric vector
+#' @param x a character vector of numeric values
+#'
+#' @return A numeric vector
+#' @examples
+#' to_numeric('12312')
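+#' ## also works element-wise on longer character vectors, e.g. (illustration):
+#' to_numeric(c('0.12', '-3.5', '1e-4'))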
+#' @export
+to_numeric = function(x){
+ proxy = as.numeric(as.character(x))
+ return(proxy)
+}
diff --git a/data/travel_blog_data.RData b/data/travel_blog_data.RData
new file mode 100644
index 0000000..017c49c
--- /dev/null
+++ b/data/travel_blog_data.RData
Binary files differ
diff --git a/man/gpt3_bunch_embedding.Rd b/man/gpt3_bunch_embedding.Rd
new file mode 100644
index 0000000..9a33169
--- /dev/null
+++ b/man/gpt3_bunch_embedding.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bunch_embedding.R
+\name{gpt3_bunch_embedding}
+\alias{gpt3_bunch_embedding}
+\title{Retrieves text embeddings from the GPT-3 API for a vector of character inputs}
+\usage{
+gpt3_bunch_embedding(
+ input_var,
+ id_var,
+ param_model = "text-similarity-ada-001"
+)
+}
+\arguments{
+\item{input_var}{character vector that contains the texts for which you want to obtain text embeddings from the GPT-3 model}
+
+\item{id_var}{(optional) character vector that contains the user-defined ids of the prompts. See details.}
+
+\item{param_model}{a character vector that indicates the \href{https://beta.openai.com/docs/guides/embeddings/similarity-embeddings}{similarity embedding model}; one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"}
+}
+\value{
+A data.table with the embeddings as separate columns; one row represents one input text. See details.
+}
+\description{
+\code{gpt3_bunch_embedding()} extends the single embeddings function \code{gpt3_make_embedding()} to allow for the processing of a whole vector of texts.
+}
+\details{
+The returned data.table contains the column \code{id} which indicates the text id (or its generic alternative if not specified) and the columns \code{dim_1} ... \verb{dim_\{max\}}, where \code{max} is the length of the text embeddings vector that the four different models return. For the default "Ada" model, these are 1024 dimensions (i.e., \code{dim_1}... \code{dim_1024}).
+
+The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+\itemize{
+\item Ada (1024 dimensions)
+\item Babbage (2048 dimensions)
+\item Curie (4096 dimensions)
+\item Davinci (12288 dimensions)
+}
+
+Note that the dimension size (= vector length), speed and \href{https://openai.com/api/pricing/}{associated costs} differ considerably.
+
+These vectors can be used for downstream tasks such as (vector) similarity calculations.
+}
+\examples{
+# First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+
+# Use example data:
+## The data below were generated with the `gpt3_make_request()` function as follows:
+##### DO NOT RUN #####
+# travel_blog_data = gpt3_make_request(prompt_input = "Write a travel blog about a dog's journey through the UK:", temperature = 0.8, n = 10, max_tokens = 200)[[1]]
+##### END DO NOT RUN #####
+
+# You can load these data with:
+data("travel_blog_data") # the dataset contains 10 completions for the above request
+
+## Obtain text embeddings for the completion texts:
+emb_travelblogs = gpt3_bunch_embedding(input_var = travel_blog_data$gpt3)
+dim(emb_travelblogs)
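+
+## A sketch (not run) of one possible downstream use: pairwise cosine similarities
+## between the returned embeddings (assumes only the id column plus the numeric dim_* columns):
+# emb_mat = as.matrix(emb_travelblogs[, grep('^dim_', names(emb_travelblogs)), with = FALSE])
+# norms = sqrt(rowSums(emb_mat^2))
+# cosine_sim = (emb_mat \%*\% t(emb_mat)) / (norms \%o\% norms)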
+}
diff --git a/man/gpt3_bunch_request.Rd b/man/gpt3_bunch_request.Rd
index 45c4949..347dfc4 100644
--- a/man/gpt3_bunch_request.Rd
+++ b/man/gpt3_bunch_request.Rd
@@ -58,13 +58,13 @@
If \code{output_type} is "meta", only the data table in slot [\link{2}] is returned.
}
\description{
-\code{gpt3_bunch_request()} is the package's main function for rquests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the \code{gpt3_simple_request()} function to allow for bunch processing of requests to the Open AI GPT-3 API.
+\code{gpt3_bunch_request()} is the package's main function for requests and takes as input a vector of prompts and processes each prompt as per the defined parameters. It extends the \code{gpt3_make_request()} function to allow for bunch processing of requests to the Open AI GPT-3 API.
}
\details{
The easiest (and intended) use case for this function is to create a data.frame or data.table with variables that contain the prompts to be requested from GPT-3 and a prompt id (see examples below).
For a general guide on the completion requests, see \url{https://beta.openai.com/docs/guides/completion}. This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on \url{https://beta.openai.com/docs/api-reference/completions} and reproduced below.
-For the \code{best_of} parameter: The \code{gpt3_simple_request()} (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting if(best_of <= n){ best_of = n}.
+For the \code{best_of} parameter: The \code{gpt3_make_request()} (which is used here in a vectorised manner) handles the issue that best_of must be greater than n by setting \code{if(best_of <= n){ best_of = n}}.
If \code{id_var} is not provided, the function will use \code{prompt_1} ... \code{prompt_n} as id variable.
diff --git a/man/gpt3_make_embedding.Rd b/man/gpt3_make_embedding.Rd
new file mode 100644
index 0000000..b339ab5
--- /dev/null
+++ b/man/gpt3_make_embedding.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/make_embedding.R
+\name{gpt3_make_embedding}
+\alias{gpt3_make_embedding}
+\title{Obtains text embeddings for a single character (string) from the GPT-3 API}
+\usage{
+gpt3_make_embedding(input, model = "text-similarity-ada-001")
+}
+\arguments{
+\item{input}{character string that contains the text for which you want to obtain text embeddings from the GPT-3 model}
+
+\item{model}{a character vector that indicates the \href{https://beta.openai.com/docs/guides/embeddings/similarity-embeddings}{similarity embedding model}; one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"}
+}
+\value{
+A numeric vector (= the embedding vector)
+}
+\description{
+\code{gpt3_make_embedding()} sends a single \href{https://beta.openai.com/docs/guides/embeddings}{embedding request} to the Open AI GPT-3 API.
+}
+\details{
+The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
+\itemize{
+\item Ada (1024 dimensions)
+\item Babbage (2048 dimensions)
+\item Curie (4096 dimensions)
+\item Davinci (12288 dimensions)
+}
+
+Note that the dimension size (= vector length), speed and \href{https://openai.com/api/pricing/}{associated costs} differ considerably.
+
+These vectors can be used for downstream tasks such as (vector) similarity calculations.
+}
+\examples{
+# First authenticate with your API key via `gpt3_authenticate('pathtokey')`
+
+# Once authenticated:
+
+## Simple request with defaults:
+sample_string = "London is one of the most liveable cities in the world. The city is always full of energy and people. It's always a great place to explore and have fun."
+gpt3_make_embedding(input = sample_string)
+
+## Change the model:
+gpt3_make_embedding(input = sample_string
+ , model = 'text-similarity-curie-001')
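+
+## A sketch (not run) of a downstream use: cosine similarity between the
+## embeddings of two texts (the second string is just an illustration).
+# second_string = "Munich is a city in the south of Germany."
+# emb_1 = gpt3_make_embedding(input = sample_string)
+# emb_2 = gpt3_make_embedding(input = second_string)
+# sum(emb_1 * emb_2) / (sqrt(sum(emb_1^2)) * sqrt(sum(emb_2^2)))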
+}
diff --git a/man/gpt3_simple_request.Rd b/man/gpt3_make_request.Rd
similarity index 91%
rename from man/gpt3_simple_request.Rd
rename to man/gpt3_make_request.Rd
index 8570a76..504c112 100644
--- a/man/gpt3_simple_request.Rd
+++ b/man/gpt3_make_request.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/make_request.R
-\name{gpt3_simple_request}
-\alias{gpt3_simple_request}
+\name{gpt3_make_request}
+\alias{gpt3_make_request}
\title{Makes a single completion request to the GPT-3 API}
\usage{
-gpt3_simple_request(
+gpt3_make_request(
prompt_input,
model = "text-davinci-002",
output_type = "complete",
@@ -55,7 +55,7 @@
If \code{output_type} is "meta", only the data table in slot [\link{2}] is returned.
}
\description{
-\code{gpt3_simple_request()} sends a single \href{https://beta.openai.com/docs/api-reference/completions}{completion request} to the Open AI GPT-3 API.
+\code{gpt3_make_request()} sends a single \href{https://beta.openai.com/docs/api-reference/completions}{completion request} to the Open AI GPT-3 API.
}
\details{
For a general guide on the completion requests, see \url{https://beta.openai.com/docs/guides/completion}. This function provides you with an R wrapper to send requests with the full range of request parameters as detailed on \url{https://beta.openai.com/docs/api-reference/completions} and reproduced below.
@@ -75,21 +75,21 @@
# Once authenticated:
## Simple request with defaults:
-gpt3_simple_request(prompt_input = 'How old are you?')
+gpt3_make_request(prompt_input = 'How old are you?')
## Instruct GPT-3 to write ten research ideas of max. 150 tokens with some controls:
-gpt3_simple_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
+gpt3_make_request(prompt_input = 'Write a research idea about using text data to understand human behaviour:'
, temperature = 0.8
, n = 10
, max_tokens = 150)
## For fully reproducible results, we need `temperature = 0`, e.g.:
-gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
, temperature = 0.0
, max_tokens = 50)
## The same example with a different GPT-3 model:
-gpt3_simple_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
+gpt3_make_request(prompt_input = 'Finish this sentence:/n There is no easier way to learn R than'
, model = 'text-babbage-001'
, temperature = 0.0
, max_tokens = 50)
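+
+## A sketch (not run): the returned list keeps the text completions in slot [[1]]
+## and the meta information in slot [[2]].
+# output = gpt3_make_request(prompt_input = 'How old are you?')
+# output[[1]]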
diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd
new file mode 100644
index 0000000..17cfc9c
--- /dev/null
+++ b/man/to_numeric.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{to_numeric}
+\alias{to_numeric}
+\title{Convert character vector of numeric values into a numeric vector}
+\usage{
+to_numeric(x)
+}
+\arguments{
+\item{x}{a character vector of numeric values}
+}
+\value{
+A numeric vector
+}
+\description{
+Converts a character vector of numeric values into a numeric vector
+}
+\examples{
+to_numeric('12312')
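+## also works element-wise on longer character vectors, e.g. (illustration):
+to_numeric(c('0.12', '-3.5', '1e-4'))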
+}