% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gpt3_embeddings.R
\name{gpt3_embeddings}
\alias{gpt3_embeddings}
\title{Retrieves text embeddings for a character vector of texts from the GPT-3 API}
\usage{
gpt3_embeddings(input_var, id_var, param_model = "text-similarity-ada-001")
}
\arguments{
\item{input_var}{character vector that contains the texts for which you want to obtain text embeddings from the GPT-3 model}

\item{id_var}{(optional) character vector that contains the user-defined ids of the prompts. See details.}

\item{param_model}{a character vector that indicates the \href{https://beta.openai.com/docs/guides/embeddings/similarity-embeddings}{similarity embedding model}; one of "text-similarity-ada-001" (default), "text-similarity-curie-001", "text-similarity-babbage-001", "text-similarity-davinci-001"}
}
\value{
A data.table with the embeddings as separate columns; one row represents one input text. See details.
}
\description{
\code{gpt3_embeddings()} extends the single embeddings function \code{gpt3_single_embedding()} to allow for the processing of a whole vector of texts.
}
\details{
The returned data.table contains the column \code{id} which indicates the text id (or its generic alternative if not specified) and the columns \code{dim_1} ... \verb{dim_\{max\}}, where \code{max} is the length of the text embeddings vector that the four different models return. For the default "Ada" model, these are 1024 dimensions (i.e., \code{dim_1} ... \code{dim_1024}).

The function supports the text similarity embeddings for the four GPT-3 models as specified in the parameter list. The main difference between the four models is the sophistication of the embedding representation as indicated by the vector embedding size.
\itemize{
\item Ada (1024 dimensions)
\item Babbage (2048 dimensions)
\item Curie (4096 dimensions)
\item Davinci (12288 dimensions)
}
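For example, a non-default model can be selected via the \code{param_model} argument; a minimal sketch (not evaluated; \code{my_texts} stands for any character vector of input texts):

\preformatted{
# request embeddings from the larger Curie similarity model instead of the default
emb_curie = gpt3_embeddings(input_var = my_texts
  , param_model = "text-similarity-curie-001")
}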

Note that the dimension size (= vector length), speed, and \href{https://openai.com/api/pricing/}{associated costs} differ considerably.

These vectors can be used for downstream tasks such as (vector) similarity calculations.
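For instance, a cosine similarity between two of the returned embedding vectors could be computed as follows (a minimal sketch; \code{emb} stands for the data.table returned by \code{gpt3_embeddings()}):

\preformatted{
# select the embedding columns (all columns except the id column)
dim_cols = setdiff(names(emb), "id")
v1 = unlist(emb[1, ..dim_cols])
v2 = unlist(emb[2, ..dim_cols])
# cosine similarity between the first two input texts
cosine_sim = sum(v1 * v2) / (sqrt(sum(v1^2)) * sqrt(sum(v2^2)))
}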
}
\examples{
# First authenticate with your API key via `gpt3_authenticate('pathtokey')`

# Use example data:
## The data below were generated with the `gpt3_single_request()` function as follows:
##### DO NOT RUN #####
# travel_blog_data = gpt3_single_request(prompt_input = "Write a travel blog about a dog's journey through the UK:", temperature = 0.8, n = 10, max_tokens = 200)[[1]]
##### END DO NOT RUN #####

# You can load these data with:
data("travel_blog_data") # the dataset contains 10 completions for the above request

## Obtain text embeddings for the completion texts:
emb_travelblogs = gpt3_embeddings(input_var = travel_blog_data$gpt3)
dim(emb_travelblogs)
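## Optionally supply your own text ids via the `id_var` argument
## (a sketch, not run; the ids below are made up for illustration):
# emb_travelblogs_ids = gpt3_embeddings(input_var = travel_blog_data$gpt3
#   , id_var = paste0("blog_", seq_len(nrow(travel_blog_data))))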
}