# blob: 29e3dc6d18451d6bf29f44b76fc17b80f402786a (source-viewer header: [file] [log] [blame])
#' Embed the text of every row of a data.table, one request per row
#'
#' Walks over `data` row by row, passes the value found in the `text_var`
#' column to `gpt3.make_embedding()`, and stacks the per-row results with
#' `rbindlist()`. Progress is printed as "Embedding: i/n" for each row.
#'
#' @param data A data.table; it is indexed with data.table syntax
#'   (`data[, .N]`, `data[i, ..col]`).
#' @param text_var Character name of the column holding the text to embed.
#' @param id_var Character name of the column holding the row identifier.
#' @param param_model Model name forwarded to `gpt3.make_embedding()` as
#'   `model_`.
#' @return A data.table with one row per row of `data`: columns
#'   `dim_1` ... `dim_k` (one per element returned by
#'   `gpt3.make_embedding()`) plus a character column `id_full`. For
#'   zero-row input the loop is skipped and the result is whatever
#'   `rbindlist()` returns for an empty list.
gpt3.bunch_embedding <- function(data
                                 , text_var
                                 , id_var
                                 , param_model = 'text-similarity-ada-001') {

  n_rows <- data[, .N]

  # Preallocate one slot per row instead of growing the list in the loop.
  embedded_rows <- vector("list", n_rows)

  # seq_len() yields an empty sequence for zero rows; 1:n_rows would yield
  # c(1, 0) and issue embedding requests for rows that do not exist.
  for (i in seq_len(n_rows)) {

    print(paste0('Embedding: ', i, '/', n_rows))

    # NOTE(review): gpt3.make_embedding() is defined elsewhere; presumably it
    # returns a numeric vector for a single input string -- confirm.
    row_outcome <- gpt3.make_embedding(
      model_ = param_model,
      input_ = as.character(unname(data[i, ..text_var]))
    )

    # Lay the returned vector out as a single row, one column per element.
    row_df <- data.frame(t(row_outcome))
    names(row_df) <- paste0('dim_', seq_along(row_outcome))
    row_df$id_full <- as.character(unname(data[i, ..id_var]))

    embedded_rows[[i]] <- row_df
  }

  rbindlist(embedded_rows)
}