#' Request embeddings for every row of a data.table and stack the results
#'
#' Iterates over the rows of `data`, passes the text found in column
#' `text_var` to `gpt3.make_embedding()` one row at a time, and collects the
#' results into a single table with one row per input row.
#'
#' @param data A data.table holding the texts. data.table-specific syntax
#'   (`.N`, the `..var` column prefix) is used, so a plain data.frame will
#'   not work here.
#' @param text_var Character scalar: name of the column containing the text.
#' @param id_var Character scalar: name of the column containing the row id.
#' @param param_model Model name forwarded to `gpt3.make_embedding()` as its
#'   `model_` argument. Defaults to 'text-similarity-ada-001'.
#'
#' @return The result of `rbindlist()` over the per-row data.frames: columns
#'   `dim_1` ... `dim_k` (one per element returned by
#'   `gpt3.make_embedding()`) plus `id_full`, the id coerced to character.
#'   When `data` has zero rows, the result of `rbindlist()` on an empty list
#'   is returned and no embedding request is made.
#'
#' NOTE(review): assumes `gpt3.make_embedding()` returns a plain vector of the
#' same length for every row -- confirm against its definition.
gpt3.bunch_embedding = function(data
                                , text_var
                                , id_var
                                , param_model = 'text-similarity-ada-001'){

  data_ = data

  data_length = data_[, .N]

  # Preallocate one slot per row instead of growing the list in the loop.
  embedding_list = vector('list', data_length)

  # seq_len() yields an empty sequence for zero rows; `1:data_length` would
  # iterate over c(1, 0) and call the embedding helper on non-existent rows.
  for(i in seq_len(data_length)){

    print(paste0('Embedding: ', i, '/', data_length))

    row_outcome = gpt3.make_embedding(model_ = param_model
                                      , input_ = as.character(unname(data_[i, ..text_var])))

    # t() turns the result into a single row so each element becomes a column.
    row_df = data.frame(t(row_outcome))
    # seq_along() keeps the naming correct even for a zero-length result.
    names(row_df) = paste0('dim_', seq_along(row_outcome))
    row_df$id_full = as.character(unname(data_[i, ..id_var]))

    embedding_list[[i]] = row_df

  }

  output_data = rbindlist(embedding_list)

  return(output_data)

}