1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <math.h>
19#include <pthread.h>
20
21#define MAX_STRING 100
22#define EXP_TABLE_SIZE 1000
23#define MAX_EXP 6
24#define MAX_SENTENCE_LENGTH 1000
25#define MAX_CODE_LENGTH 40
26
27const int vocab_hash_size = 30000000; // Maximum 30 * 0.7 = 21M words in the vocabulary
28
29typedef float real; // Precision of float numbers
30
31struct vocab_word {
32 long long cn;
33 int *point;
34 char *word, *code, codelen;
35};
36
37char train_file[MAX_STRING], output_file[MAX_STRING];
38char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
39char save_net_file[MAX_STRING], read_net_file[MAX_STRING];
40struct vocab_word *vocab;
41int binary = 0, type = 1, debug_mode = 2, window = 5, min_count = 5,
42 num_threads = 12, min_reduce = 1;
43int *vocab_hash;
44long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
45long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0,
46 classes = 0;
47real alpha = 0.025, starting_alpha, sample = 1e-3;
48real *syn0, *syn1, *syn1neg, *syn1nce, *expTable;
49clock_t start;
50
51real *syn1_window, *syn1neg_window, *syn1nce_window;
52int w_offset, window_layer_size;
53
54int window_hidden_size = 500;
55real *syn_window_hidden, *syn_hidden_word, *syn_hidden_word_neg,
56 *syn_hidden_word_nce;
57
58int hs = 0, negative = 5;
59const int table_size = 1e8;
60int *table;
61
62//contrastive negative sampling
63char negative_classes_file[MAX_STRING];
64int *word_to_group;
65int *group_to_table; //group_size*table_size
66int class_number;
67
68//nce
69real* noise_distribution;
70int nce = 0;
71
72//param caps
73real CAP_VALUE = 50;
74int cap = 0;
75
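// Parameter capping (enabled with -cap 1): after a gradient update the
// touched output-layer weights are clipped to [-CAP_VALUE, CAP_VALUE].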
76void capParam(real* array, int index) {
77 if (array[index] > CAP_VALUE)
78 array[index] = CAP_VALUE;
79 else if (array[index] < -CAP_VALUE)
80 array[index] = -CAP_VALUE;
81}
82
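// Hard tanh activation for the senna-style model (type 4): hardTanh clips a
// hidden unit to [-1, 1], and dHardTanh zeroes the gradient whenever the unit
// is already saturated in the direction the gradient would push it further.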
83real hardTanh(real x) {
84 if (x >= 1) {
85 return 1;
86 } else if (x <= -1) {
87 return -1;
88 } else {
89 return x;
90 }
91}
92
93real dHardTanh(real x, real g) {
94 if (x > 1 && g > 0) {
95 return 0;
96 }
97 if (x < -1 && g < 0) {
98 return 0;
99 }
100 return 1;
101}
102
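// Builds the negative-sampling table: each word occupies a share of the table
// proportional to count^0.75, so indexing it with a uniform random number
// samples negatives from the smoothed unigram distribution. The same
// count^0.75 weights are kept in noise_distribution for NCE.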
103void InitUnigramTable() {
104 int a, i;
105 long long train_words_pow = 0;
106 real d1, power = 0.75;
107 table = (int *) malloc(table_size * sizeof(int));
108 for (a = 0; a < vocab_size; a++)
109 train_words_pow += pow(vocab[a].cn, power);
110 i = 0;
111 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
112 for (a = 0; a < table_size; a++) {
113 table[a] = i;
114 if (a / (real) table_size > d1) {
115 i++;
116 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
117 }
118 if (i >= vocab_size)
119 i = vocab_size - 1;
120 }
121
122 noise_distribution = (real *) calloc(vocab_size, sizeof(real));
123 for (a = 0; a < vocab_size; a++)
124 noise_distribution[a] = pow(vocab[a].cn, power)
125 / (real) train_words_pow;
126}
127
128// Reads a single word from a file, assuming space + tab + EOL to be word boundaries
129void ReadWord(char *word, FILE *fin) {
130 int a = 0, ch;
131 while (!feof(fin)) {
132 ch = fgetc(fin);
133 if (ch == 13)
134 continue;
135 if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
136 if (a > 0) {
137 if (ch == '\n')
138 ungetc(ch, fin);
139 break;
140 }
141 if (ch == '\n') {
142 strcpy(word, (char *) "</s>");
143 return;
144 } else
145 continue;
146 }
147 word[a] = ch;
148 a++;
149 if (a >= MAX_STRING - 1)
150 a--; // Truncate too long words
151 }
152 word[a] = 0;
153}
154
155// Returns hash value of a word
156int GetWordHash(char *word) {
157 unsigned long long a, hash = 0;
158 for (a = 0; a < strlen(word); a++)
159 hash = hash * 257 + word[a];
160 hash = hash % vocab_hash_size;
161 return hash;
162}
163
164// Returns position of a word in the vocabulary; if the word is not found, returns -1
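// vocab_hash is an open-addressing table with linear probing: on a collision
// the search moves to the next slot, and an empty slot (-1) ends the lookup.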
165int SearchVocab(char *word) {
166 unsigned int hash = GetWordHash(word);
167 while (1) {
168 if (vocab_hash[hash] == -1)
169 return -1;
170 if (!strcmp(word, vocab[vocab_hash[hash]].word))
171 return vocab_hash[hash];
172 hash = (hash + 1) % vocab_hash_size;
173 }
174 return -1;
175}
176
177// Reads a word and returns its index in the vocabulary
178int ReadWordIndex(FILE *fin) {
179 char word[MAX_STRING];
180 ReadWord(word, fin);
181 if (feof(fin))
182 return -1;
183 return SearchVocab(word);
184}
185
186// Adds a word to the vocabulary
187int AddWordToVocab(char *word) {
188 unsigned int hash, length = strlen(word) + 1;
189 if (length > MAX_STRING)
190 length = MAX_STRING;
191 vocab[vocab_size].word = (char *) calloc(length, sizeof(char));
192 strcpy(vocab[vocab_size].word, word);
193 vocab[vocab_size].cn = 0;
194 vocab_size++;
195 // Reallocate memory if needed
196 if (vocab_size + 2 >= vocab_max_size) {
197 vocab_max_size += 1000;
198 vocab = (struct vocab_word *) realloc(vocab,
199 vocab_max_size * sizeof(struct vocab_word));
200 }
201 hash = GetWordHash(word);
202 while (vocab_hash[hash] != -1)
203 hash = (hash + 1) % vocab_hash_size;
204 vocab_hash[hash] = vocab_size - 1;
205 return vocab_size - 1;
206}
207
208// Used later for sorting by word counts
209int VocabCompare(const void *a, const void *b) {
210 return ((struct vocab_word *) b)->cn - ((struct vocab_word *) a)->cn;
211}
212
213// Sorts the vocabulary by frequency using word counts
214void SortVocab() {
215 int a, size;
216 unsigned int hash;
217 // Sort the vocabulary and keep </s> at the first position
218 qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
219 for (a = 0; a < vocab_hash_size; a++)
220 vocab_hash[a] = -1;
221 size = vocab_size;
222 train_words = 0;
223 for (a = 0; a < size; a++) {
224		// Words occurring less than min_count times will be discarded from the vocab
225 if ((vocab[a].cn < min_count) && (a != 0)) {
226 vocab_size--;
227 free(vocab[a].word);
228 } else {
229			// Hash will be re-computed, as it is no longer valid after sorting
230 hash = GetWordHash(vocab[a].word);
231 while (vocab_hash[hash] != -1)
232 hash = (hash + 1) % vocab_hash_size;
233 vocab_hash[hash] = a;
234 train_words += vocab[a].cn;
235 }
236 }
237 vocab = (struct vocab_word *) realloc(vocab,
238 (vocab_size + 1) * sizeof(struct vocab_word));
239 // Allocate memory for the binary tree construction
240 for (a = 0; a < vocab_size; a++) {
241 vocab[a].code = (char *) calloc(MAX_CODE_LENGTH, sizeof(char));
242 vocab[a].point = (int *) calloc(MAX_CODE_LENGTH, sizeof(int));
243 }
244}
245
246// Reduces the vocabulary by removing infrequent tokens
247void ReduceVocab() {
248 int a, b = 0;
249 unsigned int hash;
250 for (a = 0; a < vocab_size; a++)
251 if (vocab[a].cn > min_reduce) {
252 vocab[b].cn = vocab[a].cn;
253 vocab[b].word = vocab[a].word;
254 b++;
255 } else
256 free(vocab[a].word);
257 vocab_size = b;
258 for (a = 0; a < vocab_hash_size; a++)
259 vocab_hash[a] = -1;
260 for (a = 0; a < vocab_size; a++) {
261		// Hash will be re-computed, as it is no longer valid after the removal
262 hash = GetWordHash(vocab[a].word);
263 while (vocab_hash[hash] != -1)
264 hash = (hash + 1) % vocab_hash_size;
265 vocab_hash[hash] = a;
266 }
267 fflush(stdout);
268 min_reduce++;
269}
270
271// Create binary Huffman tree using the word counts
272// Frequent words will have short unique binary codes
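// For each word, code[] stores the Huffman bits from the root to its leaf and
// point[] the indices of the inner nodes on that path; hierarchical softmax
// walks these vocab[word].codelen entries during training.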
273void CreateBinaryTree() {
274 long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
275 char code[MAX_CODE_LENGTH];
276 long long *count = (long long *) calloc(vocab_size * 2 + 1,
277 sizeof(long long));
278 long long *binary = (long long *) calloc(vocab_size * 2 + 1,
279 sizeof(long long));
280 long long *parent_node = (long long *) calloc(vocab_size * 2 + 1,
281 sizeof(long long));
282 for (a = 0; a < vocab_size; a++)
283 count[a] = vocab[a].cn;
284 for (a = vocab_size; a < vocab_size * 2; a++)
285 count[a] = 1e15;
286 pos1 = vocab_size - 1;
287 pos2 = vocab_size;
288 // Following algorithm constructs the Huffman tree by adding one node at a time
289 for (a = 0; a < vocab_size - 1; a++) {
290 // First, find two smallest nodes 'min1, min2'
291 if (pos1 >= 0) {
292 if (count[pos1] < count[pos2]) {
293 min1i = pos1;
294 pos1--;
295 } else {
296 min1i = pos2;
297 pos2++;
298 }
299 } else {
300 min1i = pos2;
301 pos2++;
302 }
303 if (pos1 >= 0) {
304 if (count[pos1] < count[pos2]) {
305 min2i = pos1;
306 pos1--;
307 } else {
308 min2i = pos2;
309 pos2++;
310 }
311 } else {
312 min2i = pos2;
313 pos2++;
314 }
315 count[vocab_size + a] = count[min1i] + count[min2i];
316 parent_node[min1i] = vocab_size + a;
317 parent_node[min2i] = vocab_size + a;
318 binary[min2i] = 1;
319 }
320 // Now assign binary code to each vocabulary word
321 for (a = 0; a < vocab_size; a++) {
322 b = a;
323 i = 0;
324 while (1) {
325 code[i] = binary[b];
326 point[i] = b;
327 i++;
328 b = parent_node[b];
329 if (b == vocab_size * 2 - 2)
330 break;
331 }
332 vocab[a].codelen = i;
333 vocab[a].point[0] = vocab_size - 2;
334 for (b = 0; b < i; b++) {
335 vocab[a].code[i - b - 1] = code[b];
336 vocab[a].point[i - b] = point[b] - vocab_size;
337 }
338 }
339 free(count);
340 free(binary);
341 free(parent_node);
342}
343
344void LearnVocabFromTrainFile() {
345 char word[MAX_STRING];
346 FILE *fin;
347 long long a, i;
348 for (a = 0; a < vocab_hash_size; a++)
349 vocab_hash[a] = -1;
350 fin = fopen(train_file, "rb");
351 if (fin == NULL) {
352 printf("ERROR: training data file not found!\n");
353 exit(1);
354 }
355 vocab_size = 0;
356 AddWordToVocab((char *) "</s>");
357 while (1) {
358 ReadWord(word, fin);
359 if (feof(fin))
360 break;
361 train_words++;
362 if ((debug_mode > 1) && (train_words % 100000 == 0)) {
363 printf("%lldK%c", train_words / 1000, 13);
364 fflush(stdout);
365 }
366 i = SearchVocab(word);
367 if (i == -1) {
368 a = AddWordToVocab(word);
369 vocab[a].cn = 1;
370 } else
371 vocab[i].cn++;
372 if (vocab_size > vocab_hash_size * 0.7)
373 ReduceVocab();
374 }
375 SortVocab();
376 if (debug_mode > 0) {
377 printf("Vocab size: %lld\n", vocab_size);
378 printf("Words in train file: %lld\n", train_words);
379 }
380 file_size = ftell(fin);
381 fclose(fin);
382}
383
384void SaveVocab() {
385 long long i;
386 FILE *fo = fopen(save_vocab_file, "wb");
387 for (i = 0; i < vocab_size; i++)
388 fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
389 fclose(fo);
390}
391
392void ReadVocab() {
393 long long a, i = 0;
394 char c;
395 char word[MAX_STRING];
396 FILE *fin = fopen(read_vocab_file, "rb");
397 if (fin == NULL) {
398 printf("Vocabulary file not found\n");
399 exit(1);
400 }
401 for (a = 0; a < vocab_hash_size; a++)
402 vocab_hash[a] = -1;
403 vocab_size = 0;
404 while (1) {
405 ReadWord(word, fin);
406 if (feof(fin))
407 break;
408 a = AddWordToVocab(word);
409 fscanf(fin, "%lld%c", &vocab[a].cn, &c);
410 i++;
411 }
412 SortVocab();
413 if (debug_mode > 0) {
414 printf("Vocab size: %lld\n", vocab_size);
415 printf("Words in train file: %lld\n", train_words);
416 }
417 fin = fopen(train_file, "rb");
418 if (fin == NULL) {
419 printf("ERROR: training data file not found!\n");
420 exit(1);
421 }
422 fseek(fin, 0, SEEK_END);
423 file_size = ftell(fin);
424 fclose(fin);
425}
426
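// Class-restricted negative sampling: negative_classes_file assigns words to
// groups. word_to_group maps each vocabulary index to its group (-1 if
// unassigned) and group_to_table holds one count^0.75 sampling table of
// table_size entries per group, so negatives are drawn from the same class as
// the current word.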
427void InitClassUnigramTable() {
428 long long a, c;
429 printf("loading class unigrams \n");
430 FILE *fin = fopen(negative_classes_file, "rb");
431 if (fin == NULL) {
432 printf("ERROR: class file not found!\n");
433 exit(1);
434 }
435 word_to_group = (int *) malloc(vocab_size * sizeof(int));
436 for (a = 0; a < vocab_size; a++)
437 word_to_group[a] = -1;
438 char class[MAX_STRING];
439 char prev_class[MAX_STRING];
440 prev_class[0] = 0;
441 char word[MAX_STRING];
442 class_number = -1;
443 while (1) {
444 if (feof(fin))
445 break;
446 ReadWord(class, fin);
447 ReadWord(word, fin);
448 int word_index = SearchVocab(word);
449 if (word_index != -1) {
450 if (strcmp(class, prev_class) != 0) {
451 class_number++;
452 strcpy(prev_class, class);
453 }
454 word_to_group[word_index] = class_number;
455 }
456 ReadWord(word, fin);
457 }
458 class_number++;
459 fclose(fin);
460
461 group_to_table = (int *) malloc(table_size * class_number * sizeof(int));
462 long long train_words_pow = 0;
463 real d1, power = 0.75;
464
465 for (c = 0; c < class_number; c++) {
466 long long offset = c * table_size;
467 train_words_pow = 0;
468 for (a = 0; a < vocab_size; a++)
469 if (word_to_group[a] == c)
470 train_words_pow += pow(vocab[a].cn, power);
471 int i = 0;
472		while (i < vocab_size && word_to_group[i] != c)
473 i++;
474 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
475 for (a = 0; a < table_size; a++) {
476 //printf("index %lld , word %d\n", a, i);
477 group_to_table[offset + a] = i;
478 if (a / (real) table_size > d1) {
479 i++;
480				while (i < vocab_size && word_to_group[i] != c)
481 i++;
482 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
483 }
484 if (i >= vocab_size)
485				while (i >= vocab_size || (i >= 0 && word_to_group[i] != c))
486 i--;
487 }
488 }
489}
490
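// SaveNet/InitNet exchange a raw binary dump: syn0 (vocab_size * layer1_size
// reals) followed by syn1neg_window (vocab_size * window_layer_size reals).
// Only structured skip-gram (type 3) with negative sampling is supported.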
491void SaveNet() {
492	if(type != 3 || negative <= 0) {
493 fprintf(stderr, "save-net only supported for type 3 with negative sampling\n");
494 return;
495 }
496
497	FILE *fnet = fopen(save_net_file, "wb");
498 if (fnet == NULL) {
499		printf("ERROR: cannot open net parameter file for writing\n");
500 exit(1);
501 }
502	fwrite(syn0, sizeof(real), vocab_size * layer1_size, fnet);
503	fwrite(syn1neg_window, sizeof(real), vocab_size * window_layer_size, fnet);
504	fclose(fnet);
505}
506
507void InitNet() {
508 long long a, b;
509 unsigned long long next_random = 1;
510 window_layer_size = layer1_size * window * 2;
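	// Position-dependent output layers (types 2 and 3) keep a separate
	// layer1_size block per context position, 2 * window positions in total.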
511 a = posix_memalign((void **) &syn0, 128,
512 (long long) vocab_size * layer1_size * sizeof(real));
513 if (syn0 == NULL) {
514 printf("Memory allocation failed\n");
515 exit(1);
516 }
517
518 if (hs) {
519 a = posix_memalign((void **) &syn1, 128,
520 (long long) vocab_size * layer1_size * sizeof(real));
521 if (syn1 == NULL) {
522 printf("Memory allocation failed\n");
523 exit(1);
524 }
525 a = posix_memalign((void **) &syn1_window, 128,
526 (long long) vocab_size * window_layer_size * sizeof(real));
527 if (syn1_window == NULL) {
528 printf("Memory allocation failed\n");
529 exit(1);
530 }
531 a = posix_memalign((void **) &syn_hidden_word, 128,
532 (long long) vocab_size * window_hidden_size * sizeof(real));
533 if (syn_hidden_word == NULL) {
534 printf("Memory allocation failed\n");
535 exit(1);
536 }
537
538 for (a = 0; a < vocab_size; a++)
539 for (b = 0; b < layer1_size; b++)
540 syn1[a * layer1_size + b] = 0;
541 for (a = 0; a < vocab_size; a++)
542 for (b = 0; b < window_layer_size; b++)
543 syn1_window[a * window_layer_size + b] = 0;
544 for (a = 0; a < vocab_size; a++)
545 for (b = 0; b < window_hidden_size; b++)
546 syn_hidden_word[a * window_hidden_size + b] = 0;
547 }
548 if (negative > 0) {
549		if(type == 0) {
550 a = posix_memalign((void **) &syn1neg, 128,
551 (long long) vocab_size * layer1_size * sizeof(real));
552 if (syn1neg == NULL) {
553 printf("Memory allocation failed\n");
554 exit(1);
555 }
556 for (a = 0; a < vocab_size; a++)
557 for (b = 0; b < layer1_size; b++)
558 syn1neg[a * layer1_size + b] = 0;
559 } else if (type == 3) {
560 a = posix_memalign((void **) &syn1neg_window, 128,
561 (long long) vocab_size * window_layer_size * sizeof(real));
562 if (syn1neg_window == NULL) {
563 printf("Memory allocation failed\n");
564 exit(1);
565 }
566 for (a = 0; a < vocab_size; a++)
567 for (b = 0; b < window_layer_size; b++)
568 syn1neg_window[a * window_layer_size + b] = 0;
569 } else if (type == 4) {
570 a = posix_memalign((void **) &syn_hidden_word_neg, 128,
571 (long long) vocab_size * window_hidden_size * sizeof(real));
572 if (syn_hidden_word_neg == NULL) {
573 printf("Memory allocation failed\n");
574 exit(1);
575 }
576 for (a = 0; a < vocab_size; a++)
577 for (b = 0; b < window_hidden_size; b++)
578 syn_hidden_word_neg[a * window_hidden_size + b] = 0;
579		}
580	}
581 if (nce > 0) {
582 a = posix_memalign((void **) &syn1nce, 128,
583 (long long) vocab_size * layer1_size * sizeof(real));
584 if (syn1nce == NULL) {
585 printf("Memory allocation failed\n");
586 exit(1);
587 }
588 a = posix_memalign((void **) &syn1nce_window, 128,
589 (long long) vocab_size * window_layer_size * sizeof(real));
590 if (syn1nce_window == NULL) {
591 printf("Memory allocation failed\n");
592 exit(1);
593 }
594 a = posix_memalign((void **) &syn_hidden_word_nce, 128,
595 (long long) vocab_size * window_hidden_size * sizeof(real));
596 if (syn_hidden_word_nce == NULL) {
597 printf("Memory allocation failed\n");
598 exit(1);
599 }
600
601 for (a = 0; a < vocab_size; a++)
602 for (b = 0; b < layer1_size; b++)
603 syn1nce[a * layer1_size + b] = 0;
604 for (a = 0; a < vocab_size; a++)
605 for (b = 0; b < window_layer_size; b++)
606 syn1nce_window[a * window_layer_size + b] = 0;
607 for (a = 0; a < vocab_size; a++)
608 for (b = 0; b < window_hidden_size; b++)
609 syn_hidden_word_nce[a * window_hidden_size + b] = 0;
610 }
611
612	if(type == 4) {
613		a = posix_memalign((void **) &syn_window_hidden, 128,
614 window_hidden_size * window_layer_size * sizeof(real));
615 if (syn_window_hidden == NULL) {
616 printf("Memory allocation failed\n");
617 exit(1);
618 }
619 for (a = 0; a < window_hidden_size * window_layer_size; a++) {
620 next_random = next_random * (unsigned long long) 25214903917 + 11;
621 syn_window_hidden[a] = (((next_random & 0xFFFF) / (real) 65536)
622 - 0.5) / (window_hidden_size * window_layer_size);
623 }
624 }
625
626 if (read_net_file[0] == 0) {
627 for (a = 0; a < vocab_size; a++)
628 for (b = 0; b < layer1_size; b++) {
629 next_random = next_random * (unsigned long long) 25214903917
630 + 11;
631 syn0[a * layer1_size + b] = (((next_random & 0xFFFF)
632 / (real) 65536) - 0.5) / layer1_size;
633 }
634	} else if(type == 3 && negative > 0) {
635		FILE *fnet = fopen(read_net_file, "rb");
636 if (fnet == NULL) {
637 printf("Net parameter file not found\n");
638 exit(1);
639 }
640		fread(syn0, sizeof(real), vocab_size * layer1_size, fnet);
641		fread(syn1neg_window, sizeof(real), vocab_size * window_layer_size, fnet);
642		fclose(fnet);
643	} else {
644 fprintf(stderr, "read-net only supported for type 3 with negative sampling\n");
645 exit(-1);
646	}
647
648 CreateBinaryTree();
649}
650
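// One training thread: it seeks to its own slice of the training file, reads
// sentences (subsampling frequent words when sample > 0), and makes iter
// passes over the slice. The branch on 'type' selects cbow (0), skip-gram (1),
// cwindow (2), structured skip-gram (3) or the senna-style model (4).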
651void *TrainModelThread(void *id) {
652 long long a, b, d, cw, word, last_word, sentence_length = 0,
653 sentence_position = 0;
654 long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
655 long long l1, l2, c, target, label, local_iter = iter;
656 unsigned long long next_random = (long long) id;
657 real f, g;
658 clock_t now;
659 int input_len_1 = layer1_size;
660 int window_offset = -1;
661 if (type == 2 || type == 4) {
662 input_len_1 = window_layer_size;
663 }
664 real *neu1 = (real *) calloc(input_len_1, sizeof(real));
665 real *neu1e = (real *) calloc(input_len_1, sizeof(real));
666
667 int input_len_2 = 0;
668 if (type == 4) {
669 input_len_2 = window_hidden_size;
670 }
671 real *neu2 = (real *) calloc(input_len_2, sizeof(real));
672 real *neu2e = (real *) calloc(input_len_2, sizeof(real));
673
674 FILE *fi = fopen(train_file, "rb");
675 fseek(fi, file_size / (long long) num_threads * (long long) id, SEEK_SET);
676 while (1) {
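		// Roughly every 10k words per thread: update the shared progress
		// counter, optionally report it, and decay alpha linearly down to
		// 0.0001 * starting_alpha.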
677 if (word_count - last_word_count > 10000) {
678 word_count_actual += word_count - last_word_count;
679 last_word_count = word_count;
680 if ((debug_mode > 1)) {
681 now = clock();
682 printf(
683 "%cAlpha: %f Progress: %.2f%% Words/thread/sec: %.2fk ",
684 13, alpha,
685 word_count_actual / (real) (iter * train_words + 1)
686 * 100,
687 word_count_actual
688 / ((real) (now - start + 1)
689 / (real) CLOCKS_PER_SEC * 1000));
690 fflush(stdout);
691 }
692 alpha = starting_alpha
693 * (1 - word_count_actual / (real) (iter * train_words + 1));
694 if (alpha < starting_alpha * 0.0001)
695 alpha = starting_alpha * 0.0001;
696 }
697 if (sentence_length == 0) {
698 while (1) {
699 word = ReadWordIndex(fi);
700 if (feof(fi))
701 break;
702 if (word == -1)
703 continue;
704 word_count++;
705 if (word == 0)
706 break;
707 // The subsampling randomly discards frequent words while keeping the ranking same
708 if (sample > 0) {
709 real ran = (sqrt(vocab[word].cn / (sample * train_words))
710 + 1) * (sample * train_words) / vocab[word].cn;
711 next_random = next_random * (unsigned long long) 25214903917
712 + 11;
713 if (ran < (next_random & 0xFFFF) / (real) 65536)
714 continue;
715 }
716 sen[sentence_length] = word;
717 sentence_length++;
718 if (sentence_length >= MAX_SENTENCE_LENGTH)
719 break;
720 }
721 sentence_position = 0;
722 }
723 if (feof(fi) || (word_count > train_words / num_threads)) {
724 word_count_actual += word_count - last_word_count;
725 local_iter--;
726 if (local_iter == 0)
727 break;
728 word_count = 0;
729 last_word_count = 0;
730 sentence_length = 0;
731 fseek(fi, file_size / (long long) num_threads * (long long) id,
732 SEEK_SET);
733 continue;
734 }
735 word = sen[sentence_position];
736 if (word == -1)
737 continue;
738 for (c = 0; c < input_len_1; c++)
739 neu1[c] = 0;
740 for (c = 0; c < input_len_1; c++)
741 neu1e[c] = 0;
742 for (c = 0; c < input_len_2; c++)
743 neu2[c] = 0;
744 for (c = 0; c < input_len_2; c++)
745 neu2e[c] = 0;
746 next_random = next_random * (unsigned long long) 25214903917 + 11;
747 b = next_random % window;
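		// b shrinks the context window at random: for types 0 and 1 the
		// effective window size is window - b, i.e. uniform in [1, window];
		// types 2-4 below always use the full fixed window.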
748 if (type == 0) { //train the cbow architecture
749 // in -> hidden
750 cw = 0;
751 for (a = b; a < window * 2 + 1 - b; a++)
752 if (a != window) {
753 c = sentence_position - window + a;
754 if (c < 0)
755 continue;
756 if (c >= sentence_length)
757 continue;
758 last_word = sen[c];
759 if (last_word == -1)
760 continue;
761 for (c = 0; c < layer1_size; c++)
762 neu1[c] += syn0[c + last_word * layer1_size];
763 cw++;
764 }
765 if (cw) {
766 for (c = 0; c < layer1_size; c++)
767 neu1[c] /= cw;
768 if (hs)
769 for (d = 0; d < vocab[word].codelen; d++) {
770 f = 0;
771 l2 = vocab[word].point[d] * layer1_size;
772 // Propagate hidden -> output
773 for (c = 0; c < layer1_size; c++)
774 f += neu1[c] * syn1[c + l2];
775 if (f <= -MAX_EXP)
776 continue;
777 else if (f >= MAX_EXP)
778 continue;
779 else
780 f = expTable[(int) ((f + MAX_EXP)
781 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
782 // 'g' is the gradient multiplied by the learning rate
783 g = (1 - vocab[word].code[d] - f) * alpha;
784 // Propagate errors output -> hidden
785 for (c = 0; c < layer1_size; c++)
786 neu1e[c] += g * syn1[c + l2];
787 // Learn weights hidden -> output
788 for (c = 0; c < layer1_size; c++)
789 syn1[c + l2] += g * neu1[c];
790 if (cap == 1)
791 for (c = 0; c < layer1_size; c++)
792 capParam(syn1, c + l2);
793 }
794 // NEGATIVE SAMPLING
795 if (negative > 0)
796 for (d = 0; d < negative + 1; d++) {
797 if (d == 0) {
798 target = word;
799 label = 1;
800 } else {
801 next_random = next_random
802 * (unsigned long long) 25214903917 + 11;
803 if (word_to_group != NULL
804 && word_to_group[word] != -1) {
805 target = word;
806 while (target == word) {
807 target = group_to_table[word_to_group[word]
808 * table_size
809 + (next_random >> 16) % table_size];
810 next_random = next_random
811 * (unsigned long long) 25214903917
812 + 11;
813 }
814 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
815 } else {
816 target =
817 table[(next_random >> 16) % table_size];
818 }
819 if (target == 0)
820 target = next_random % (vocab_size - 1) + 1;
821 if (target == word)
822 continue;
823 label = 0;
824 }
825 l2 = target * layer1_size;
826 f = 0;
827 for (c = 0; c < layer1_size; c++)
828 f += neu1[c] * syn1neg[c + l2];
829 if (f > MAX_EXP)
830 g = (label - 1) * alpha;
831 else if (f < -MAX_EXP)
832 g = (label - 0) * alpha;
833 else
834 g = (label
835 - expTable[(int) ((f + MAX_EXP)
836 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
837 * alpha;
838 for (c = 0; c < layer1_size; c++)
839 neu1e[c] += g * syn1neg[c + l2];
840 for (c = 0; c < layer1_size; c++)
841 syn1neg[c + l2] += g * neu1[c];
842 if (cap == 1)
843 for (c = 0; c < layer1_size; c++)
844 capParam(syn1neg, c + l2);
845 }
846 // Noise Contrastive Estimation
847 if (nce > 0)
848 for (d = 0; d < nce + 1; d++) {
849 if (d == 0) {
850 target = word;
851 label = 1;
852 } else {
853 next_random = next_random
854 * (unsigned long long) 25214903917 + 11;
855 if (word_to_group != NULL
856 && word_to_group[word] != -1) {
857 target = word;
858 while (target == word) {
859 target = group_to_table[word_to_group[word]
860 * table_size
861 + (next_random >> 16) % table_size];
862 next_random = next_random
863 * (unsigned long long) 25214903917
864 + 11;
865 }
866 } else {
867 target =
868 table[(next_random >> 16) % table_size];
869 }
870 if (target == 0)
871 target = next_random % (vocab_size - 1) + 1;
872 if (target == word)
873 continue;
874 label = 0;
875 }
876 l2 = target * layer1_size;
877 f = 0;
878
879 for (c = 0; c < layer1_size; c++)
880 f += neu1[c] * syn1nce[c + l2];
881 if (f > MAX_EXP)
882 g = (label - 1) * alpha;
883 else if (f < -MAX_EXP)
884 g = (label - 0) * alpha;
885 else {
886 f = exp(f);
887 g =
888 (label
889 - f
890 / (noise_distribution[target]
891 * nce + f)) * alpha;
892 }
893 for (c = 0; c < layer1_size; c++)
894 neu1e[c] += g * syn1nce[c + l2];
895 for (c = 0; c < layer1_size; c++)
896 syn1nce[c + l2] += g * neu1[c];
897 if (cap == 1)
898 for (c = 0; c < layer1_size; c++)
899 capParam(syn1nce, c + l2);
900 }
901 // hidden -> in
902 for (a = b; a < window * 2 + 1 - b; a++)
903 if (a != window) {
904 c = sentence_position - window + a;
905 if (c < 0)
906 continue;
907 if (c >= sentence_length)
908 continue;
909 last_word = sen[c];
910 if (last_word == -1)
911 continue;
912 for (c = 0; c < layer1_size; c++)
913 syn0[c + last_word * layer1_size] += neu1e[c];
914 }
915 }
916 } else if (type == 1) { //train skip-gram
917 for (a = b; a < window * 2 + 1 - b; a++)
918 if (a != window) {
919 c = sentence_position - window + a;
920 if (c < 0)
921 continue;
922 if (c >= sentence_length)
923 continue;
924 last_word = sen[c];
925 if (last_word == -1)
926 continue;
927 l1 = last_word * layer1_size;
928 for (c = 0; c < layer1_size; c++)
929 neu1e[c] = 0;
930 // HIERARCHICAL SOFTMAX
931 if (hs)
932 for (d = 0; d < vocab[word].codelen; d++) {
933 f = 0;
934 l2 = vocab[word].point[d] * layer1_size;
935 // Propagate hidden -> output
936 for (c = 0; c < layer1_size; c++)
937 f += syn0[c + l1] * syn1[c + l2];
938 if (f <= -MAX_EXP)
939 continue;
940 else if (f >= MAX_EXP)
941 continue;
942 else
943 f = expTable[(int) ((f + MAX_EXP)
944 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
945 // 'g' is the gradient multiplied by the learning rate
946 g = (1 - vocab[word].code[d] - f) * alpha;
947 // Propagate errors output -> hidden
948 for (c = 0; c < layer1_size; c++)
949 neu1e[c] += g * syn1[c + l2];
950 // Learn weights hidden -> output
951 for (c = 0; c < layer1_size; c++)
952 syn1[c + l2] += g * syn0[c + l1];
953 if (cap == 1)
954 for (c = 0; c < layer1_size; c++)
955 capParam(syn1, c + l2);
956 }
957 // NEGATIVE SAMPLING
958 if (negative > 0)
959 for (d = 0; d < negative + 1; d++) {
960 if (d == 0) {
961 target = word;
962 label = 1;
963 } else {
964 next_random = next_random
965 * (unsigned long long) 25214903917 + 11;
966 if (word_to_group != NULL
967 && word_to_group[word] != -1) {
968 target = word;
969 while (target == word) {
970 target =
971 group_to_table[word_to_group[word]
972 * table_size
973 + (next_random >> 16)
974 % table_size];
975 next_random =
976 next_random
977 * (unsigned long long) 25214903917
978 + 11;
979 }
980 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
981 } else {
982 target = table[(next_random >> 16)
983 % table_size];
984 }
985 if (target == 0)
986 target = next_random % (vocab_size - 1) + 1;
987 if (target == word)
988 continue;
989 label = 0;
990 }
991 l2 = target * layer1_size;
992 f = 0;
993 for (c = 0; c < layer1_size; c++)
994 f += syn0[c + l1] * syn1neg[c + l2];
995 if (f > MAX_EXP)
996 g = (label - 1) * alpha;
997 else if (f < -MAX_EXP)
998 g = (label - 0) * alpha;
999 else
1000 g =
1001 (label
1002 - expTable[(int) ((f + MAX_EXP)
1003 * (EXP_TABLE_SIZE
1004 / MAX_EXP / 2))])
1005 * alpha;
1006 for (c = 0; c < layer1_size; c++)
1007 neu1e[c] += g * syn1neg[c + l2];
1008 for (c = 0; c < layer1_size; c++)
1009 syn1neg[c + l2] += g * syn0[c + l1];
1010 if (cap == 1)
1011 for (c = 0; c < layer1_size; c++)
1012 capParam(syn1neg, c + l2);
1013 }
1014 //Noise Contrastive Estimation
1015 if (nce > 0)
1016 for (d = 0; d < nce + 1; d++) {
1017 if (d == 0) {
1018 target = word;
1019 label = 1;
1020 } else {
1021 next_random = next_random
1022 * (unsigned long long) 25214903917 + 11;
1023 if (word_to_group != NULL
1024 && word_to_group[word] != -1) {
1025 target = word;
1026 while (target == word) {
1027 target =
1028 group_to_table[word_to_group[word]
1029 * table_size
1030 + (next_random >> 16)
1031 % table_size];
1032 next_random =
1033 next_random
1034 * (unsigned long long) 25214903917
1035 + 11;
1036 }
1037 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1038 } else {
1039 target = table[(next_random >> 16)
1040 % table_size];
1041 }
1042 if (target == 0)
1043 target = next_random % (vocab_size - 1) + 1;
1044 if (target == word)
1045 continue;
1046 label = 0;
1047 }
1048 l2 = target * layer1_size;
1049 f = 0;
1050 for (c = 0; c < layer1_size; c++)
1051 f += syn0[c + l1] * syn1nce[c + l2];
1052 if (f > MAX_EXP)
1053 g = (label - 1) * alpha;
1054 else if (f < -MAX_EXP)
1055 g = (label - 0) * alpha;
1056 else {
1057 f = exp(f);
1058 g = (label
1059 - f
1060 / (noise_distribution[target]
1061 * nce + f)) * alpha;
1062 }
1063 for (c = 0; c < layer1_size; c++)
1064 neu1e[c] += g * syn1nce[c + l2];
1065 for (c = 0; c < layer1_size; c++)
1066 syn1nce[c + l2] += g * syn0[c + l1];
1067 if (cap == 1)
1068 for (c = 0; c < layer1_size; c++)
1069 capParam(syn1nce, c + l2);
1070 }
1071 // Learn weights input -> hidden
1072 for (c = 0; c < layer1_size; c++)
1073 syn0[c + l1] += neu1e[c];
1074 }
1075 } else if (type == 2) { //train the cwindow architecture
1076 // in -> hidden
1077 cw = 0;
1078 for (a = 0; a < window * 2 + 1; a++)
1079 if (a != window) {
1080 c = sentence_position - window + a;
1081 if (c < 0)
1082 continue;
1083 if (c >= sentence_length)
1084 continue;
1085 last_word = sen[c];
1086 if (last_word == -1)
1087 continue;
1088 window_offset = a * layer1_size;
1089 if (a > window)
1090 window_offset -= layer1_size;
1091 for (c = 0; c < layer1_size; c++)
1092 neu1[c + window_offset] += syn0[c
1093 + last_word * layer1_size];
1094 cw++;
1095 }
1096 if (cw) {
1097 if (hs)
1098 for (d = 0; d < vocab[word].codelen; d++) {
1099 f = 0;
1100 l2 = vocab[word].point[d] * window_layer_size;
1101 // Propagate hidden -> output
1102 for (c = 0; c < window_layer_size; c++)
1103 f += neu1[c] * syn1_window[c + l2];
1104 if (f <= -MAX_EXP)
1105 continue;
1106 else if (f >= MAX_EXP)
1107 continue;
1108 else
1109 f = expTable[(int) ((f + MAX_EXP)
1110 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1111 // 'g' is the gradient multiplied by the learning rate
1112 g = (1 - vocab[word].code[d] - f) * alpha;
1113 // Propagate errors output -> hidden
1114 for (c = 0; c < window_layer_size; c++)
1115 neu1e[c] += g * syn1_window[c + l2];
1116 // Learn weights hidden -> output
1117 for (c = 0; c < window_layer_size; c++)
1118 syn1_window[c + l2] += g * neu1[c];
1119 if (cap == 1)
1120 for (c = 0; c < window_layer_size; c++)
1121 capParam(syn1_window, c + l2);
1122 }
1123 // NEGATIVE SAMPLING
1124 if (negative > 0)
1125 for (d = 0; d < negative + 1; d++) {
1126 if (d == 0) {
1127 target = word;
1128 label = 1;
1129 } else {
1130 next_random = next_random
1131 * (unsigned long long) 25214903917 + 11;
1132 if (word_to_group != NULL
1133 && word_to_group[word] != -1) {
1134 target = word;
1135 while (target == word) {
1136 target = group_to_table[word_to_group[word]
1137 * table_size
1138 + (next_random >> 16) % table_size];
1139 next_random = next_random
1140 * (unsigned long long) 25214903917
1141 + 11;
1142 }
1143 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1144 } else {
1145 target =
1146 table[(next_random >> 16) % table_size];
1147 }
1148 if (target == 0)
1149 target = next_random % (vocab_size - 1) + 1;
1150 if (target == word)
1151 continue;
1152 label = 0;
1153 }
1154 l2 = target * window_layer_size;
1155 f = 0;
1156 for (c = 0; c < window_layer_size; c++)
1157 f += neu1[c] * syn1neg_window[c + l2];
1158 if (f > MAX_EXP)
1159 g = (label - 1) * alpha;
1160 else if (f < -MAX_EXP)
1161 g = (label - 0) * alpha;
1162 else
1163 g = (label
1164 - expTable[(int) ((f + MAX_EXP)
1165 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1166 * alpha;
1167 for (c = 0; c < window_layer_size; c++)
1168 neu1e[c] += g * syn1neg_window[c + l2];
1169 for (c = 0; c < window_layer_size; c++)
1170 syn1neg_window[c + l2] += g * neu1[c];
1171 if (cap == 1)
1172 for (c = 0; c < window_layer_size; c++)
1173 capParam(syn1neg_window, c + l2);
1174 }
1175 // Noise Contrastive Estimation
1176 if (nce > 0)
1177 for (d = 0; d < nce + 1; d++) {
1178 if (d == 0) {
1179 target = word;
1180 label = 1;
1181 } else {
1182 next_random = next_random
1183 * (unsigned long long) 25214903917 + 11;
1184 if (word_to_group != NULL
1185 && word_to_group[word] != -1) {
1186 target = word;
1187 while (target == word) {
1188 target = group_to_table[word_to_group[word]
1189 * table_size
1190 + (next_random >> 16) % table_size];
1191 next_random = next_random
1192 * (unsigned long long) 25214903917
1193 + 11;
1194 }
1195 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1196 } else {
1197 target =
1198 table[(next_random >> 16) % table_size];
1199 }
1200 if (target == 0)
1201 target = next_random % (vocab_size - 1) + 1;
1202 if (target == word)
1203 continue;
1204 label = 0;
1205 }
1206 l2 = target * window_layer_size;
1207 f = 0;
1208 for (c = 0; c < window_layer_size; c++)
1209 f += neu1[c] * syn1nce_window[c + l2];
1210 if (f > MAX_EXP)
1211 g = (label - 1) * alpha;
1212 else if (f < -MAX_EXP)
1213 g = (label - 0) * alpha;
1214 else {
1215 f = exp(f);
1216 g =
1217 (label
1218 - f
1219 / (noise_distribution[target]
1220 * nce + f)) * alpha;
1221 }
1222 for (c = 0; c < window_layer_size; c++)
1223 neu1e[c] += g * syn1nce_window[c + l2];
1224 for (c = 0; c < window_layer_size; c++)
1225 syn1nce_window[c + l2] += g * neu1[c];
1226 if (cap == 1)
1227 for (c = 0; c < window_layer_size; c++)
1228 capParam(syn1nce_window, c + l2);
1229 }
1230 // hidden -> in
1231 for (a = 0; a < window * 2 + 1; a++)
1232 if (a != window) {
1233 c = sentence_position - window + a;
1234 if (c < 0)
1235 continue;
1236 if (c >= sentence_length)
1237 continue;
1238 last_word = sen[c];
1239 if (last_word == -1)
1240 continue;
1241 window_offset = a * layer1_size;
1242 if (a > window)
1243 window_offset -= layer1_size;
1244 for (c = 0; c < layer1_size; c++)
1245 syn0[c + last_word * layer1_size] += neu1e[c
1246 + window_offset];
1247 }
1248 }
1249 } else if (type == 3) { //train structured skip-gram
1250 for (a = 0; a < window * 2 + 1; a++)
1251 if (a != window) {
1252 c = sentence_position - window + a;
1253 if (c < 0)
1254 continue;
1255 if (c >= sentence_length)
1256 continue;
1257 last_word = sen[c];
1258 if (last_word == -1)
1259 continue;
1260 l1 = last_word * layer1_size;
1261 window_offset = a * layer1_size;
1262 if (a > window)
1263 window_offset -= layer1_size;
1264 for (c = 0; c < layer1_size; c++)
1265 neu1e[c] = 0;
1266 // HIERARCHICAL SOFTMAX
1267 if (hs)
1268 for (d = 0; d < vocab[word].codelen; d++) {
1269 f = 0;
1270 l2 = vocab[word].point[d] * window_layer_size;
1271 // Propagate hidden -> output
1272 for (c = 0; c < layer1_size; c++)
1273 f += syn0[c + l1]
1274 * syn1_window[c + l2 + window_offset];
1275 if (f <= -MAX_EXP)
1276 continue;
1277 else if (f >= MAX_EXP)
1278 continue;
1279 else
1280 f = expTable[(int) ((f + MAX_EXP)
1281 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1282 // 'g' is the gradient multiplied by the learning rate
1283 g = (1 - vocab[word].code[d] - f) * alpha;
1284 // Propagate errors output -> hidden
1285 for (c = 0; c < layer1_size; c++)
1286 neu1e[c] += g
1287 * syn1_window[c + l2 + window_offset];
1288 // Learn weights hidden -> output
1289 for (c = 0; c < layer1_size; c++)
1290						syn1_window[c + l2 + window_offset] += g
1291								* syn0[c + l1];
1292 if (cap == 1)
1293 for (c = 0; c < layer1_size; c++)
1294							capParam(syn1_window, c + l2 + window_offset);
1295 }
1296 // NEGATIVE SAMPLING
1297 if (negative > 0)
1298 for (d = 0; d < negative + 1; d++) {
1299 if (d == 0) {
1300 target = word;
1301 label = 1;
1302 } else {
1303 next_random = next_random
1304 * (unsigned long long) 25214903917 + 11;
1305 if (word_to_group != NULL
1306 && word_to_group[word] != -1) {
1307 target = word;
1308 while (target == word) {
1309 target =
1310 group_to_table[word_to_group[word]
1311 * table_size
1312 + (next_random >> 16)
1313 % table_size];
1314 next_random =
1315 next_random
1316 * (unsigned long long) 25214903917
1317 + 11;
1318 }
1319 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1320 } else {
1321 target = table[(next_random >> 16)
1322 % table_size];
1323 }
1324 if (target == 0)
1325 target = next_random % (vocab_size - 1) + 1;
1326 if (target == word)
1327 continue;
1328 label = 0;
1329 }
1330 l2 = target * window_layer_size;
1331 f = 0;
1332 for (c = 0; c < layer1_size; c++)
1333 f +=
1334 syn0[c + l1]
1335 * syn1neg_window[c + l2
1336 + window_offset];
1337 if (f > MAX_EXP)
1338 g = (label - 1) * alpha;
1339 else if (f < -MAX_EXP)
1340 g = (label - 0) * alpha;
1341 else
1342 g =
1343 (label
1344 - expTable[(int) ((f + MAX_EXP)
1345 * (EXP_TABLE_SIZE
1346 / MAX_EXP / 2))])
1347 * alpha;
1348 for (c = 0; c < layer1_size; c++)
1349 neu1e[c] +=
1350 g
1351 * syn1neg_window[c + l2
1352 + window_offset];
1353 for (c = 0; c < layer1_size; c++)
1354 syn1neg_window[c + l2 + window_offset] += g
1355 * syn0[c + l1];
1356 if (cap == 1)
1357 for (c = 0; c < layer1_size; c++)
1358 capParam(syn1neg_window,
1359 c + l2 + window_offset);
1360 }
1361					// Noise Contrastive Estimation
1362 if (nce > 0)
1363 for (d = 0; d < nce + 1; d++) {
1364 if (d == 0) {
1365 target = word;
1366 label = 1;
1367 } else {
1368 next_random = next_random
1369 * (unsigned long long) 25214903917 + 11;
1370 if (word_to_group != NULL
1371 && word_to_group[word] != -1) {
1372 target = word;
1373 while (target == word) {
1374 target =
1375 group_to_table[word_to_group[word]
1376 * table_size
1377 + (next_random >> 16)
1378 % table_size];
1379 next_random =
1380 next_random
1381 * (unsigned long long) 25214903917
1382 + 11;
1383 }
1384 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1385 } else {
1386 target = table[(next_random >> 16)
1387 % table_size];
1388 }
1389 if (target == 0)
1390 target = next_random % (vocab_size - 1) + 1;
1391 if (target == word)
1392 continue;
1393 label = 0;
1394 }
1395 l2 = target * window_layer_size;
1396 f = 0;
1397 for (c = 0; c < layer1_size; c++)
1398 f +=
1399 syn0[c + l1]
1400 * syn1nce_window[c + l2
1401 + window_offset];
1402 if (f > MAX_EXP)
1403 g = (label - 1) * alpha;
1404 else if (f < -MAX_EXP)
1405 g = (label - 0) * alpha;
1406 else {
1407 f = exp(f);
1408 g = (label
1409 - f
1410 / (noise_distribution[target]
1411 * nce + f)) * alpha;
1412 }
1413 for (c = 0; c < layer1_size; c++)
1414 neu1e[c] +=
1415 g
1416 * syn1nce_window[c + l2
1417 + window_offset];
1418 for (c = 0; c < layer1_size; c++)
1419 syn1nce_window[c + l2 + window_offset] += g
1420 * syn0[c + l1];
1421 if (cap == 1)
1422 for (c = 0; c < layer1_size; c++)
1423 capParam(syn1nce_window,
1424 c + l2 + window_offset);
1425 }
1426 // Learn weights input -> hidden
1427 for (c = 0; c < layer1_size; c++) {
1428 syn0[c + l1] += neu1e[c];
1429 if (syn0[c + l1] > 50)
1430 syn0[c + l1] = 50;
1431 if (syn0[c + l1] < -50)
1432 syn0[c + l1] = -50;
1433 }
1434 }
1435		} else if (type == 4) { //train the senna-style architecture
1436 // in -> hidden
1437 cw = 0;
1438 for (a = 0; a < window * 2 + 1; a++)
1439 if (a != window) {
1440 c = sentence_position - window + a;
1441 if (c < 0)
1442 continue;
1443 if (c >= sentence_length)
1444 continue;
1445 last_word = sen[c];
1446 if (last_word == -1)
1447 continue;
1448 window_offset = a * layer1_size;
1449 if (a > window)
1450 window_offset -= layer1_size;
1451 for (c = 0; c < layer1_size; c++)
1452 neu1[c + window_offset] += syn0[c
1453 + last_word * layer1_size];
1454 cw++;
1455 }
1456 if (cw) {
1457 for (a = 0; a < window_hidden_size; a++) {
1458 c = a * window_layer_size;
1459 for (b = 0; b < window_layer_size; b++) {
1460 neu2[a] += syn_window_hidden[c + b] * neu1[b];
1461 }
1462 }
1463 if (hs)
1464 for (d = 0; d < vocab[word].codelen; d++) {
1465 f = 0;
1466 l2 = vocab[word].point[d] * window_hidden_size;
1467 // Propagate hidden -> output
1468 for (c = 0; c < window_hidden_size; c++)
1469 f += hardTanh(neu2[c]) * syn_hidden_word[c + l2];
1470 if (f <= -MAX_EXP)
1471 continue;
1472 else if (f >= MAX_EXP)
1473 continue;
1474 else
1475 f = expTable[(int) ((f + MAX_EXP)
1476 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1477 // 'g' is the gradient multiplied by the learning rate
1478 g = (1 - vocab[word].code[d] - f) * alpha;
1479 // Propagate errors output -> hidden
1480 for (c = 0; c < window_hidden_size; c++)
1481 neu2e[c] += dHardTanh(neu2[c], g) * g
1482 * syn_hidden_word[c + l2];
1483 // Learn weights hidden -> output
1484 for (c = 0; c < window_hidden_size; c++)
1485 syn_hidden_word[c + l2] += dHardTanh(neu2[c], g) * g
1486 * neu2[c];
1487 }
1488 // NEGATIVE SAMPLING
1489 if (negative > 0)
1490 for (d = 0; d < negative + 1; d++) {
1491 if (d == 0) {
1492 target = word;
1493 label = 1;
1494 } else {
1495 next_random = next_random
1496 * (unsigned long long) 25214903917 + 11;
1497 if (word_to_group != NULL
1498 && word_to_group[word] != -1) {
1499 target = word;
1500 while (target == word) {
1501 target = group_to_table[word_to_group[word]
1502 * table_size
1503 + (next_random >> 16) % table_size];
1504 next_random = next_random
1505 * (unsigned long long) 25214903917
1506 + 11;
1507 }
1508 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1509 } else {
1510 target =
1511 table[(next_random >> 16) % table_size];
1512 }
1513 if (target == 0)
1514 target = next_random % (vocab_size - 1) + 1;
1515 if (target == word)
1516 continue;
1517 label = 0;
1518 }
1519 l2 = target * window_hidden_size;
1520 f = 0;
1521 for (c = 0; c < window_hidden_size; c++)
1522 f += hardTanh(neu2[c])
1523 * syn_hidden_word_neg[c + l2];
1524 if (f > MAX_EXP)
1525 g = (label - 1) * alpha / negative;
1526 else if (f < -MAX_EXP)
1527 g = (label - 0) * alpha / negative;
1528 else
1529 g = (label
1530 - expTable[(int) ((f + MAX_EXP)
1531 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1532 * alpha / negative;
1533 for (c = 0; c < window_hidden_size; c++)
1534 neu2e[c] += dHardTanh(neu2[c], g) * g
1535 * syn_hidden_word_neg[c + l2];
1536 for (c = 0; c < window_hidden_size; c++)
1537 syn_hidden_word_neg[c + l2] += dHardTanh(neu2[c], g)
1538 * g * neu2[c];
1539 }
1540 for (a = 0; a < window_hidden_size; a++)
1541 for (b = 0; b < window_layer_size; b++)
1542 neu1e[b] += neu2e[a]
1543 * syn_window_hidden[a * window_layer_size + b];
1544 for (a = 0; a < window_hidden_size; a++)
1545 for (b = 0; b < window_layer_size; b++)
1546 syn_window_hidden[a * window_layer_size + b] += neu2e[a]
1547 * neu1[b];
1548 // hidden -> in
1549 for (a = 0; a < window * 2 + 1; a++)
1550 if (a != window) {
1551 c = sentence_position - window + a;
1552 if (c < 0)
1553 continue;
1554 if (c >= sentence_length)
1555 continue;
1556 last_word = sen[c];
1557 if (last_word == -1)
1558 continue;
1559 window_offset = a * layer1_size;
1560 if (a > window)
1561 window_offset -= layer1_size;
1562 for (c = 0; c < layer1_size; c++)
1563 syn0[c + last_word * layer1_size] += neu1e[c
1564 + window_offset];
1565 }
1566 }
1567 } else {
1568			printf("unknown type %i\n", type);
1569			exit(1);
1570 }
1571 sentence_position++;
1572 if (sentence_position >= sentence_length) {
1573 sentence_length = 0;
1574 continue;
1575 }
1576 }
1577 fclose(fi);
1578 free(neu1);
1579 free(neu1e);
1580 pthread_exit(NULL);
1581}
1582
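// TrainModel builds or loads the vocabulary, initializes the network, runs
// num_threads training threads, and then writes either the word vectors
// (text or binary, depending on -binary) or, when -classes > 0, K-means
// class assignments computed from those vectors.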
1583void TrainModel() {
1584 long a, b, c, d;
1585 FILE *fo;
1586 pthread_t *pt = (pthread_t *) malloc(num_threads * sizeof(pthread_t));
1587 printf("Starting training using file %s\n", train_file);
1588 starting_alpha = alpha;
1589 if (read_vocab_file[0] != 0)
1590 ReadVocab();
1591 else
1592 LearnVocabFromTrainFile();
1593 if (save_vocab_file[0] != 0)
1594 SaveVocab();
1595 if (output_file[0] == 0)
1596 return;
1597 InitNet();
1598 if (negative > 0 || nce > 0)
1599 InitUnigramTable();
1600 if (negative_classes_file[0] != 0)
1601 InitClassUnigramTable();
1602 start = clock();
1603 for (a = 0; a < num_threads; a++)
1604 pthread_create(&pt[a], NULL, TrainModelThread, (void *) a);
1605 for (a = 0; a < num_threads; a++)
1606 pthread_join(pt[a], NULL);
1607 fo = fopen(output_file, "wb");
1608 if (classes == 0) {
1609 // Save the word vectors
1610 fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
1611 for (a = 0; a < vocab_size; a++) {
1612 fprintf(fo, "%s ", vocab[a].word);
1613 if (binary)
1614 for (b = 0; b < layer1_size; b++)
1615 fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
1616 else
1617 for (b = 0; b < layer1_size; b++)
1618 fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
1619 fprintf(fo, "\n");
1620 }
1621 } else {
1622 // Run K-means on the word vectors
1623 int clcn = classes, iter = 10, closeid;
1624 int *centcn = (int *) malloc(classes * sizeof(int));
1625 int *cl = (int *) calloc(vocab_size, sizeof(int));
1626 real closev, x;
1627 real *cent = (real *) calloc(classes * layer1_size, sizeof(real));
1628 for (a = 0; a < vocab_size; a++)
1629 cl[a] = a % clcn;
1630 for (a = 0; a < iter; a++) {
1631 for (b = 0; b < clcn * layer1_size; b++)
1632 cent[b] = 0;
1633 for (b = 0; b < clcn; b++)
1634 centcn[b] = 1;
1635 for (c = 0; c < vocab_size; c++) {
1636 for (d = 0; d < layer1_size; d++)
1637 cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
1638 centcn[cl[c]]++;
1639 }
1640 for (b = 0; b < clcn; b++) {
1641 closev = 0;
1642 for (c = 0; c < layer1_size; c++) {
1643 cent[layer1_size * b + c] /= centcn[b];
1644 closev += cent[layer1_size * b + c]
1645 * cent[layer1_size * b + c];
1646 }
1647 closev = sqrt(closev);
1648 for (c = 0; c < layer1_size; c++)
1649 cent[layer1_size * b + c] /= closev;
1650 }
1651 for (c = 0; c < vocab_size; c++) {
1652 closev = -10;
1653 closeid = 0;
1654 for (d = 0; d < clcn; d++) {
1655 x = 0;
1656 for (b = 0; b < layer1_size; b++)
1657 x += cent[layer1_size * d + b]
1658 * syn0[c * layer1_size + b];
1659 if (x > closev) {
1660 closev = x;
1661 closeid = d;
1662 }
1663 }
1664 cl[c] = closeid;
1665 }
1666 }
1667 // Save the K-means classes
1668 for (a = 0; a < vocab_size; a++)
1669 fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
1670 free(centcn);
1671 free(cent);
1672 free(cl);
1673 }
1674 fclose(fo);
1675 if (save_net_file[0] != 0)
1676 SaveNet();
1677}
1678
1679int ArgPos(char *str, int argc, char **argv) {
1680 int a;
1681 for (a = 1; a < argc; a++)
1682 if (!strcmp(str, argv[a])) {
1683 if (a == argc - 1) {
1684 printf("Argument missing for %s\n", str);
1685 exit(1);
1686 }
1687 return a;
1688 }
1689 return -1;
1690}
1691
1692int main(int argc, char **argv) {
1693 int i;
1694 if (argc == 1) {
1695 printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
1696 printf("Options:\n");
1697 printf("Parameters for training:\n");
1698 printf("\t-train <file>\n");
1699 printf("\t\tUse text data from <file> to train the model\n");
1700 printf("\t-output <file>\n");
1701 printf(
1702 "\t\tUse <file> to save the resulting word vectors / word clusters\n");
1703 printf("\t-size <int>\n");
1704 printf("\t\tSet size of word vectors; default is 100\n");
1705 printf("\t-window <int>\n");
1706 printf("\t\tSet max skip length between words; default is 5\n");
1707 printf("\t-sample <float>\n");
1708 printf(
1709 "\t\tSet threshold for occurrence of words. Those that appear with higher frequency in the training data\n");
1710 printf(
1711 "\t\twill be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)\n");
1712 printf("\t-hs <int>\n");
1713 printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
1714 printf("\t-negative <int>\n");
1715 printf(
1716 "\t\tNumber of negative examples; default is 5, common values are 3 - 10 (0 = not used)\n");
1717 printf("\t-negative-classes <file>\n");
1718 printf("\t\tNegative classes to sample from\n");
1719 printf("\t-nce <int>\n");
1720 printf(
1721 "\t\tNumber of negative examples for nce; default is 0, common values are 3 - 10 (0 = not used)\n");
1722 printf("\t-threads <int>\n");
1723 printf("\t\tUse <int> threads (default 12)\n");
1724 printf("\t-iter <int>\n");
1725 printf("\t\tRun more training iterations (default 5)\n");
1726 printf("\t-min-count <int>\n");
1727 printf(
1728 "\t\tThis will discard words that appear less than <int> times; default is 5\n");
1729 printf("\t-alpha <float>\n");
1730 printf(
1731 "\t\tSet the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW\n");
1732 printf("\t-classes <int>\n");
1733 printf(
1734 "\t\tOutput word classes rather than word vectors; default number of classes is 0 (vectors are written)\n");
1735 printf("\t-debug <int>\n");
1736 printf(
1737 "\t\tSet the debug mode (default = 2 = more info during training)\n");
1738 printf("\t-binary <int>\n");
1739 printf(
1740				"\t\tSave the resulting vectors in binary mode; default is 0 (off)\n");
1741 printf("\t-save-vocab <file>\n");
1742 printf("\t\tThe vocabulary will be saved to <file>\n");
1743 printf("\t-read-vocab <file>\n");
1744 printf(
1745 "\t\tThe vocabulary will be read from <file>, not constructed from the training data\n");
1746 printf("\t-read-net <file>\n");
1747 printf(
1748 "\t\tThe net parameters will be read from <file>, not initialized randomly\n");
1749 printf("\t-save-net <file>\n");
1750 printf("\t\tThe net parameters will be saved to <file>\n");
1751 printf("\t-type <int>\n");
1752 printf(
1753				"\t\tType of embeddings (0 for cbow, 1 for skip-gram, 2 for cwindow, 3 for structured skip-gram, 4 for senna type)\n");
1754 printf("\t-cap <int>\n");
1755 printf(
1756				"\t\tLimit the parameter values to the range [-50, 50]; default is 0 (off)\n");
1757 printf("\nExamples:\n");
1758 printf(
1759 "./word2vec -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 -negative 5 -hs 0 -binary 0 -type 1 -iter 3\n\n");
1760 return 0;
1761 }
1762 output_file[0] = 0;
1763 save_vocab_file[0] = 0;
1764 read_vocab_file[0] = 0;
1765 save_net_file[0] = 0;
1766 read_net_file[0] = 0;
1767 negative_classes_file[0] = 0;
1768 if ((i = ArgPos((char *) "-size", argc, argv)) > 0)
1769 layer1_size = atoi(argv[i + 1]);
1770 if ((i = ArgPos((char *) "-train", argc, argv)) > 0)
1771 strcpy(train_file, argv[i + 1]);
1772 if ((i = ArgPos((char *) "-save-vocab", argc, argv)) > 0)
1773 strcpy(save_vocab_file, argv[i + 1]);
1774 if ((i = ArgPos((char *) "-read-vocab", argc, argv)) > 0)
1775 strcpy(read_vocab_file, argv[i + 1]);
1776 if ((i = ArgPos((char *) "-save-net", argc, argv)) > 0)
1777 strcpy(save_net_file, argv[i + 1]);
1778 if ((i = ArgPos((char *) "-read-net", argc, argv)) > 0)
1779 strcpy(read_net_file, argv[i + 1]);
1780 if ((i = ArgPos((char *) "-debug", argc, argv)) > 0)
1781 debug_mode = atoi(argv[i + 1]);
1782 if ((i = ArgPos((char *) "-binary", argc, argv)) > 0)
1783 binary = atoi(argv[i + 1]);
1784 if ((i = ArgPos((char *) "-type", argc, argv)) > 0)
1785 type = atoi(argv[i + 1]);
1786 if ((i = ArgPos((char *) "-output", argc, argv)) > 0)
1787 strcpy(output_file, argv[i + 1]);
1788 if ((i = ArgPos((char *) "-window", argc, argv)) > 0)
1789 window = atoi(argv[i + 1]);
1790 if ((i = ArgPos((char *) "-sample", argc, argv)) > 0)
1791 sample = atof(argv[i + 1]);
1792 if ((i = ArgPos((char *) "-hs", argc, argv)) > 0)
1793 hs = atoi(argv[i + 1]);
1794 if ((i = ArgPos((char *) "-negative", argc, argv)) > 0)
1795 negative = atoi(argv[i + 1]);
1796 if ((i = ArgPos((char *) "-negative-classes", argc, argv)) > 0)
1797 strcpy(negative_classes_file, argv[i + 1]);
1798 if ((i = ArgPos((char *) "-nce", argc, argv)) > 0)
1799 nce = atoi(argv[i + 1]);
1800 if ((i = ArgPos((char *) "-threads", argc, argv)) > 0)
1801 num_threads = atoi(argv[i + 1]);
1802 if ((i = ArgPos((char *) "-iter", argc, argv)) > 0)
1803 iter = atoi(argv[i + 1]);
1804 if ((i = ArgPos((char *) "-min-count", argc, argv)) > 0)
1805 min_count = atoi(argv[i + 1]);
1806 if ((i = ArgPos((char *) "-classes", argc, argv)) > 0)
1807 classes = atoi(argv[i + 1]);
1808 if ((i = ArgPos((char *) "-cap", argc, argv)) > 0)
1809 cap = atoi(argv[i + 1]);
1810 if (type == 0 || type == 2 || type == 4)
1811 alpha = 0.05;
1812 if ((i = ArgPos((char *) "-alpha", argc, argv)) > 0)
1813 alpha = atof(argv[i + 1]);
1814 vocab = (struct vocab_word *) calloc(vocab_max_size,
1815 sizeof(struct vocab_word));
1816 vocab_hash = (int *) calloc(vocab_hash_size, sizeof(int));
1817 expTable = (real *) malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
1818 for (i = 0; i < EXP_TABLE_SIZE; i++) {
1819 expTable[i] = exp((i / (real) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
1820 expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1)
1821 }
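	// expTable[i] holds sigmoid(x) for x = (i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP,
	// i.e. EXP_TABLE_SIZE samples of the logistic function on [-MAX_EXP, MAX_EXP];
	// the training code looks it up with index (f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2).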
1822 TrainModel();
1823 return 0;
1824}
1825