1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <locale.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <unistd.h>
20#include <math.h>
21#include <pthread.h>
22#include <collocatordb.h>
23
24#define MAX_STRING 100
25#define EXP_TABLE_SIZE 1000
26#define MAX_EXP 6
27#define MAX_SENTENCE_LENGTH 1000
28#define MAX_CC 100
29#define MAX_CODE_LENGTH 40
30
31const int vocab_hash_size = 30000000; // Maximum 30 * 0.7 = 21M words in the vocabulary
32
33typedef float real; // Precision of float numbers
34
35struct vocab_word {
36 long long cn;
37 int *point;
38 char *word, *code, codelen;
39};
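// cn is the word's corpus frequency; point, code and codelen describe its
// path through the Huffman tree (inner-node indices, branch bits, path length),
// filled by CreateBinaryTree() and used only for hierarchical softmax.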
40
41char train_file[MAX_STRING], output_file[MAX_STRING];
42char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
43char save_net_file[MAX_STRING], read_net_file[MAX_STRING];
44char magic_stop_file[MAX_STRING];
45
46struct vocab_word *vocab;
47int binary = 0, type = 1, debug_mode = 2, window = 5, min_count = 5,
48 num_threads = 12, min_reduce = 1;
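// type selects the architecture trained in TrainModelThread(): 0 = CBOW,
// 1 = skip-gram, 2 = continuous window (cwindow), 3 = structured skip-gram,
// 4 = SENNA-style window network, 5 = collocation counting only (collocatordb).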
49int *vocab_hash;
50long long *threadPos;
51int *threadIters;
52long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
53long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0,
54 classes = 0;
55real alpha = 0.025, starting_alpha, sample = 1e-3;
56real *syn0, *syn1, *syn1neg, *syn1nce, *expTable;
57real avgWordLength=0;
58clock_t start, start_clock;
59
60real *syn1_window, *syn1neg_window, *syn1nce_window;
61int w_offset, window_layer_size;
62
63int window_hidden_size = 500;
64real *syn_window_hidden, *syn_hidden_word, *syn_hidden_word_neg,
65 *syn_hidden_word_nce;
66
67int hs = 0, negative = 5;
68const int table_size = 1e8;
69int *table;
70
71 long cc = 0; // if > 0, ShowCollocations() prints collocation profiles starting from this word index
72 long tc = 1; // if > 0, ReadVocab() re-counts word frequencies on the current training file
73
74//contrastive negative sampling
75char negative_classes_file[MAX_STRING];
76int *word_to_group;
77int *group_to_table; //group_size*table_size
78int class_number;
79
80//nce
81real* noise_distribution;
82int nce = 0;
83
84//param caps
85real CAP_VALUE = 50;
86int cap = 0;
87
88COLLOCATORDB *cdb = NULL;
89
90void capParam(real* array, int index) {
91 if (array[index] > CAP_VALUE)
92 array[index] = CAP_VALUE;
93 else if (array[index] < -CAP_VALUE)
94 array[index] = -CAP_VALUE;
95}
96
97real hardTanh(real x) {
98 if (x >= 1) {
99 return 1;
100 } else if (x <= -1) {
101 return -1;
102 } else {
103 return x;
104 }
105}
106
107real dHardTanh(real x, real g) {
108 if (x > 1 && g > 0) {
109 return 0;
110 }
111 if (x < -1 && g < 0) {
112 return 0;
113 }
114 return 1;
115}
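// hardTanh() clips an activation to [-1, 1]; dHardTanh() gates the backward
// pass, returning 0 when the unit is saturated and the gradient would push it
// further out of range, and 1 otherwise. Both are used only by the SENNA-style
// architecture (type 4).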
116
117void InitUnigramTable() {
118 int a, i;
119 long long train_words_pow = 0;
120 real d1, power = 0.75;
121 table = (int *) malloc(table_size * sizeof(int));
122 for (a = 0; a < vocab_size; a++)
123 train_words_pow += pow(vocab[a].cn, power);
124 i = 0;
125 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
126 for (a = 0; a < table_size; a++) {
127 table[a] = i;
128 if (a / (real) table_size > d1) {
129 i++;
130 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
131 }
132 if (i >= vocab_size)
133 i = vocab_size - 1;
134 }
135
136 noise_distribution = (real *) calloc(vocab_size, sizeof(real));
137 for (a = 0; a < vocab_size; a++)
138 noise_distribution[a] = pow(vocab[a].cn, power)
139 / (real) train_words_pow;
140}
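// The unigram table maps table_size slots to word indices so that word w
// occupies a share of slots proportional to cn(w)^0.75. Sampling a uniformly
// random slot therefore draws negatives from the smoothed distribution
// P(w) = cn(w)^0.75 / sum_v cn(v)^0.75; the same distribution is kept
// explicitly in noise_distribution for use by NCE.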
141
142// Reads a single word from a file, assuming space + tab + EOL to be word boundaries
143void ReadWord(char *word, FILE *fin) {
144 int a = 0, ch;
145 while (!feof(fin)) {
146 ch = fgetc(fin);
147 if (ch == 13)
148 continue;
149 if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
150 if (a > 0) {
151 if (ch == '\n')
152 ungetc(ch, fin);
153 break;
154 }
155 if (ch == '\n') {
156 strcpy(word, (char *) "</s>");
157 return;
158 } else
159 continue;
160 }
161 word[a] = ch;
162 a++;
163 if (a >= MAX_STRING - 1)
164 a--; // Truncate words that are too long
165 }
166 word[a] = 0;
167}
168
169// Returns hash value of a word
170int GetWordHash(char *word) {
171 unsigned long long a, hash = 0;
172 for (a = 0; a < strlen(word); a++)
173 hash = hash * 257 + word[a];
174 hash = hash % vocab_hash_size;
175 return hash;
176}
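// Polynomial rolling hash (base 257) over the word's bytes, reduced modulo
// vocab_hash_size; collisions are resolved by linear probing in SearchVocab()
// and AddWordToVocab().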
177
178// Returns position of a word in the vocabulary; if the word is not found, returns -1
179int SearchVocab(char *word) {
180 unsigned int hash = GetWordHash(word);
181 while (1) {
182 if (vocab_hash[hash] == -1)
183 return -1;
184 if (!strcmp(word, vocab[vocab_hash[hash]].word))
185 return vocab_hash[hash];
186 hash = (hash + 1) % vocab_hash_size;
187 }
188 return -1;
189}
190
191// Reads a word and returns its index in the vocabulary
192int ReadWordIndex(FILE *fin) {
193 char word[MAX_STRING];
194 ReadWord(word, fin);
195 if (feof(fin))
196 return -1;
197 return SearchVocab(word);
198}
199
200// Adds a word to the vocabulary
201int AddWordToVocab(char *word) {
202 unsigned int hash, length = strlen(word) + 1;
203 if (length > MAX_STRING)
204 length = MAX_STRING;
205 vocab[vocab_size].word = (char *) calloc(length, sizeof(char));
206 strcpy(vocab[vocab_size].word, word);
207 vocab[vocab_size].cn = 0;
208 vocab_size++;
209 // Reallocate memory if needed
210 if (vocab_size + 2 >= vocab_max_size) {
211 vocab_max_size += 1000;
212 vocab = (struct vocab_word *) realloc(vocab,
213 vocab_max_size * sizeof(struct vocab_word));
214 }
215 hash = GetWordHash(word);
216 while (vocab_hash[hash] != -1)
217 hash = (hash + 1) % vocab_hash_size;
218 vocab_hash[hash] = vocab_size - 1;
219 return vocab_size - 1;
220}
221
222// Used later for sorting by word counts
223int VocabCompare(const void *a, const void *b) {
224 return ((struct vocab_word *) b)->cn - ((struct vocab_word *) a)->cn;
225}
226
227// Sorts the vocabulary by frequency using word counts
228void SortVocab() {
229 int a, size;
230 unsigned int hash;
231 // Sort the vocabulary and keep </s> at the first position
232 qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
233 for (a = 0; a < vocab_hash_size; a++)
234 vocab_hash[a] = -1;
235 size = vocab_size;
236 train_words = 0;
237 for (a = 0; a < size; a++) {
238 avgWordLength += vocab[a].cn * (strlen(vocab[a].word) + 1);
239 // Words occurring less than min_count times will be discarded from the vocab
240 if ((vocab[a].cn < min_count) && (a != 0)) {
241 vocab_size--;
242 free(vocab[a].word);
243 } else {
244 // Hash must be recomputed, as it is no longer valid after sorting
245 hash = GetWordHash(vocab[a].word);
246 while (vocab_hash[hash] != -1)
247 hash = (hash + 1) % vocab_hash_size;
248 vocab_hash[hash] = a;
249 train_words += vocab[a].cn;
250 }
251 }
252 avgWordLength /= train_words;
253 vocab = (struct vocab_word *) realloc(vocab,
254 (vocab_size + 1) * sizeof(struct vocab_word));
255 // Allocate memory for the binary tree construction
256 for (a = 0; a < vocab_size; a++) {
257 vocab[a].code = (char *) calloc(MAX_CODE_LENGTH, sizeof(char));
258 vocab[a].point = (int *) calloc(MAX_CODE_LENGTH, sizeof(int));
259 }
260}
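// avgWordLength ends up as the average number of bytes per token (word plus
// one separator byte) in the counted corpus; ReadVocab() later uses it to
// estimate the number of training words from the file size alone.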
261
262// Reduces the vocabulary by removing infrequent tokens
263void ReduceVocab() {
264 int a, b = 0;
265 unsigned int hash;
266 for (a = 0; a < vocab_size; a++)
267 if (vocab[a].cn > min_reduce) {
268 vocab[b].cn = vocab[a].cn;
269 vocab[b].word = vocab[a].word;
270 b++;
271 } else
272 free(vocab[a].word);
273 vocab_size = b;
274 for (a = 0; a < vocab_hash_size; a++)
275 vocab_hash[a] = -1;
276 for (a = 0; a < vocab_size; a++) {
277 // Hash must be recomputed, as it is no longer valid
278 hash = GetWordHash(vocab[a].word);
279 while (vocab_hash[hash] != -1)
280 hash = (hash + 1) % vocab_hash_size;
281 vocab_hash[hash] = a;
282 }
283 fflush(stdout);
284 min_reduce++;
285}
286
287// Create binary Huffman tree using the word counts
288// Frequent words will have short unique binary codes
289void CreateBinaryTree() {
290 long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
291 char code[MAX_CODE_LENGTH];
292 long long *count = (long long *) calloc(vocab_size * 2 + 1,
293 sizeof(long long));
294 long long *binary = (long long *) calloc(vocab_size * 2 + 1,
295 sizeof(long long));
296 long long *parent_node = (long long *) calloc(vocab_size * 2 + 1,
297 sizeof(long long));
298 // todo: this needs to operate on a sorted copy of vocab[a].cn if we use local counts
299 for (a = 0; a < vocab_size; a++)
300 count[a] = vocab[a].cn;
301 for (a = vocab_size; a < vocab_size * 2; a++)
302 count[a] = 1e15;
303 pos1 = vocab_size - 1;
304 pos2 = vocab_size;
305 // The following algorithm constructs the Huffman tree by adding one node at a time
306 for (a = 0; a < vocab_size - 1; a++) {
307 // First, find two smallest nodes 'min1, min2'
308 if (pos1 >= 0) {
309 if (count[pos1] < count[pos2]) {
310 min1i = pos1;
311 pos1--;
312 } else {
313 min1i = pos2;
314 pos2++;
315 }
316 } else {
317 min1i = pos2;
318 pos2++;
319 }
320 if (pos1 >= 0) {
321 if (count[pos1] < count[pos2]) {
322 min2i = pos1;
323 pos1--;
324 } else {
325 min2i = pos2;
326 pos2++;
327 }
328 } else {
329 min2i = pos2;
330 pos2++;
331 }
332 count[vocab_size + a] = count[min1i] + count[min2i];
333 parent_node[min1i] = vocab_size + a;
334 parent_node[min2i] = vocab_size + a;
335 binary[min2i] = 1;
336 }
337 // Now assign binary code to each vocabulary word
338 for (a = 0; a < vocab_size; a++) {
339 b = a;
340 i = 0;
341 while (1) {
342 code[i] = binary[b];
343 point[i] = b;
344 i++;
345 b = parent_node[b];
346 if (b == vocab_size * 2 - 2)
347 break;
348 }
349 vocab[a].codelen = i;
350 vocab[a].point[0] = vocab_size - 2;
351 for (b = 0; b < i; b++) {
352 vocab[a].code[i - b - 1] = code[b];
353 vocab[a].point[i - b] = point[b] - vocab_size;
354 }
355 }
356 free(count);
357 free(binary);
358 free(parent_node);
359}
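// After this pass, vocab[a].code holds the branch decisions from the root down
// to word a (root-first) and vocab[a].point the inner-node indices along that
// path (offset by vocab_size). Frequent words get short codes, which keeps
// hierarchical-softmax updates cheap.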
360
361void LearnVocabFromTrainFile() {
362 char word[MAX_STRING];
363 FILE *fin;
364 long long a, i;
365 for (a = 0; a < vocab_hash_size; a++)
366 vocab_hash[a] = -1;
367 fin = fopen(train_file, "rb");
368 if (fin == NULL) {
369 printf("ERROR: training data file not found!\n");
370 exit(1);
371 }
372 vocab_size = 0;
373 AddWordToVocab((char *) "</s>");
374 while (1) {
375 ReadWord(word, fin);
376 if (feof(fin))
377 break;
378 train_words++;
379 if ((debug_mode > 1) && (train_words % 100000 == 0)) {
380 printf("%lldK%c", train_words / 1000, 13);
381 fflush(stdout);
382 }
383 i = SearchVocab(word);
384 if (i == -1) {
385 a = AddWordToVocab(word);
386 vocab[a].cn = 1;
387 } else
388 vocab[i].cn++;
389 if (vocab_size > vocab_hash_size * 0.7)
390 ReduceVocab();
391 }
392 SortVocab();
393 if (debug_mode > 0) {
394 printf("Vocab size: %'lld\n", vocab_size);
395 printf("Words in train file: %'lld\n", train_words);
396 }
397 file_size = ftell(fin);
398 fclose(fin);
399}
400
401void SaveVocab() {
402 long long i;
403 FILE *fo = fopen(save_vocab_file, "wb");
404 for (i = 0; i < vocab_size; i++)
405 fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
406 fclose(fo);
407}
408
409void ReadVocab() {
410 long long a, i = 0;
411 char c;
412 char word[MAX_STRING];
413 FILE *fin = fopen(read_vocab_file, "rb");
414 if (fin == NULL) {
415 printf("Vocabulary file not found\n");
416 exit(1);
417 }
418 for (a = 0; a < vocab_hash_size; a++)
419 vocab_hash[a] = -1;
420 vocab_size = 0;
421 while (1) {
422 ReadWord(word, fin);
423 if (feof(fin))
424 break;
425 a = AddWordToVocab(word);
426 fscanf(fin, "%lld%c", &vocab[a].cn, &c);
427 i++;
428 }
429 fclose(fin);
430 fin = fopen(train_file, "rb");
431 if (fin == NULL) {
432 printf("ERROR: training data file not found!\n");
433 exit(1);
434 }
435 fseek(fin, 0, SEEK_END);
436 file_size = ftell(fin);
437 fclose(fin);
438 SortVocab();
439 if (debug_mode > 0) {
440 printf("Vocab size: %'lld\n", vocab_size);
441 printf("Words in vocab's train file: %'lld\n", train_words);
442 printf("Avg. word length in vocab's train file: %.2f\n", avgWordLength);
443 }
444 train_words = file_size / avgWordLength;
445 // PF: so even with tc=0, alpha will be appropriately adapted?
446 if(debug_mode > 0)
447 printf("Estimated words in train file: %'lld\n", train_words);
448 if (tc > 0) {
449 // recalculate counts for the current corpus
450 // adapted from LearnVocabFromTrainFile()
451 // note that we don't sort or rehash the vocabulary again, we only adapt vocab[.].cn.
452 fin = fopen(train_file, "rb");
453 if (fin == NULL) {
454 printf("ERROR: training data file not found!\n");
455 exit(1);
456 }
457 // reset vocabulary counts
458 for (a = 0; a < vocab_size; a++)
459 vocab[a].cn = 0;
460 train_words = 0;
461 while (1) {
462 ReadWord(word, fin);
463 if (feof(fin))
464 break;
465 if ((debug_mode > 1) && (train_words % 100000 == 0)) {
466 printf("%lldK%c", train_words / 1000, 13);
467 fflush(stdout);
468 }
469 i = SearchVocab(word);
470 // the word must be in the vocabulary but we don't issue a warning,
471 // because it may have been cut off due to minfreq.
472 if (i >= 0) {
473 vocab[i].cn++;
474 train_words++;
475 }
476 }
477 // we cannot have 0 counts.
478 for (a = 0; a < vocab_size; a++) {
479 if(vocab[a].cn == 0) {
480 vocab[a].cn = 1;
481 train_words++;
482 }
483 }
484 if (debug_mode > 0) {
485 printf("Vocab size: %lld\n", vocab_size);
486 printf("Words in current train file: %'lld\n", train_words);
487 }
488 fseek(fin, 0, SEEK_END);
489 file_size = ftell(fin);
490 fclose(fin);
491 }
492}
493
494void InitClassUnigramTable() {
495 // TODO: this probably needs to be adapted for dealing with subcorpus adjusted vocabulary counts
496 long long a, c;
497 printf("loading class unigrams \n");
498 FILE *fin = fopen(negative_classes_file, "rb");
499 if (fin == NULL) {
500 printf("ERROR: class file not found!\n");
501 exit(1);
502 }
503 word_to_group = (int *) malloc(vocab_size * sizeof(int));
504 for (a = 0; a < vocab_size; a++)
505 word_to_group[a] = -1;
506 char class[MAX_STRING];
507 char prev_class[MAX_STRING];
508 prev_class[0] = 0;
509 char word[MAX_STRING];
510 class_number = -1;
511 while (1) {
512 if (feof(fin))
513 break;
514 ReadWord(class, fin);
515 ReadWord(word, fin);
516 int word_index = SearchVocab(word);
517 if (word_index != -1) {
518 if (strcmp(class, prev_class) != 0) {
519 class_number++;
520 strcpy(prev_class, class);
521 }
522 word_to_group[word_index] = class_number;
523 }
524 ReadWord(word, fin);
525 }
526 class_number++;
527 fclose(fin);
528
529 group_to_table = (int *) malloc(table_size * class_number * sizeof(int));
530 long long train_words_pow = 0;
531 real d1, power = 0.75;
532
533 for (c = 0; c < class_number; c++) {
534 long long offset = c * table_size;
535 train_words_pow = 0;
536 for (a = 0; a < vocab_size; a++)
537 if (word_to_group[a] == c)
538 train_words_pow += pow(vocab[a].cn, power);
539 int i = 0;
540 while (word_to_group[i] != c && i < vocab_size)
541 i++;
542 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
543 for (a = 0; a < table_size; a++) {
544 //printf("index %lld , word %d\n", a, i);
545 group_to_table[offset + a] = i;
546 if (a / (real) table_size > d1) {
547 i++;
548 while (word_to_group[i] != c && i < vocab_size)
549 i++;
550 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
551 }
552 if (i >= vocab_size)
553 while (word_to_group[i] != c && i >= 0)
554 i--;
555 }
556 }
557}
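// For contrastive negative sampling with word classes, one unigram table of
// table_size slots is built per class (laid out back to back in group_to_table),
// so that negatives for a word are drawn only from that word's own class.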
558
559void SaveArgs(int argc, char **argv) {
560 unsigned int i;
561 char args_file[MAX_STRING];
562 strcpy(args_file, output_file);
563 strcat(args_file, ".args");
564 FILE *fargs = fopen(args_file, "w");
565 if (fargs == NULL) {
566 printf("Cannot save args to %s.\n", args_file);
567 return;
568 }
569
570 for(i=1; i<argc; i++)
571 fprintf(fargs, "%s ", argv[i]);
572
573 fprintf(fargs, "\n");
574 fclose(fargs);
575
576 return;
577}
578
579void SaveNet() {
580 if (type == 4 || negative <= 0) {
581 fprintf(stderr,
582 "save-net only supported for type 0,1,2,3 with negative sampling\n");
583 return;
584 }
585
586 FILE *fnet = fopen(save_net_file, "wb");
587 if (fnet == NULL) {
588 printf("Net parameter file not found\n");
589 exit(1);
590 }
591 fwrite(syn0, sizeof(real), vocab_size * layer1_size, fnet);
592 if (type == 0 || type == 1) {
593 fwrite(syn1neg, sizeof(real), vocab_size * layer1_size, fnet);
594 }
595 if (type == 2 || type == 3) {
596 fwrite(syn1neg_window, sizeof(real), vocab_size * window_layer_size, fnet);
597 }
598 fclose(fnet);
599}
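// The net file is a raw dump: syn0 (vocab_size * layer1_size reals) followed by
// the negative-sampling output weights (syn1neg for types 0/1, syn1neg_window
// for types 2/3). The read-net path in InitNet() expects exactly this layout
// with the same vocab_size and layer1_size.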
600
601void InitNet() {
602 long long a, b;
603 unsigned long long next_random = 1;
604 long long read;
605
606 window_layer_size = layer1_size * window * 2;
607 a = posix_memalign((void **) &syn0, 128,
608 (long long) vocab_size * layer1_size * sizeof(real));
609 if (syn0 == NULL) {
610 printf("Memory allocation failed\n");
611 exit(1);
612 }
613
614 if (hs) {
615 a = posix_memalign((void **) &syn1, 128,
616 (long long) vocab_size * layer1_size * sizeof(real));
617 if (syn1 == NULL) {
618 printf("Memory allocation failed\n");
619 exit(1);
620 }
621 a = posix_memalign((void **) &syn1_window, 128,
622 (long long) vocab_size * window_layer_size * sizeof(real));
623 if (syn1_window == NULL) {
624 printf("Memory allocation failed\n");
625 exit(1);
626 }
627 a = posix_memalign((void **) &syn_hidden_word, 128,
628 (long long) vocab_size * window_hidden_size * sizeof(real));
629 if (syn_hidden_word == NULL) {
630 printf("Memory allocation failed\n");
631 exit(1);
632 }
633
634 for (a = 0; a < vocab_size; a++)
635 for (b = 0; b < layer1_size; b++)
636 syn1[a * layer1_size + b] = 0;
637 for (a = 0; a < vocab_size; a++)
638 for (b = 0; b < window_layer_size; b++)
639 syn1_window[a * window_layer_size + b] = 0;
640 for (a = 0; a < vocab_size; a++)
641 for (b = 0; b < window_hidden_size; b++)
642 syn_hidden_word[a * window_hidden_size + b] = 0;
643 }
644 if (negative > 0) {
645 if (type == 0 || type == 1) {
646 a = posix_memalign((void **) &syn1neg, 128,
647 (long long) vocab_size * layer1_size * sizeof(real));
648 if (syn1neg == NULL) {
649 printf("Memory allocation failed\n");
650 exit(1);
651 }
652 for (a = 0; a < vocab_size; a++)
653 for (b = 0; b < layer1_size; b++)
654 syn1neg[a * layer1_size + b] = 0;
655 } else if (type == 2 || type == 3) {
656 a = posix_memalign((void **) &syn1neg_window, 128,
657 (long long) vocab_size * window_layer_size * sizeof(real));
658 if (syn1neg_window == NULL) {
659 printf("Memory allocation failed\n");
660 exit(1);
661 }
662 for (a = 0; a < vocab_size; a++)
663 for (b = 0; b < window_layer_size; b++)
664 syn1neg_window[a * window_layer_size + b] = 0;
665 } else if (type == 4) {
666 a = posix_memalign((void **) &syn_hidden_word_neg, 128,
667 (long long) vocab_size * window_hidden_size * sizeof(real));
668 if (syn_hidden_word_neg == NULL) {
669 printf("Memory allocation failed\n");
670 exit(1);
671 }
672 for (a = 0; a < vocab_size; a++)
673 for (b = 0; b < window_hidden_size; b++)
674 syn_hidden_word_neg[a * window_hidden_size + b] = 0;
675 }
676 }
677 if (nce > 0) {
678 a = posix_memalign((void **) &syn1nce, 128,
679 (long long) vocab_size * layer1_size * sizeof(real));
680 if (syn1nce == NULL) {
681 printf("Memory allocation failed\n");
682 exit(1);
683 }
684 a = posix_memalign((void **) &syn1nce_window, 128,
685 (long long) vocab_size * window_layer_size * sizeof(real));
686 if (syn1nce_window == NULL) {
687 printf("Memory allocation failed\n");
688 exit(1);
689 }
690 a = posix_memalign((void **) &syn_hidden_word_nce, 128,
691 (long long) vocab_size * window_hidden_size * sizeof(real));
692 if (syn_hidden_word_nce == NULL) {
693 printf("Memory allocation failed\n");
694 exit(1);
695 }
696
697 for (a = 0; a < vocab_size; a++)
698 for (b = 0; b < layer1_size; b++)
699 syn1nce[a * layer1_size + b] = 0;
700 for (a = 0; a < vocab_size; a++)
701 for (b = 0; b < window_layer_size; b++)
702 syn1nce_window[a * window_layer_size + b] = 0;
703 for (a = 0; a < vocab_size; a++)
704 for (b = 0; b < window_hidden_size; b++)
705 syn_hidden_word_nce[a * window_hidden_size + b] = 0;
706 }
707
708 if (type == 4) {
709 a = posix_memalign((void **) &syn_window_hidden, 128,
710 window_hidden_size * window_layer_size * sizeof(real));
711 if (syn_window_hidden == NULL) {
712 printf("Memory allocation failed\n");
713 exit(1);
714 }
715 for (a = 0; a < window_hidden_size * window_layer_size; a++) {
716 next_random = next_random * (unsigned long long) 25214903917 + 11;
717 syn_window_hidden[a] = (((next_random & 0xFFFF) / (real) 65536)
718 - 0.5) / (window_hidden_size * window_layer_size);
719 }
720 }
721
722 if (read_net_file[0] == 0) {
723 for (a = 0; a < vocab_size; a++)
724 for (b = 0; b < layer1_size; b++) {
725 next_random = next_random * (unsigned long long) 25214903917
726 + 11;
727 syn0[a * layer1_size + b] = (((next_random & 0xFFFF)
728 / (real) 65536) - 0.5) / layer1_size;
729 }
730 } else if ((type == 0 || type == 1) && negative > 0) {
731 FILE *fnet = fopen(read_net_file, "rb");
732 if (fnet == NULL) {
733 printf("Net parameter file not found\n");
734 exit(1);
735 }
736 printf("vocab-size: %lld, layer1_size: %lld\n",
737 vocab_size, layer1_size);
738 read = fread(syn0, sizeof(real), vocab_size * layer1_size, fnet);
739 if (read != vocab_size * layer1_size) {
740 fprintf(stderr, "read-net failed %lld\n", read);
741 exit(-1);
742 }
743 read = fread(syn1neg, sizeof(real),
744 vocab_size * layer1_size, fnet);
745 if (read != (long long) vocab_size * layer1_size) {
746 fprintf(stderr, "read-net failed, read %lld, expected: %lld\n",
747 read,
748 (long long) sizeof(real) * vocab_size * layer1_size);
749 exit(-1);
750 }
751 fgetc(fnet);
752 if (!feof(fnet)) {
753 fprintf(stderr,
754 "Remaining bytes in net-file after read-net. File position: %ld\n",
755 ftell(fnet));
756 exit(-1);
757 }
758 fclose(fnet);
759 } else if ((type == 2 || type == 3) && negative > 0) {
760 FILE *fnet = fopen(read_net_file, "rb");
761 if (fnet == NULL) {
762 printf("Net parameter file not found\n");
763 exit(1);
764 }
765 printf("vocab-size: %lld, layer1_size: %lld, window_layer_size %d\n",
766 vocab_size, layer1_size, window_layer_size);
767 read = fread(syn0, sizeof(real), vocab_size * layer1_size, fnet);
768 if (read != vocab_size * layer1_size) {
769 fprintf(stderr, "read-net failed %lld\n", read);
770 exit(-1);
771 }
772 read = fread(syn1neg_window, sizeof(real),
773 vocab_size * window_layer_size, fnet);
774 if (read != (long long) vocab_size * window_layer_size) {
775 fprintf(stderr, "read-net failed, read %lld, expected: %lld\n",
776 read,
777 (long long) sizeof(real) * vocab_size * window_layer_size);
778 exit(-1);
779 }
780 fgetc(fnet);
781 if (!feof(fnet)) {
782 fprintf(stderr,
783 "Remaining bytes in net-file after read-net. File position: %ld\n",
784 ftell(fnet));
785 exit(-1);
786 }
787 fclose(fnet);
788 } else {
789 fprintf(stderr,
790 "read-net only supported for type 3 with negative sampling\n");
791 exit(-1);
792 }
793
794 CreateBinaryTree();
795}
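// Unless a net file is read, the input vectors syn0 are initialised uniformly
// in [-0.5, 0.5) / layer1_size using the same 25214903917 linear congruential
// generator as the training threads; all output weight matrices start at zero,
// except syn_window_hidden (type 4), which is also randomly initialised.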
796
797char *currentDateTime(char *buf, real offset) {
798 time_t t;
799 time(&t);
800 t += (long) offset;
801 struct tm tstruct;
802 tstruct = *localtime(&t);
803 strftime(buf, 80, "%c", &tstruct);
804 return buf;
805}
806
807void *MonitorThread(void *id) {
808 char *timebuf = malloc(80);
809 int i, n=num_threads;
810 long long sum;
811 sleep(1);
812 while(n > 0) {
813 sleep(1);
814 sum = n = 0;
815 for(i=0; i < num_threads; i++) {
816 if(threadPos[i] >= 0) {
817 sum += (iter - threadIters[i]) * file_size / num_threads + threadPos[i] - (file_size / num_threads) * i;
818 n++;
819 } else {
820 sum += iter * file_size / num_threads;
821 }
822 }
823 if(n == 0)
824 break;
825 real finished_portion = (real) sum / (float) (file_size * iter);
826 long long now = time(NULL);
827 long long elapsed = (now - start);
828 long long ttg = ((1.0 / finished_portion) * (real) elapsed - elapsed);
829
830 printf("\rAlpha: %.3f Done: %.2f%% with %.2fKB/s TE: %llds TTG: %llds ETA: %s\033[K",
831 alpha,
832 finished_portion * 100,
833 (float) sum / elapsed / 1000,
834 elapsed,
835 ttg,
836 currentDateTime(timebuf, ttg)
837 );
838 fflush(stdout);
839 }
840 pthread_exit(NULL);
841}
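// Progress is estimated from each thread's file position: a thread is assumed
// to sweep its file_size / num_threads slice once per iteration, so finished
// work = completed iterations * slice + current offset into the slice. From the
// total, the monitor derives throughput, time elapsed (TE), time to go (TTG)
// and an ETA.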
842
843void *TrainModelThread(void *id) {
844 long long a, b, d, cw, word, last_word, sentence_length = 0,
845 sentence_position = 0;
846 long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
847 long long l1, l2, c, target, label, local_iter = iter;
848 unsigned long long next_random = (long long) id;
849 real f, g;
850 int input_len_1 = layer1_size;
851 int window_offset = -1;
852 if (type == 2 || type == 4) {
853 input_len_1 = window_layer_size;
854 }
855 real *neu1 = (real *) calloc(input_len_1, sizeof(real));
856 real *neu1e = (real *) calloc(input_len_1, sizeof(real));
857 threadIters[(long) id] = iter;
858
859 int input_len_2 = 0;
860 if (type == 4) {
861 input_len_2 = window_hidden_size;
862 }
863 real *neu2 = (real *) calloc(input_len_2, sizeof(real));
864 real *neu2e = (real *) calloc(input_len_2, sizeof(real));
865
866 FILE *fi = fopen(train_file, "rb");
867 long long start_pos = file_size / (long long) num_threads * (long long) id;
868 long long end_pos = file_size / (long long) num_threads * (long long) (id + 1) -1;
869 long long current_pos = start_pos;
870 long long last_pos = start_pos;
871 fseek(fi, start_pos, SEEK_SET);
872 while (1) {
873 if (word_count - last_word_count > 10000) {
874 // if ((current_pos - last_pos > 100000)) {
875 // PF: changed back, because it seems that alpha is not correctly adjusted otherwise.
876 word_count_actual += word_count - last_word_count;
877 last_pos = current_pos;
878 last_word_count = word_count;
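			// Decay the learning rate linearly with the fraction of words processed,
			// but never below 0.01% of its starting value.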
879 alpha = starting_alpha
880 * (1 - word_count_actual / (real) (iter * train_words + 1));
881 if (alpha < starting_alpha * 0.0001)
882 alpha = starting_alpha * 0.0001;
883 }
884 if (sentence_length == 0) {
885 while (1) {
886 word = ReadWordIndex(fi);
887 if (feof(fi))
888 break;
889 if (word == -1)
890 continue;
891 word_count++;
892 if (word == 0)
893 break;
894 // The subsampling randomly discards frequent words while keeping the ranking same
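				// With corpus frequency f(w) = cn(w) / train_words, a word is kept with
				// probability (sqrt(f(w) / sample) + 1) * sample / f(w): words at or below
				// the sample threshold are always kept, very frequent words are mostly dropped.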
895 if (sample > 0) {
896 real ran = (sqrt(vocab[word].cn / (sample * train_words))
897 + 1) * (sample * train_words) / vocab[word].cn;
898 next_random = next_random * (unsigned long long) 25214903917
899 + 11;
900 if (ran < (next_random & 0xFFFF) / (real) 65536) {
901 if (type == 3) // in structured skipgrams
902 word = -2; // keep the window position correct
903 else
904 continue;
905 }
906 }
907 sen[sentence_length] = word;
908 sentence_length++;
909 if (sentence_length >= MAX_SENTENCE_LENGTH)
910 break;
911 }
912 sentence_position = 0;
913 }
914 current_pos = threadPos[(long) id] = ftell(fi);
915 if (feof(fi) || current_pos >= end_pos ) {
916 word_count_actual += word_count - last_word_count;
917 threadIters[(long) id]--;
918 local_iter--;
919 if (local_iter == 0)
920 break;
921 if (magic_stop_file[0] && access(magic_stop_file, F_OK ) != -1) {
922 printf("Magic stop file %s found. Stopping traing ...\n", magic_stop_file);
923 break;
924 }
925 word_count = 0;
926 last_word_count = 0;
927 sentence_length = 0;
928 fseek(fi, file_size / (long long) num_threads * (long long) id,
929 SEEK_SET);
930 continue;
931 }
932 word = sen[sentence_position];
933 while (word == -2 && sentence_position<sentence_length)
934 word = sen[++sentence_position];
935 if (sentence_position>=sentence_length) {
936 sentence_length=0;
937 continue;
938 }
939 if (word < 0)
940 continue;
941 for (c = 0; c < input_len_1; c++)
942 neu1[c] = 0;
943 for (c = 0; c < input_len_1; c++)
944 neu1e[c] = 0;
945 for (c = 0; c < input_len_2; c++)
946 neu2[c] = 0;
947 for (c = 0; c < input_len_2; c++)
948 neu2e[c] = 0;
949 next_random = next_random * (unsigned long long) 25214903917 + 11;
950 b = next_random % window;
951 if (type == 0) { //train the cbow architecture
952 // in -> hidden
953 cw = 0;
954 for (a = b; a < window * 2 + 1 - b; a++)
955 if (a != window) {
956 c = sentence_position - window + a;
957 if (c < 0)
958 continue;
959 if (c >= sentence_length)
960 continue;
961 last_word = sen[c];
962 if (last_word == -1)
963 continue;
964 for (c = 0; c < layer1_size; c++)
965 neu1[c] += syn0[c + last_word * layer1_size];
966 cw++;
967 }
968 if (cw) {
969 for (c = 0; c < layer1_size; c++)
970 neu1[c] /= cw;
971 if (hs)
972 for (d = 0; d < vocab[word].codelen; d++) {
973 f = 0;
974 l2 = vocab[word].point[d] * layer1_size;
975 // Propagate hidden -> output
976 for (c = 0; c < layer1_size; c++)
977 f += neu1[c] * syn1[c + l2];
978 if (f <= -MAX_EXP)
979 continue;
980 else if (f >= MAX_EXP)
981 continue;
982 else
983 f = expTable[(int) ((f + MAX_EXP)
984 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
985 // 'g' is the gradient multiplied by the learning rate
986 g = (1 - vocab[word].code[d] - f) * alpha;
987 // Propagate errors output -> hidden
988 for (c = 0; c < layer1_size; c++)
989 neu1e[c] += g * syn1[c + l2];
990 // Learn weights hidden -> output
991 for (c = 0; c < layer1_size; c++)
992 syn1[c + l2] += g * neu1[c];
993 if (cap == 1)
994 for (c = 0; c < layer1_size; c++)
995 capParam(syn1, c + l2);
996 }
997 // NEGATIVE SAMPLING
998 if (negative > 0)
999 for (d = 0; d < negative + 1; d++) {
1000 if (d == 0) {
1001 target = word;
1002 label = 1;
1003 } else {
1004 next_random = next_random
1005 * (unsigned long long) 25214903917 + 11;
1006 if (word_to_group != NULL
1007 && word_to_group[word] != -1) {
1008 target = word;
1009 while (target == word) {
1010 target = group_to_table[word_to_group[word]
1011 * table_size
1012 + (next_random >> 16) % table_size];
1013 next_random = next_random
1014 * (unsigned long long) 25214903917
1015 + 11;
1016 }
1017 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1018 } else {
1019 target =
1020 table[(next_random >> 16) % table_size];
1021 }
1022 if (target == 0)
1023 target = next_random % (vocab_size - 1) + 1;
1024 if (target == word)
1025 continue;
1026 label = 0;
1027 }
1028 l2 = target * layer1_size;
1029 f = 0;
1030 for (c = 0; c < layer1_size; c++)
1031 f += neu1[c] * syn1neg[c + l2];
1032 if (f > MAX_EXP)
1033 g = (label - 1) * alpha;
1034 else if (f < -MAX_EXP)
1035 g = (label - 0) * alpha;
1036 else
1037 g = (label
1038 - expTable[(int) ((f + MAX_EXP)
1039 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1040 * alpha;
1041 for (c = 0; c < layer1_size; c++)
1042 neu1e[c] += g * syn1neg[c + l2];
1043 for (c = 0; c < layer1_size; c++)
1044 syn1neg[c + l2] += g * neu1[c];
1045 if (cap == 1)
1046 for (c = 0; c < layer1_size; c++)
1047 capParam(syn1neg, c + l2);
1048 }
1049 // Noise Contrastive Estimation
1050 if (nce > 0)
1051 for (d = 0; d < nce + 1; d++) {
1052 if (d == 0) {
1053 target = word;
1054 label = 1;
1055 } else {
1056 next_random = next_random
1057 * (unsigned long long) 25214903917 + 11;
1058 if (word_to_group != NULL
1059 && word_to_group[word] != -1) {
1060 target = word;
1061 while (target == word) {
1062 target = group_to_table[word_to_group[word]
1063 * table_size
1064 + (next_random >> 16) % table_size];
1065 next_random = next_random
1066 * (unsigned long long) 25214903917
1067 + 11;
1068 }
1069 } else {
1070 target =
1071 table[(next_random >> 16) % table_size];
1072 }
1073 if (target == 0)
1074 target = next_random % (vocab_size - 1) + 1;
1075 if (target == word)
1076 continue;
1077 label = 0;
1078 }
1079 l2 = target * layer1_size;
1080 f = 0;
1081
1082 for (c = 0; c < layer1_size; c++)
1083 f += neu1[c] * syn1nce[c + l2];
1084 if (f > MAX_EXP)
1085 g = (label - 1) * alpha;
1086 else if (f < -MAX_EXP)
1087 g = (label - 0) * alpha;
1088 else {
1089 f = exp(f);
1090 g =
1091 (label
1092 - f
1093 / (noise_distribution[target]
1094 * nce + f)) * alpha;
1095 }
1096 for (c = 0; c < layer1_size; c++)
1097 neu1e[c] += g * syn1nce[c + l2];
1098 for (c = 0; c < layer1_size; c++)
1099 syn1nce[c + l2] += g * neu1[c];
1100 if (cap == 1)
1101 for (c = 0; c < layer1_size; c++)
1102 capParam(syn1nce, c + l2);
1103 }
1104 // hidden -> in
1105 for (a = b; a < window * 2 + 1 - b; a++)
1106 if (a != window) {
1107 c = sentence_position - window + a;
1108 if (c < 0)
1109 continue;
1110 if (c >= sentence_length)
1111 continue;
1112 last_word = sen[c];
1113 if (last_word == -1)
1114 continue;
1115 for (c = 0; c < layer1_size; c++)
1116 syn0[c + last_word * layer1_size] += neu1e[c];
1117 }
1118 }
1119 } else if (type == 1) { //train skip-gram
1120 for (a = b; a < window * 2 + 1 - b; a++)
1121 if (a != window) {
1122 c = sentence_position - window + a;
1123 if (c < 0)
1124 continue;
1125 if (c >= sentence_length)
1126 continue;
1127 last_word = sen[c];
1128 if (last_word == -1)
1129 continue;
1130 l1 = last_word * layer1_size;
1131 for (c = 0; c < layer1_size; c++)
1132 neu1e[c] = 0;
1133 // HIERARCHICAL SOFTMAX
1134 if (hs)
1135 for (d = 0; d < vocab[word].codelen; d++) {
1136 f = 0;
1137 l2 = vocab[word].point[d] * layer1_size;
1138 // Propagate hidden -> output
1139 for (c = 0; c < layer1_size; c++)
1140 f += syn0[c + l1] * syn1[c + l2];
1141 if (f <= -MAX_EXP)
1142 continue;
1143 else if (f >= MAX_EXP)
1144 continue;
1145 else
1146 f = expTable[(int) ((f + MAX_EXP)
1147 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1148 // 'g' is the gradient multiplied by the learning rate
1149 g = (1 - vocab[word].code[d] - f) * alpha;
1150 // Propagate errors output -> hidden
1151 for (c = 0; c < layer1_size; c++)
1152 neu1e[c] += g * syn1[c + l2];
1153 // Learn weights hidden -> output
1154 for (c = 0; c < layer1_size; c++)
1155 syn1[c + l2] += g * syn0[c + l1];
1156 if (cap == 1)
1157 for (c = 0; c < layer1_size; c++)
1158 capParam(syn1, c + l2);
1159 }
1160 // NEGATIVE SAMPLING
1161 if (negative > 0)
1162 for (d = 0; d < negative + 1; d++) {
1163 if (d == 0) {
1164 target = word;
1165 label = 1;
1166 } else {
1167 next_random = next_random
1168 * (unsigned long long) 25214903917 + 11;
1169 if (word_to_group != NULL
1170 && word_to_group[word] != -1) {
1171 target = word;
1172 while (target == word) {
1173 target =
1174 group_to_table[word_to_group[word]
1175 * table_size
1176 + (next_random >> 16)
1177 % table_size];
1178 next_random =
1179 next_random
1180 * (unsigned long long) 25214903917
1181 + 11;
1182 }
1183 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1184 } else {
1185 target = table[(next_random >> 16)
1186 % table_size];
1187 }
1188 if (target == 0)
1189 target = next_random % (vocab_size - 1) + 1;
1190 if (target == word)
1191 continue;
1192 label = 0;
1193 }
1194 l2 = target * layer1_size;
1195 f = 0;
1196 for (c = 0; c < layer1_size; c++)
1197 f += syn0[c + l1] * syn1neg[c + l2];
1198 if (f > MAX_EXP)
1199 g = (label - 1) * alpha;
1200 else if (f < -MAX_EXP)
1201 g = (label - 0) * alpha;
1202 else
1203 g =
1204 (label
1205 - expTable[(int) ((f + MAX_EXP)
1206 * (EXP_TABLE_SIZE
1207 / MAX_EXP / 2))])
1208 * alpha;
1209 for (c = 0; c < layer1_size; c++)
1210 neu1e[c] += g * syn1neg[c + l2];
1211 for (c = 0; c < layer1_size; c++)
1212 syn1neg[c + l2] += g * syn0[c + l1];
1213 if (cap == 1)
1214 for (c = 0; c < layer1_size; c++)
1215 capParam(syn1neg, c + l2);
1216 }
1217 //Noise Contrastive Estimation
1218 if (nce > 0)
1219 for (d = 0; d < nce + 1; d++) {
1220 if (d == 0) {
1221 target = word;
1222 label = 1;
1223 } else {
1224 next_random = next_random
1225 * (unsigned long long) 25214903917 + 11;
1226 if (word_to_group != NULL
1227 && word_to_group[word] != -1) {
1228 target = word;
1229 while (target == word) {
1230 target =
1231 group_to_table[word_to_group[word]
1232 * table_size
1233 + (next_random >> 16)
1234 % table_size];
1235 next_random =
1236 next_random
1237 * (unsigned long long) 25214903917
1238 + 11;
1239 }
1240 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1241 } else {
1242 target = table[(next_random >> 16)
1243 % table_size];
1244 }
1245 if (target == 0)
1246 target = next_random % (vocab_size - 1) + 1;
1247 if (target == word)
1248 continue;
1249 label = 0;
1250 }
1251 l2 = target * layer1_size;
1252 f = 0;
1253 for (c = 0; c < layer1_size; c++)
1254 f += syn0[c + l1] * syn1nce[c + l2];
1255 if (f > MAX_EXP)
1256 g = (label - 1) * alpha;
1257 else if (f < -MAX_EXP)
1258 g = (label - 0) * alpha;
1259 else {
1260 f = exp(f);
1261 g = (label
1262 - f
1263 / (noise_distribution[target]
1264 * nce + f)) * alpha;
1265 }
1266 for (c = 0; c < layer1_size; c++)
1267 neu1e[c] += g * syn1nce[c + l2];
1268 for (c = 0; c < layer1_size; c++)
1269 syn1nce[c + l2] += g * syn0[c + l1];
1270 if (cap == 1)
1271 for (c = 0; c < layer1_size; c++)
1272 capParam(syn1nce, c + l2);
1273 }
1274 // Learn weights input -> hidden
1275 for (c = 0; c < layer1_size; c++)
1276 syn0[c + l1] += neu1e[c];
1277 }
1278 } else if (type == 2) { //train the cwindow architecture
1279 // in -> hidden
1280 cw = 0;
1281 for (a = 0; a < window * 2 + 1; a++)
1282 if (a != window) {
1283 c = sentence_position - window + a;
1284 if (c < 0)
1285 continue;
1286 if (c >= sentence_length)
1287 continue;
1288 last_word = sen[c];
1289 if (last_word == -1)
1290 continue;
1291 window_offset = a * layer1_size;
1292 if (a > window)
1293 window_offset -= layer1_size;
1294 for (c = 0; c < layer1_size; c++)
1295 neu1[c + window_offset] += syn0[c
1296 + last_word * layer1_size];
1297 cw++;
1298 }
1299 if (cw) {
1300 if (hs)
1301 for (d = 0; d < vocab[word].codelen; d++) {
1302 f = 0;
1303 l2 = vocab[word].point[d] * window_layer_size;
1304 // Propagate hidden -> output
1305 for (c = 0; c < window_layer_size; c++)
1306 f += neu1[c] * syn1_window[c + l2];
1307 if (f <= -MAX_EXP)
1308 continue;
1309 else if (f >= MAX_EXP)
1310 continue;
1311 else
1312 f = expTable[(int) ((f + MAX_EXP)
1313 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1314 // 'g' is the gradient multiplied by the learning rate
1315 g = (1 - vocab[word].code[d] - f) * alpha;
1316 // Propagate errors output -> hidden
1317 for (c = 0; c < window_layer_size; c++)
1318 neu1e[c] += g * syn1_window[c + l2];
1319 // Learn weights hidden -> output
1320 for (c = 0; c < window_layer_size; c++)
1321 syn1_window[c + l2] += g * neu1[c];
1322 if (cap == 1)
1323 for (c = 0; c < window_layer_size; c++)
1324 capParam(syn1_window, c + l2);
1325 }
1326 // NEGATIVE SAMPLING
1327 if (negative > 0)
1328 for (d = 0; d < negative + 1; d++) {
1329 if (d == 0) {
1330 target = word;
1331 label = 1;
1332 } else {
1333 next_random = next_random
1334 * (unsigned long long) 25214903917 + 11;
1335 if (word_to_group != NULL
1336 && word_to_group[word] != -1) {
1337 target = word;
1338 while (target == word) {
1339 target = group_to_table[word_to_group[word]
1340 * table_size
1341 + (next_random >> 16) % table_size];
1342 next_random = next_random
1343 * (unsigned long long) 25214903917
1344 + 11;
1345 }
1346 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1347 } else {
1348 target =
1349 table[(next_random >> 16) % table_size];
1350 }
1351 if (target == 0)
1352 target = next_random % (vocab_size - 1) + 1;
1353 if (target == word)
1354 continue;
1355 label = 0;
1356 }
1357 l2 = target * window_layer_size;
1358 f = 0;
1359 for (c = 0; c < window_layer_size; c++)
1360 f += neu1[c] * syn1neg_window[c + l2];
1361 if (f > MAX_EXP)
1362 g = (label - 1) * alpha;
1363 else if (f < -MAX_EXP)
1364 g = (label - 0) * alpha;
1365 else
1366 g = (label
1367 - expTable[(int) ((f + MAX_EXP)
1368 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1369 * alpha;
1370 for (c = 0; c < window_layer_size; c++)
1371 neu1e[c] += g * syn1neg_window[c + l2];
1372 for (c = 0; c < window_layer_size; c++)
1373 syn1neg_window[c + l2] += g * neu1[c];
1374 if (cap == 1)
1375 for (c = 0; c < window_layer_size; c++)
1376 capParam(syn1neg_window, c + l2);
1377 }
1378 // Noise Contrastive Estimation
1379 if (nce > 0)
1380 for (d = 0; d < nce + 1; d++) {
1381 if (d == 0) {
1382 target = word;
1383 label = 1;
1384 } else {
1385 next_random = next_random
1386 * (unsigned long long) 25214903917 + 11;
1387 if (word_to_group != NULL
1388 && word_to_group[word] != -1) {
1389 target = word;
1390 while (target == word) {
1391 target = group_to_table[word_to_group[word]
1392 * table_size
1393 + (next_random >> 16) % table_size];
1394 next_random = next_random
1395 * (unsigned long long) 25214903917
1396 + 11;
1397 }
1398 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1399 } else {
1400 target =
1401 table[(next_random >> 16) % table_size];
1402 }
1403 if (target == 0)
1404 target = next_random % (vocab_size - 1) + 1;
1405 if (target == word)
1406 continue;
1407 label = 0;
1408 }
1409 l2 = target * window_layer_size;
1410 f = 0;
1411 for (c = 0; c < window_layer_size; c++)
1412 f += neu1[c] * syn1nce_window[c + l2];
1413 if (f > MAX_EXP)
1414 g = (label - 1) * alpha;
1415 else if (f < -MAX_EXP)
1416 g = (label - 0) * alpha;
1417 else {
1418 f = exp(f);
1419 g =
1420 (label
1421 - f
1422 / (noise_distribution[target]
1423 * nce + f)) * alpha;
1424 }
1425 for (c = 0; c < window_layer_size; c++)
1426 neu1e[c] += g * syn1nce_window[c + l2];
1427 for (c = 0; c < window_layer_size; c++)
1428 syn1nce_window[c + l2] += g * neu1[c];
1429 if (cap == 1)
1430 for (c = 0; c < window_layer_size; c++)
1431 capParam(syn1nce_window, c + l2);
1432 }
1433 // hidden -> in
1434 for (a = 0; a < window * 2 + 1; a++)
1435 if (a != window) {
1436 c = sentence_position - window + a;
1437 if (c < 0)
1438 continue;
1439 if (c >= sentence_length)
1440 continue;
1441 last_word = sen[c];
1442 if (last_word == -1)
1443 continue;
1444 window_offset = a * layer1_size;
1445 if (a > window)
1446 window_offset -= layer1_size;
1447 for (c = 0; c < layer1_size; c++)
1448 syn0[c + last_word * layer1_size] += neu1e[c
1449 + window_offset];
1450 }
1451 }
1452 } else if (type == 3) { //train structured skip-gram
1453 for (a = 0; a < window * 2 + 1; a++)
1454 if (a != window) {
1455 c = sentence_position - window + a;
1456 if (c < 0)
1457 continue;
1458 if (c >= sentence_length)
1459 continue;
1460 last_word = sen[c];
1461 if (last_word < 0)
1462 continue;
1463 l1 = last_word * layer1_size;
1464 window_offset = a * layer1_size;
1465 if (a > window)
1466 window_offset -= layer1_size;
1467 for (c = 0; c < layer1_size; c++)
1468 neu1e[c] = 0;
1469 // HIERARCHICAL SOFTMAX
1470 if (hs)
1471 for (d = 0; d < vocab[word].codelen; d++) {
1472 f = 0;
1473 l2 = vocab[word].point[d] * window_layer_size;
1474 // Propagate hidden -> output
1475 for (c = 0; c < layer1_size; c++)
1476 f += syn0[c + l1]
1477 * syn1_window[c + l2 + window_offset];
1478 if (f <= -MAX_EXP)
1479 continue;
1480 else if (f >= MAX_EXP)
1481 continue;
1482 else
1483 f = expTable[(int) ((f + MAX_EXP)
1484 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1485 // 'g' is the gradient multiplied by the learning rate
1486 g = (1 - vocab[word].code[d] - f) * alpha;
1487 // Propagate errors output -> hidden
1488 for (c = 0; c < layer1_size; c++)
1489 neu1e[c] += g
1490 * syn1_window[c + l2 + window_offset];
1491 // Learn weights hidden -> output
1492 for (c = 0; c < layer1_size; c++)
1493 syn1[c + l2 + window_offset] += g
1494 * syn0[c + l1];
1495 if (cap == 1)
1496 for (c = 0; c < layer1_size; c++)
1497 capParam(syn1, c + l2 + window_offset);
1498 }
1499 // NEGATIVE SAMPLING
1500 if (negative > 0)
1501 for (d = 0; d < negative + 1; d++) {
1502 if (d == 0) {
1503 target = word;
1504 label = 1;
1505 } else {
1506 next_random = next_random
1507 * (unsigned long long) 25214903917 + 11;
1508 if (word_to_group != NULL
1509 && word_to_group[word] != -1) {
1510 target = word;
1511 while (target == word) {
1512 target =
1513 group_to_table[word_to_group[word]
1514 * table_size
1515 + (next_random >> 16)
1516 % table_size];
1517 next_random =
1518 next_random
1519 * (unsigned long long) 25214903917
1520 + 11;
1521 }
1522 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1523 } else {
1524 target = table[(next_random >> 16)
1525 % table_size];
1526 }
1527 if (target == 0)
1528 target = next_random % (vocab_size - 1) + 1;
1529 if (target == word)
1530 continue;
1531 label = 0;
1532 }
1533 l2 = target * window_layer_size;
1534 f = 0;
1535 for (c = 0; c < layer1_size; c++)
1536 f +=
1537 syn0[c + l1]
1538 * syn1neg_window[c + l2
1539 + window_offset];
1540 if (f > MAX_EXP)
1541 g = (label - 1) * alpha;
1542 else if (f < -MAX_EXP)
1543 g = (label - 0) * alpha;
1544 else
1545 g =
1546 (label
1547 - expTable[(int) ((f + MAX_EXP)
1548 * (EXP_TABLE_SIZE
1549 / MAX_EXP / 2))])
1550 * alpha;
1551 if(debug_mode > 2 && ((long long) id) == 0) {
1552 printf("negative sampling %lld for input (word) %s (#%lld), target (last word) %s returned %s (#%lld), ", d, vocab[word].word, word, vocab[last_word].word, vocab[target].word, target);
1553 printf("label %lld, a %lld, gain %.4f\n", label, a-window, g);
1554 }
1555 for (c = 0; c < layer1_size; c++)
1556 neu1e[c] +=
1557 g
1558 * syn1neg_window[c + l2
1559 + window_offset];
1560 for (c = 0; c < layer1_size; c++)
1561 syn1neg_window[c + l2 + window_offset] += g
1562 * syn0[c + l1];
1563 if (cap == 1)
1564 for (c = 0; c < layer1_size; c++)
1565 capParam(syn1neg_window,
1566 c + l2 + window_offset);
1567 }
1568 // Noise Contrastive Estimation
1569 if (nce > 0)
1570 for (d = 0; d < nce + 1; d++) {
1571 if (d == 0) {
1572 target = word;
1573 label = 1;
1574 } else {
1575 next_random = next_random
1576 * (unsigned long long) 25214903917 + 11;
1577 if (word_to_group != NULL
1578 && word_to_group[word] != -1) {
1579 target = word;
1580 while (target == word) {
1581 target =
1582 group_to_table[word_to_group[word]
1583 * table_size
1584 + (next_random >> 16)
1585 % table_size];
1586 next_random =
1587 next_random
1588 * (unsigned long long) 25214903917
1589 + 11;
1590 }
1591 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1592 } else {
1593 target = table[(next_random >> 16)
1594 % table_size];
1595 }
1596 if (target == 0)
1597 target = next_random % (vocab_size - 1) + 1;
1598 if (target == word)
1599 continue;
1600 label = 0;
1601 }
1602 l2 = target * window_layer_size;
1603 f = 0;
1604 for (c = 0; c < layer1_size; c++)
1605 f +=
1606 syn0[c + l1]
1607 * syn1nce_window[c + l2
1608 + window_offset];
1609 if (f > MAX_EXP)
1610 g = (label - 1) * alpha;
1611 else if (f < -MAX_EXP)
1612 g = (label - 0) * alpha;
1613 else {
1614 f = exp(f);
1615 g = (label
1616 - f
1617 / (noise_distribution[target]
1618 * nce + f)) * alpha;
1619 }
1620 for (c = 0; c < layer1_size; c++)
1621 neu1e[c] +=
1622 g
1623 * syn1nce_window[c + l2
1624 + window_offset];
1625 for (c = 0; c < layer1_size; c++)
1626 syn1nce_window[c + l2 + window_offset] += g
1627 * syn0[c + l1];
1628 if (cap == 1)
1629 for (c = 0; c < layer1_size; c++)
1630 capParam(syn1nce_window,
1631 c + l2 + window_offset);
1632 }
1633 // Learn weights input -> hidden
1634 for (c = 0; c < layer1_size; c++) {
1635 syn0[c + l1] += neu1e[c];
1636 if (syn0[c + l1] > 50)
1637 syn0[c + l1] = 50;
1638 if (syn0[c + l1] < -50)
1639 syn0[c + l1] = -50;
1640 }
1641 }
1642 } else if (type == 4) { //train the senna architecture
1643 // in -> hidden
1644 cw = 0;
1645 for (a = 0; a < window * 2 + 1; a++)
1646 if (a != window) {
1647 c = sentence_position - window + a;
1648 if (c < 0)
1649 continue;
1650 if (c >= sentence_length)
1651 continue;
1652 last_word = sen[c];
1653 if (last_word == -1)
1654 continue;
1655 window_offset = a * layer1_size;
1656 if (a > window)
1657 window_offset -= layer1_size;
1658 for (c = 0; c < layer1_size; c++)
1659 neu1[c + window_offset] += syn0[c
1660 + last_word * layer1_size];
1661 cw++;
1662 }
1663 if (cw) {
1664 for (a = 0; a < window_hidden_size; a++) {
1665 c = a * window_layer_size;
1666 for (b = 0; b < window_layer_size; b++) {
1667 neu2[a] += syn_window_hidden[c + b] * neu1[b];
1668 }
1669 }
1670 if (hs)
1671 for (d = 0; d < vocab[word].codelen; d++) {
1672 f = 0;
1673 l2 = vocab[word].point[d] * window_hidden_size;
1674 // Propagate hidden -> output
1675 for (c = 0; c < window_hidden_size; c++)
1676 f += hardTanh(neu2[c]) * syn_hidden_word[c + l2];
1677 if (f <= -MAX_EXP)
1678 continue;
1679 else if (f >= MAX_EXP)
1680 continue;
1681 else
1682 f = expTable[(int) ((f + MAX_EXP)
1683 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1684 // 'g' is the gradient multiplied by the learning rate
1685 g = (1 - vocab[word].code[d] - f) * alpha;
1686 // Propagate errors output -> hidden
1687 for (c = 0; c < window_hidden_size; c++)
1688 neu2e[c] += dHardTanh(neu2[c], g) * g
1689 * syn_hidden_word[c + l2];
1690 // Learn weights hidden -> output
1691 for (c = 0; c < window_hidden_size; c++)
1692 syn_hidden_word[c + l2] += dHardTanh(neu2[c], g) * g
1693 * neu2[c];
1694 }
1695 // NEGATIVE SAMPLING
1696 if (negative > 0)
1697 for (d = 0; d < negative + 1; d++) {
1698 if (d == 0) {
1699 target = word;
1700 label = 1;
1701 } else {
1702 next_random = next_random
1703 * (unsigned long long) 25214903917 + 11;
1704 if (word_to_group != NULL
1705 && word_to_group[word] != -1) {
1706 target = word;
1707 while (target == word) {
1708 target = group_to_table[word_to_group[word]
1709 * table_size
1710 + (next_random >> 16) % table_size];
1711 next_random = next_random
1712 * (unsigned long long) 25214903917
1713 + 11;
1714 }
1715 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1716 } else {
1717 target =
1718 table[(next_random >> 16) % table_size];
1719 }
1720 if (target == 0)
1721 target = next_random % (vocab_size - 1) + 1;
1722 if (target == word)
1723 continue;
1724 label = 0;
1725 }
1726 l2 = target * window_hidden_size;
1727 f = 0;
1728 for (c = 0; c < window_hidden_size; c++)
1729 f += hardTanh(neu2[c])
1730 * syn_hidden_word_neg[c + l2];
1731 if (f > MAX_EXP)
1732 g = (label - 1) * alpha / negative;
1733 else if (f < -MAX_EXP)
1734 g = (label - 0) * alpha / negative;
1735 else
1736 g = (label
1737 - expTable[(int) ((f + MAX_EXP)
1738 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1739 * alpha / negative;
1740 for (c = 0; c < window_hidden_size; c++)
1741 neu2e[c] += dHardTanh(neu2[c], g) * g
1742 * syn_hidden_word_neg[c + l2];
1743 for (c = 0; c < window_hidden_size; c++)
1744 syn_hidden_word_neg[c + l2] += dHardTanh(neu2[c], g)
1745 * g * neu2[c];
1746 }
1747 for (a = 0; a < window_hidden_size; a++)
1748 for (b = 0; b < window_layer_size; b++)
1749 neu1e[b] += neu2e[a]
1750 * syn_window_hidden[a * window_layer_size + b];
1751 for (a = 0; a < window_hidden_size; a++)
1752 for (b = 0; b < window_layer_size; b++)
1753 syn_window_hidden[a * window_layer_size + b] += neu2e[a]
1754 * neu1[b];
1755 // hidden -> in
1756 for (a = 0; a < window * 2 + 1; a++)
1757 if (a != window) {
1758 c = sentence_position - window + a;
1759 if (c < 0)
1760 continue;
1761 if (c >= sentence_length)
1762 continue;
1763 last_word = sen[c];
1764 if (last_word == -1)
1765 continue;
1766 window_offset = a * layer1_size;
1767 if (a > window)
1768 window_offset -= layer1_size;
1769 for (c = 0; c < layer1_size; c++)
1770 syn0[c + last_word * layer1_size] += neu1e[c
1771 + window_offset];
1772 }
1773 }
1774 } else if(type == 5) { // only count collocations into the collocator database
1775 for (a = b; a < window * 2 + 1 - b; a++) if (a != window) {
1776 c = sentence_position - window + a;
1777 if (c < 0) continue;
1778 if (c >= sentence_length) continue;
1779 last_word = sen[c];
1780 if (last_word == -1) continue;
1781 inc_collocator(cdb, word, last_word, a - window);
1782 // printf("%2d: storing %s %s - %d\n", id, vocab[word].word, vocab[last_word].word, (int) a - window);
1783 // cw++;
1784 }
1785 } else {
1786 printf("unknown type %i", type);
1787 exit(0);
1788 }
1789 sentence_position++;
1790 if (sentence_position >= sentence_length) {
1791 sentence_length = 0;
1792 continue;
1793 }
1794 }
1795 fclose(fi);
1796 free(neu1);
1797 free(neu1e);
1798 threadPos[(long) id] = -1;
1799 pthread_exit(NULL);
1800}
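// Each training thread owns a file_size / num_threads byte slice of the corpus:
// it seeks to the slice start, reads sentences until it runs past the slice end
// (or EOF), and repeats this for iter passes, publishing its file position in
// threadPos and its remaining passes in threadIters for the monitor thread.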
1801
1802void ShowCollocations() {
1803 long a, b, c, d, e, window_offset, target, max_target = 0, maxmax_target;
1804 real f, max_f, maxmax_f;
1805 real *target_sums, bestf[MAX_CC], worstbest;
1806 long besti[MAX_CC];
1807 int N = 10, bestp[MAX_CC];
1808 a = posix_memalign((void **) &target_sums, 128, vocab_size * sizeof(real));
1809
1810 for (d = cc; d < vocab_size; d++) {
1811 for (b = 0; b < vocab_size; b++)
1812 target_sums[b] = 0;
1813 for (b = 0; b < N; b++)
1814 bestf[b] = -1;
1815 worstbest = -1;
1816
1817 maxmax_f = -1;
1818 maxmax_target = 0;
1819 for (a = window * 2 + 1; a >=0; a--) {
1820 if (a != window) {
1821 max_f = -1;
1822 window_offset = a * layer1_size;
1823 if (a > window)
1824 window_offset -= layer1_size;
1825 for(target = 0; target < vocab_size; target ++) {
1826 if(target == d)
1827 continue;
1828 f = 0;
1829 for (c = 0; c < layer1_size; c++)
1830 f += syn0[d* layer1_size + c] * syn1neg_window[target * window_layer_size + window_offset + c];
1831 if (f < -MAX_EXP)
1832 continue;
1833 else if (f > MAX_EXP)
1834 continue;
1835 else
1836 f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1837 if(f > max_f) {
1838 max_f = f;
1839 max_target = target;
1840 }
1841 target_sums[target] += (1-target_sums[target]) * f;
1842 if(f > worstbest) {
1843 for (b = 0; b < N; b++) {
1844 if (f > bestf[b]) {
1845 for (e = N - 1; e > b; e--) {
1846 bestf[e] = bestf[e - 1];
1847 besti[e] = besti[e - 1];
1848 bestp[e] = bestp[e - 1];
1849 }
1850 bestf[b] = f;
1851 besti[b] = target;
1852 bestp[b] = window-a;
1853 break;
1854 }
1855 }
1856 worstbest = bestf[N - 1];
1857 }
1858 }
1859 printf("%s (%.2f) ", vocab[max_target].word, max_f);
1860 if (max_f > maxmax_f) {
1861 maxmax_f = max_f;
1862 maxmax_target = max_target;
1863 }
1864 } else {
1865 printf("\x1b[1m%s\x1b[0m ", vocab[d].word);
1866 }
1867 }
1868 max_f = -1;
1869 for (b = 0; b < vocab_size; b++) {
1870 if (target_sums[b] > max_f) {
1871 max_f = target_sums[b];
1872 max_target = b;
1873 }
1874 }
1875 printf(" – max sum: %s (%.2f), max resp.: \x1b[1m%s\x1b[0m (%.2f)\n",
1876 vocab[max_target].word, max_f, vocab[maxmax_target].word,
1877 maxmax_f);
1878 for (b = 0; b < N && bestf[b] > -1; b++)
1879 printf("%-32s %.2f %d\n", vocab[besti[b]].word, bestf[b], bestp[b]);
1880 printf("\n");
1881 }
1882}
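// For every word from index cc upwards, this prints the most strongly predicted
// collocate at each window position (scored with the negative-sampling window
// output weights syn1neg_window), the best collocate aggregated over all
// positions, and the N strongest (collocate, position) pairs overall.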
1883
1884void TrainModel() {
1885 long a, b, c, d;
1886 FILE *fo;
1887 pthread_t *pt = (pthread_t *) malloc((num_threads + 1) * sizeof(pthread_t)); // one extra slot for the monitor thread
1888 threadPos = malloc(num_threads * sizeof(long long));
1889 threadIters = malloc(num_threads * sizeof(int));
1890 char *timebuf = malloc(80);
1891 printf("Starting training using file %s\n", train_file);
1892 starting_alpha = alpha;
1893 if (read_vocab_file[0] != 0)
1894 ReadVocab();
1895 else
1896 LearnVocabFromTrainFile();
1897 if (save_vocab_file[0] != 0)
1898 SaveVocab();
1899 if (output_file[0] == 0)
1900 return;
1901 InitNet();
1902 if (cc > 0)
1903 ShowCollocations();
1904 if (negative > 0 || nce > 0)
1905 InitUnigramTable();
1906 if (negative_classes_file[0] != 0)
1907 InitClassUnigramTable();
1908 start = time(NULL);
1909 start_clock = clock();
1910 for (a = 0; a < num_threads; a++)
1911 pthread_create(&pt[a], NULL, TrainModelThread, (void *) a);
1912 if(debug_mode > 1)
1913 pthread_create(&pt[num_threads], NULL, MonitorThread, (void *) a);
1914 for (a = 0; a < num_threads; a++)
1915 pthread_join(pt[a], NULL);
1916 if(debug_mode > 1) {
1917 pthread_join(pt[num_threads], NULL);
1918 clock_t now = time(NULL);
1919 clock_t now_clock = clock();
1920 printf("\nFinished: %s - user: %lds - real: %lds\n", currentDateTime(timebuf, 0), (now_clock - start_clock) / CLOCKS_PER_SEC, now - start);
1921 if(type == 5) // don't save vectors for classic collocators
1922 return;
1923 printf("Saving vectors to %s ...", output_file);
1924 fflush(stdout);
1925 }
1926 fo = fopen(output_file, "wb");
1927 if (classes == 0) {
1928 // Save the word vectors
1929 fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
1930 for (a = 0; a < vocab_size; a++) {
1931 fprintf(fo, "%s ", vocab[a].word);
1932 if (binary)
1933 for (b = 0; b < layer1_size; b++)
1934 fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
1935 else
1936 for (b = 0; b < layer1_size; b++)
1937 fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
1938 fprintf(fo, "\n");
1939 }
1940 if(debug_mode > 1)
1941 fprintf(stderr, "\n");
1942 } else {
1943 // Run K-means on the word vectors
1944 int clcn = classes, iter = 10, closeid;
1945 int *centcn = (int *) malloc(classes * sizeof(int));
1946 int *cl = (int *) calloc(vocab_size, sizeof(int));
1947 real closev, x;
1948 real *cent = (real *) calloc(classes * layer1_size, sizeof(real));
1949 for (a = 0; a < vocab_size; a++)
1950 cl[a] = a % clcn;
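 // Spherical k-means: accumulate the vectors of each class, L2-normalize the
 // centroids, then reassign every word to the centroid with the largest dot
 // product; repeat for a fixed number of iterations.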
1951 for (a = 0; a < iter; a++) {
1952 for (b = 0; b < clcn * layer1_size; b++)
1953 cent[b] = 0;
1954 for (b = 0; b < clcn; b++)
1955 centcn[b] = 1;
1956 for (c = 0; c < vocab_size; c++) {
1957 for (d = 0; d < layer1_size; d++)
1958 cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
1959 centcn[cl[c]]++;
1960 }
1961 for (b = 0; b < clcn; b++) {
1962 closev = 0;
1963 for (c = 0; c < layer1_size; c++) {
1964 cent[layer1_size * b + c] /= centcn[b];
1965 closev += cent[layer1_size * b + c]
1966 * cent[layer1_size * b + c];
1967 }
1968 closev = sqrt(closev);
1969 for (c = 0; c < layer1_size; c++)
1970 cent[layer1_size * b + c] /= closev;
1971 }
1972 for (c = 0; c < vocab_size; c++) {
1973 closev = -10;
1974 closeid = 0;
1975 for (d = 0; d < clcn; d++) {
1976 x = 0;
1977 for (b = 0; b < layer1_size; b++)
1978 x += cent[layer1_size * d + b]
1979 * syn0[c * layer1_size + b];
1980 if (x > closev) {
1981 closev = x;
1982 closeid = d;
1983 }
1984 }
1985 cl[c] = closeid;
1986 }
1987 }
1988 // Save the K-means classes
1989 for (a = 0; a < vocab_size; a++)
1990 fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
1991 free(centcn);
1992 free(cent);
1993 free(cl);
1994 }
1995 fclose(fo);
1996 if (save_net_file[0] != 0)
1997 SaveNet();
1998}
1999
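// Returns the position of option str in argv, or -1 if it is not present;
// exits if the option is the last argument and therefore has no value.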
2000int ArgPos(char *str, int argc, char **argv) {
2001 int a;
2002 for (a = 1; a < argc; a++)
2003 if (!strcmp(str, argv[a])) {
2004 if (a == argc - 1) {
2005 printf("Argument missing for %s\n", str);
2006 exit(1);
2007 }
2008 return a;
2009 }
2010 return -1;
2011}
2012
2013void print_help() {
2014 printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
2015 printf("Options:\n");
2016 printf("Parameters for training:\n");
2017 printf("\t-train <file>\n");
2018 printf("\t\tUse text data from <file> to train the model\n");
2019 printf("\t-output <file>\n");
2020 printf(
2021 "\t\tUse <file> to save the resulting word vectors / word clusters\n");
2022 printf("\t-size <int>\n");
2023 printf("\t\tSet size of word vectors; default is 100\n");
2024 printf("\t-window <int>\n");
2025 printf("\t\tSet max skip length between words; default is 5\n");
2026 printf("\t-sample <float>\n");
2027 printf(
2028 "\t\tSet threshold for occurrence of words. Those that appear with higher frequency in the training data\n");
2029 printf(
2030 "\t\twill be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)\n");
2031 printf("\t-hs <int>\n");
2032 printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
2033 printf("\t-negative <int>\n");
2034 printf(
2035 "\t\tNumber of negative examples; default is 5, common values are 3 - 10 (0 = not used)\n");
2036 printf("\t-negative-classes <file>\n");
2037 printf("\t\tNegative classes to sample from\n");
2038 printf("\t-nce <int>\n");
2039 printf(
2040 "\t\tNumber of negative examples for nce; default is 0, common values are 3 - 10 (0 = not used)\n");
2041 printf("\t-threads <int>\n");
2042 printf("\t\tUse <int> threads (default 12)\n");
2043 printf("\t-iter <int>\n");
2044 printf("\t\tRun more training iterations (default 5)\n");
2045 printf("\t-min-count <int>\n");
2046 printf(
2047 "\t\tThis will discard words that appear less than <int> times; default is 5\n");
2048 printf("\t-alpha <float>\n");
2049 printf(
2050 "\t\tSet the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW\n");
2051 printf("\t-classes <int>\n");
2052 printf(
2053 "\t\tOutput word classes rather than word vectors; default number of classes is 0 (vectors are written)\n");
2054 printf("\t-debug <int>\n");
2055 printf(
2056 "\t\tSet the debug mode (default = 2 = more info during training)\n");
2057 printf("\t-binary <int>\n");
2058 printf(
2059 "\t\tSave the resulting vectors in binary moded; default is 0 (off)\n");
2060 printf("\t-save-vocab <file>\n");
2061 printf("\t\tThe vocabulary will be saved to <file>\n");
2062 printf("\t-read-vocab <file>\n");
2063 printf(
2064 "\t\tThe vocabulary will be read from <file>, not constructed from the training data\n");
2065 printf("\t-train-counts <int>\n");
2066 printf(
2067 "\t\tUse word counts of actual corpus rather than vocabulary counts; default is 1 (on)\n");
2068 printf("\t-read-net <file>\n");
2069 printf(
2070 "\t\tThe net parameters will be read from <file>, not initialized randomly\n");
2071 printf("\t-save-net <file>\n");
2072 printf("\t\tThe net parameters will be saved to <file>\n");
2073 printf("\t-magic-stop-file <file>\n");
2074 printf("\t\tIf the magic file <file> exists training will stop after the current cycle.\n");
2075 printf("\t-show-cc <int>\n");
2076 printf("\t\tShow words with their collocators starting from word rank <int>. Depends on -read-vocab and -read-net.\n");
2077 printf("\t-type <int>\n");
2078 printf(
2079 "\t\tType of embeddings (0 for cbow, 1 for skipngram, 2 for cwindow, 3 for structured skipngram, 4 for senna type, 5 for store positional bigramms)\n");
2080 printf("\t-cap <int>\n");
2081 printf(
2082 "\t\tlimit the parameter values to the range [-50, 50]; default is 0 (off)\n");
2083 printf("\nExamples:\n");
2084 printf(
2085 "./word2vec -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 -negative 5 -hs 0 -binary 0 -type 1 -iter 3\n\n");
2086}
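// A hypothetical invocation (a sketch inferred from the option descriptions
// above, not from the original documentation): store positional bigrams in a
// collocator database instead of training vectors:
//
//   ./word2vec -train data.txt -output collocators.db -type 5 -window 5 -threads 12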
2087
2088int main(int argc, char **argv) {
2089 int i;
2090 setlocale(LC_ALL, "");
2091 if (argc == 1) {
2092 print_help();
2093 return 0;
2094 }
2095 output_file[0] = 0;
2096 save_vocab_file[0] = 0;
2097 read_vocab_file[0] = 0;
2098 save_net_file[0] = 0;
2099 read_net_file[0] = 0;
2100 negative_classes_file[0] = 0;
2101 if ((i = ArgPos((char *) "-h", argc, argv)) > 0) {
2102 print_help();
2103 return(0);
2104 }
2105 if ((i = ArgPos((char *) "-help", argc, argv)) > 0) {
2106 print_help();
2107 return(0);
2108 }
2109 if ((i = ArgPos((char *) "-size", argc, argv)) > 0)
2110 layer1_size = atoi(argv[i + 1]);
2111 if ((i = ArgPos((char *) "-train", argc, argv)) > 0)
2112 strcpy(train_file, argv[i + 1]);
2113 if ((i = ArgPos((char *) "-save-vocab", argc, argv)) > 0)
2114 strcpy(save_vocab_file, argv[i + 1]);
2115 if ((i = ArgPos((char *) "-read-vocab", argc, argv)) > 0)
2116 strcpy(read_vocab_file, argv[i + 1]);
2117 if ((i = ArgPos((char *) "-train-counts", argc, argv)) > 0)
2118 tc = atoi(argv[i + 1]);
2119 if ((i = ArgPos((char *) "-save-net", argc, argv)) > 0)
2120 strcpy(save_net_file, argv[i + 1]);
2121 if ((i = ArgPos((char *) "-read-net", argc, argv)) > 0)
2122 strcpy(read_net_file, argv[i + 1]);
2123 if ((i = ArgPos((char *) "-magic-stop-file", argc, argv)) > 0) {
2124 strcpy(magic_stop_file, argv[i + 1]);
2125 if (access(magic_stop_file, F_OK ) != -1) {
2126 printf("ERROR: magic stop file %s must not exist at start.\n", magic_stop_file);
2127 exit(1);
2128 }
2129 }
2130 if ((i = ArgPos((char *) "-debug", argc, argv)) > 0)
2131 debug_mode = atoi(argv[i + 1]);
2132 if ((i = ArgPos((char *) "-binary", argc, argv)) > 0)
2133 binary = atoi(argv[i + 1]);
2134 if ((i = ArgPos((char *) "-show-cc", argc, argv)) > 0)
2135 cc = atoi(argv[i + 1]);
2136 if ((i = ArgPos((char *) "-type", argc, argv)) > 0)
2137 type = atoi(argv[i + 1]);
2138 if ((i = ArgPos((char *) "-output", argc, argv)) > 0)
2139 strcpy(output_file, argv[i + 1]);
2140 if ((i = ArgPos((char *) "-window", argc, argv)) > 0)
2141 window = atoi(argv[i + 1]);
2142 if ((i = ArgPos((char *) "-sample", argc, argv)) > 0)
2143 sample = atof(argv[i + 1]);
2144 if ((i = ArgPos((char *) "-hs", argc, argv)) > 0)
2145 hs = atoi(argv[i + 1]);
2146 if ((i = ArgPos((char *) "-negative", argc, argv)) > 0)
2147 negative = atoi(argv[i + 1]);
2148 if ((i = ArgPos((char *) "-negative-classes", argc, argv)) > 0)
2149 strcpy(negative_classes_file, argv[i + 1]);
2150 if ((i = ArgPos((char *) "-nce", argc, argv)) > 0)
2151 nce = atoi(argv[i + 1]);
2152 if ((i = ArgPos((char *) "-threads", argc, argv)) > 0)
2153 num_threads = atoi(argv[i + 1]);
2154 if ((i = ArgPos((char *) "-iter", argc, argv)) > 0)
2155 iter = atoi(argv[i + 1]);
2156 if ((i = ArgPos((char *) "-min-count", argc, argv)) > 0)
2157 min_count = atoi(argv[i + 1]);
2158 if ((i = ArgPos((char *) "-classes", argc, argv)) > 0)
2159 classes = atoi(argv[i + 1]);
2160 if ((i = ArgPos((char *) "-cap", argc, argv)) > 0)
2161 cap = atoi(argv[i + 1]);
2162 if (type == 0 || type == 2 || type == 4) // cbow-style types (0, 2, 4) use the higher 0.05 default learning rate
2163 alpha = 0.05;
2164 if (type == 5) { // positional-bigram mode: disable subsampling and write directly to a collocator database
2165 sample = 0;
2166 cdb = open_collocatordb_for_write(output_file);
2167 }
2168 if ((i = ArgPos((char *) "-alpha", argc, argv)) > 0)
2169 alpha = atof(argv[i + 1]);
2170 vocab = (struct vocab_word *) calloc(vocab_max_size,
2171 sizeof(struct vocab_word));
2172 vocab_hash = (int *) calloc(vocab_hash_size, sizeof(int));
2173 expTable = (real *) malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
2174 for (i = 0; i < EXP_TABLE_SIZE; i++) {
2175 expTable[i] = exp((i / (real) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
2176 expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1)
2177 }
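 // expTable[i] thus holds sigmoid((i / (real) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP),
 // so a dot product f in (-MAX_EXP, MAX_EXP) is looked up via the index
 // (int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2)).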
2178 SaveArgs(argc, argv);
2179 TrainModel();
2180 return 0;
2181}
2182