1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <locale.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <unistd.h>
20#include <math.h>
#include <time.h>   // time(), localtime(), strftime(), clock() are used below
21#include <pthread.h>
22#include <collocatordb.h>
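// collocatordb.h (external): presumably the source of COLLOCATORDB and inc_collocator(),
// which type 5 uses to record positional co-occurrence counts instead of training vectors.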
23
24#define MAX_STRING 100
25#define EXP_TABLE_SIZE 1000
26#define MAX_EXP 6
27#define MAX_SENTENCE_LENGTH 1000
28#define MAX_CC 100
29#define MAX_CODE_LENGTH 40
30
31const int vocab_hash_size = 30000000; // Maximum 30M * 0.7 = 21M words in the vocabulary
32
33typedef float real; // Precision of float numbers
34
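// Vocabulary entry: cn is the corpus frequency, word the token string, code/codelen the word's
// Huffman code (used by hierarchical softmax), and point the indices of the inner tree nodes on
// the path from the root to the word.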
35struct vocab_word {
36 long long cn;
37 int *point;
38 char *word, *code, codelen;
39};
40
41char train_file[MAX_STRING], output_file[MAX_STRING];
42char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
43char save_net_file[MAX_STRING], read_net_file[MAX_STRING];
44char magic_stop_file[MAX_STRING];
45
46struct vocab_word *vocab;
47int binary = 0, type = 1, debug_mode = 2, window = 5, min_count = 5,
48 num_threads = 12, min_reduce = 1;
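// type selects the architecture: 0 = CBOW, 1 = skip-gram, 2 = cwindow, 3 = structured skip-gram,
// 4 = senna-style feed-forward net, 5 = collocation counting only (no vectors are trained or saved).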
49int *vocab_hash;
50long long *threadPos;
51int *threadIters;
52long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
53long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0,
54 classes = 0;
55real alpha = 0.025, starting_alpha, sample = 1e-3;
56real *syn0, *syn1, *syn1neg, *syn1nce, *expTable;
57real avgWordLength=0;
58clock_t start, start_clock;
59
60real *syn1_window, *syn1neg_window, *syn1nce_window;
61int w_offset, window_layer_size;
62
63int window_hidden_size = 500;
64real *syn_window_hidden, *syn_hidden_word, *syn_hidden_word_neg,
65 *syn_hidden_word_nce;
66
67int hs = 0, negative = 5;
68const int table_size = 1e8;
69int *table;
70
71long cc = 0;
72long tc = 1;
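// cc > 0: after initialization, ShowCollocations() dumps collocation responses for words from index cc on.
// tc > 0: when a vocabulary is read from file, word counts are recomputed on the current training file (-train-counts).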
73
74// contrastive negative sampling
75char negative_classes_file[MAX_STRING];
76int *word_to_group;
77int *group_to_table; //group_size*table_size
78int class_number;
79
80//nce
81real* noise_distribution;
82int nce = 0;
83
84//param caps
85real CAP_VALUE = 50;
86int cap = 0;
87
88COLLOCATORDB *cdb = NULL;
89
90void capParam(real* array, int index) {
91 if (array[index] > CAP_VALUE)
92 array[index] = CAP_VALUE;
93 else if (array[index] < -CAP_VALUE)
94 array[index] = -CAP_VALUE;
95}
96
97real hardTanh(real x) {
98 if (x >= 1) {
99 return 1;
100 } else if (x <= -1) {
101 return -1;
102 } else {
103 return x;
104 }
105}
106
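// Gradient gate for hardTanh: returns 0 only when x is already saturated and the update g would
// push it further outside [-1, 1]; otherwise the gradient is passed through unchanged.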
107real dHardTanh(real x, real g) {
108 if (x > 1 && g > 0) {
109 return 0;
110 }
111 if (x < -1 && g < 0) {
112 return 0;
113 }
114 return 1;
115}
116
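// Builds the table used for negative sampling: each word occupies a share of the table proportional
// to cn^0.75, so drawing a uniformly random entry samples words from the smoothed unigram
// distribution. The same probabilities are stored in noise_distribution for NCE.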
117void InitUnigramTable() {
118 int a, i;
119 long long train_words_pow = 0;
120 real d1, power = 0.75;
121 table = (int *) malloc(table_size * sizeof(int));
122 for (a = 0; a < vocab_size; a++)
123 train_words_pow += pow(vocab[a].cn, power);
124 i = 0;
125 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
126 for (a = 0; a < table_size; a++) {
127 table[a] = i;
128 if (a / (real) table_size > d1) {
129 i++;
130 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
131 }
132 if (i >= vocab_size)
133 i = vocab_size - 1;
134 }
135
136 noise_distribution = (real *) calloc(vocab_size, sizeof(real));
137 for (a = 0; a < vocab_size; a++)
138 noise_distribution[a] = pow(vocab[a].cn, power)
139 / (real) train_words_pow;
140}
141
142// Reads a single word from a file, assuming space + tab + EOL to be word boundaries
143void ReadWord(char *word, FILE *fin) {
144 int a = 0, ch;
145 while (!feof(fin)) {
146 ch = fgetc(fin);
147 if (ch == 13)
148 continue;
149 if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
150 if (a > 0) {
151 if (ch == '\n')
152 ungetc(ch, fin);
153 break;
154 }
155 if (ch == '\n') {
156 strcpy(word, (char *) "</s>");
157 return;
158 } else
159 continue;
160 }
161 word[a] = ch;
162 a++;
163 if (a >= MAX_STRING - 1)
164 a--; // Truncate too long words
165 }
166 word[a] = 0;
167}
168
169// Returns hash value of a word
170int GetWordHash(char *word) {
171 unsigned long long a, hash = 0;
172 for (a = 0; a < strlen(word); a++)
173 hash = hash * 257 + word[a];
174 hash = hash % vocab_hash_size;
175 return hash;
176}
177
178// Returns position of a word in the vocabulary; if the word is not found, returns -1
179int SearchVocab(char *word) {
180 unsigned int hash = GetWordHash(word);
181 while (1) {
182 if (vocab_hash[hash] == -1)
183 return -1;
184 if (!strcmp(word, vocab[vocab_hash[hash]].word))
185 return vocab_hash[hash];
186 hash = (hash + 1) % vocab_hash_size;
187 }
188 return -1;
189}
190
191// Reads a word and returns its index in the vocabulary
192int ReadWordIndex(FILE *fin) {
193 char word[MAX_STRING];
194 ReadWord(word, fin);
195 if (feof(fin))
196 return -1;
197 return SearchVocab(word);
198}
199
200// Adds a word to the vocabulary
201int AddWordToVocab(char *word) {
202 unsigned int hash, length = strlen(word) + 1;
203 if (length > MAX_STRING)
204 length = MAX_STRING;
205 vocab[vocab_size].word = (char *) calloc(length, sizeof(char));
206 strcpy(vocab[vocab_size].word, word);
207 vocab[vocab_size].cn = 0;
208 vocab_size++;
209 // Reallocate memory if needed
210 if (vocab_size + 2 >= vocab_max_size) {
211 vocab_max_size += 1000;
212 vocab = (struct vocab_word *) realloc(vocab,
213 vocab_max_size * sizeof(struct vocab_word));
214 }
215 hash = GetWordHash(word);
216 while (vocab_hash[hash] != -1)
217 hash = (hash + 1) % vocab_hash_size;
218 vocab_hash[hash] = vocab_size - 1;
219 return vocab_size - 1;
220}
221
222// Used later for sorting by word counts
223int VocabCompare(const void *a, const void *b) {
224 return ((struct vocab_word *) b)->cn - ((struct vocab_word *) a)->cn;
225}
226
227// Sorts the vocabulary by frequency using word counts
228void SortVocab() {
229 int a, size;
230 unsigned int hash;
231 // Sort the vocabulary and keep </s> at the first position
232 qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
233 for (a = 0; a < vocab_hash_size; a++)
234 vocab_hash[a] = -1;
235 size = vocab_size;
236 train_words = 0;
237 for (a = 0; a < size; a++) {
238 avgWordLength += vocab[a].cn * (strlen(vocab[a].word) + 1);
239 // Words occurring less than min_count times will be discarded from the vocab
240 if ((vocab[a].cn < min_count) && (a != 0)) {
241 vocab_size--;
242 free(vocab[a].word);
243 } else {
244 // Hash has to be re-computed, since it is no longer valid after sorting
245 hash = GetWordHash(vocab[a].word);
246 while (vocab_hash[hash] != -1)
247 hash = (hash + 1) % vocab_hash_size;
248 vocab_hash[hash] = a;
249 train_words += vocab[a].cn;
250 }
251 }
252 avgWordLength /= train_words;
253 vocab = (struct vocab_word *) realloc(vocab,
254 (vocab_size + 1) * sizeof(struct vocab_word));
255 // Allocate memory for the binary tree construction
256 for (a = 0; a < vocab_size; a++) {
257 vocab[a].code = (char *) calloc(MAX_CODE_LENGTH, sizeof(char));
258 vocab[a].point = (int *) calloc(MAX_CODE_LENGTH, sizeof(int));
259 }
260}
261
262// Reduces the vocabulary by removing infrequent tokens
263void ReduceVocab() {
264 int a, b = 0;
265 unsigned int hash;
266 for (a = 0; a < vocab_size; a++)
267 if (vocab[a].cn > min_reduce) {
268 vocab[b].cn = vocab[a].cn;
269 vocab[b].word = vocab[a].word;
270 b++;
271 } else
272 free(vocab[a].word);
273 vocab_size = b;
274 for (a = 0; a < vocab_hash_size; a++)
275 vocab_hash[a] = -1;
276 for (a = 0; a < vocab_size; a++) {
277 // Hash has to be re-computed, since it is no longer valid
278 hash = GetWordHash(vocab[a].word);
279 while (vocab_hash[hash] != -1)
280 hash = (hash + 1) % vocab_hash_size;
281 vocab_hash[hash] = a;
282 }
283 fflush(stdout);
284 min_reduce++;
285}
286
287// Create binary Huffman tree using the word counts
288// Frequent words will have short unique binary codes
289void CreateBinaryTree() {
290 long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
291 char code[MAX_CODE_LENGTH];
292 long long *count = (long long *) calloc(vocab_size * 2 + 1,
293 sizeof(long long));
294 long long *binary = (long long *) calloc(vocab_size * 2 + 1,
295 sizeof(long long));
296 long long *parent_node = (long long *) calloc(vocab_size * 2 + 1,
297 sizeof(long long));
298 // todo: this needs to operate on a sorted copy of vocab[a].cn if we use local counts
299 for (a = 0; a < vocab_size; a++)
300 count[a] = vocab[a].cn;
301 for (a = vocab_size; a < vocab_size * 2; a++)
302 count[a] = 1e15;
303 pos1 = vocab_size - 1;
304 pos2 = vocab_size;
305 // Following algorithm constructs the Huffman tree by adding one node at a time
306 for (a = 0; a < vocab_size - 1; a++) {
307 // First, find two smallest nodes 'min1, min2'
308 if (pos1 >= 0) {
309 if (count[pos1] < count[pos2]) {
310 min1i = pos1;
311 pos1--;
312 } else {
313 min1i = pos2;
314 pos2++;
315 }
316 } else {
317 min1i = pos2;
318 pos2++;
319 }
320 if (pos1 >= 0) {
321 if (count[pos1] < count[pos2]) {
322 min2i = pos1;
323 pos1--;
324 } else {
325 min2i = pos2;
326 pos2++;
327 }
328 } else {
329 min2i = pos2;
330 pos2++;
331 }
332 count[vocab_size + a] = count[min1i] + count[min2i];
333 parent_node[min1i] = vocab_size + a;
334 parent_node[min2i] = vocab_size + a;
335 binary[min2i] = 1;
336 }
337 // Now assign binary code to each vocabulary word
338 for (a = 0; a < vocab_size; a++) {
339 b = a;
340 i = 0;
341 while (1) {
342 code[i] = binary[b];
343 point[i] = b;
344 i++;
345 b = parent_node[b];
346 if (b == vocab_size * 2 - 2)
347 break;
348 }
349 vocab[a].codelen = i;
350 vocab[a].point[0] = vocab_size - 2;
351 for (b = 0; b < i; b++) {
352 vocab[a].code[i - b - 1] = code[b];
353 vocab[a].point[i - b] = point[b] - vocab_size;
354 }
355 }
356 free(count);
357 free(binary);
358 free(parent_node);
359}
360
361void LearnVocabFromTrainFile() {
362 char word[MAX_STRING];
363 FILE *fin;
364 long long a, i;
365 for (a = 0; a < vocab_hash_size; a++)
366 vocab_hash[a] = -1;
367 fin = fopen(train_file, "rb");
368 if (fin == NULL) {
369 printf("ERROR: training data file not found!\n");
370 exit(1);
371 }
372 vocab_size = 0;
373 AddWordToVocab((char *) "</s>");
374 while (1) {
375 ReadWord(word, fin);
376 if (feof(fin))
377 break;
378 train_words++;
379 if ((debug_mode > 1) && (train_words % 100000 == 0)) {
380 printf("%lldK%c", train_words / 1000, 13);
381 fflush(stdout);
382 }
383 i = SearchVocab(word);
384 if (i == -1) {
385 a = AddWordToVocab(word);
386 vocab[a].cn = 1;
387 } else
388 vocab[i].cn++;
389 if (vocab_size > vocab_hash_size * 0.7)
390 ReduceVocab();
391 }
392 SortVocab();
393 if (debug_mode > 0) {
394 printf("Vocab size: %lld\n", vocab_size);
395 printf("Words in train file: %lld\n", train_words);
396 }
397 file_size = ftell(fin);
398 fclose(fin);
399}
400
401void SaveVocab() {
402 long long i;
403 FILE *fo = fopen(save_vocab_file, "wb");
404 for (i = 0; i < vocab_size; i++)
405 fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
406 fclose(fo);
407}
408
409void ReadVocab() {
410 long long a, i = 0;
411 char c;
412 char word[MAX_STRING];
413 FILE *fin = fopen(read_vocab_file, "rb");
414 if (fin == NULL) {
415 printf("Vocabulary file not found\n");
416 exit(1);
417 }
418 for (a = 0; a < vocab_hash_size; a++)
419 vocab_hash[a] = -1;
420 vocab_size = 0;
421 while (1) {
422 ReadWord(word, fin);
423 if (feof(fin))
424 break;
425 a = AddWordToVocab(word);
426 fscanf(fin, "%lld%c", &vocab[a].cn, &c);
427 i++;
428 }
429 fclose(fin);
430 SortVocab();
431
432 if (tc > 0) {
433 // recalculate counts for the current corpus
434 // adapted from LearnVocabFromTrainFile()
435 // note that we don't sort or rehash the vocabulary again, we only adapt vocab[.].cn.
436 fin = fopen(train_file, "rb");
437 if (fin == NULL) {
438 printf("ERROR: training data file not found!\n");
439 exit(1);
440 }
441 // reset vocabulary counts
442 for (a = 0; a < vocab_size; a++)
443 vocab[a].cn = 0;
444 train_words = 0;
445 while (1) {
446 ReadWord(word, fin);
447 if (feof(fin))
448 break;
449 if ((debug_mode > 1) && (train_words % 100000 == 0)) {
450 printf("%lldK%c", train_words / 1000, 13);
451 fflush(stdout);
452 }
453 i = SearchVocab(word);
454 // the word must be in the vocabulary but we don't issue a warning,
455 // because it may have been cut off due to minfreq.
456 if (i >= 0) {
457 vocab[i].cn++;
458 train_words++;
459 }
460 }
461 // we cannot have 0 counts.
462 for (a = 0; a < vocab_size; a++) {
463 if(vocab[a].cn == 0) {
464 vocab[a].cn = 1;
465 train_words++;
466 }
467 }
468 if (debug_mode > 0) {
469 printf("Vocab size: %lld\n", vocab_size);
470 printf("Words in current train file: %'lld\n", train_words);
471 }
472 fseek(fin, 0, SEEK_END);
473 file_size = ftell(fin);
474 fclose(fin);
475 }
476 train_words = file_size / avgWordLength;
477 if(debug_mode > 0)
478 printf("Estimated words in train file: %'lld\n", train_words);
479}
480
481void InitClassUnigramTable() {
482 // TODO: this probably needs to be adapted for dealing with subcorpus adjusted vocabulary counts
483 long long a, c;
484 printf("loading class unigrams \n");
485 FILE *fin = fopen(negative_classes_file, "rb");
486 if (fin == NULL) {
487 printf("ERROR: class file not found!\n");
488 exit(1);
489 }
490 word_to_group = (int *) malloc(vocab_size * sizeof(int));
491 for (a = 0; a < vocab_size; a++)
492 word_to_group[a] = -1;
493 char class[MAX_STRING];
494 char prev_class[MAX_STRING];
495 prev_class[0] = 0;
496 char word[MAX_STRING];
497 class_number = -1;
498 while (1) {
499 if (feof(fin))
500 break;
501 ReadWord(class, fin);
502 ReadWord(word, fin);
503 int word_index = SearchVocab(word);
504 if (word_index != -1) {
505 if (strcmp(class, prev_class) != 0) {
506 class_number++;
507 strcpy(prev_class, class);
508 }
509 word_to_group[word_index] = class_number;
510 }
511 ReadWord(word, fin);
512 }
513 class_number++;
514 fclose(fin);
515
516 group_to_table = (int *) malloc(table_size * class_number * sizeof(int));
517 long long train_words_pow = 0;
518 real d1, power = 0.75;
519
520 for (c = 0; c < class_number; c++) {
521 long long offset = c * table_size;
522 train_words_pow = 0;
523 for (a = 0; a < vocab_size; a++)
524 if (word_to_group[a] == c)
525 train_words_pow += pow(vocab[a].cn, power);
526 int i = 0;
527 while (word_to_group[i] != c && i < vocab_size)
528 i++;
529 d1 = pow(vocab[i].cn, power) / (real) train_words_pow;
530 for (a = 0; a < table_size; a++) {
531 //printf("index %lld , word %d\n", a, i);
532 group_to_table[offset + a] = i;
533 if (a / (real) table_size > d1) {
534 i++;
535 while (word_to_group[i] != c && i < vocab_size)
536 i++;
537 d1 += pow(vocab[i].cn, power) / (real) train_words_pow;
538 }
539 if (i >= vocab_size)
540 while (word_to_group[i] != c && i >= 0)
541 i--;
542 }
543 }
544}
545
546void SaveArgs(int argc, char **argv) {
547 unsigned int i;
548 char args_file[MAX_STRING];
549 strcpy(args_file, output_file);
550 strcat(args_file, ".args");
551 FILE *fargs = fopen(args_file, "w");
552 if (fargs == NULL) {
553 printf("Cannot save args to %s.\n", args_file);
554 return;
555 }
556
557 for(i=1; i<argc; i++)
558 fprintf(fargs, "%s ", argv[i]);
559
560 fprintf(fargs, "\n");
561 fclose(fargs);
562
563 return;
564}
565
566void SaveNet() {
567 if (type == 4 || negative <= 0) {
568 fprintf(stderr,
569 "save-net only supported for type 0,1,2,3 with negative sampling\n");
570 return;
571 }
572
573 FILE *fnet = fopen(save_net_file, "wb");
574 if (fnet == NULL) {
575 printf("Net parameter file not found\n");
576 exit(1);
577 }
578 fwrite(syn0, sizeof(real), vocab_size * layer1_size, fnet);
579 if (type == 0 || type == 1) {
580 fwrite(syn1neg, sizeof(real), vocab_size * layer1_size, fnet);
581 }
582 if (type == 2 || type == 3) {
583 fwrite(syn1neg_window, sizeof(real), vocab_size * window_layer_size, fnet);
584 }
585 fclose(fnet);
586}
587
588void InitNet() {
589 long long a, b;
590 unsigned long long next_random = 1;
591 long long read;
592
593 window_layer_size = layer1_size * window * 2;
594 a = posix_memalign((void **) &syn0, 128,
595 (long long) vocab_size * layer1_size * sizeof(real));
596 if (syn0 == NULL) {
597 printf("Memory allocation failed\n");
598 exit(1);
599 }
600
601 if (hs) {
602 a = posix_memalign((void **) &syn1, 128,
603 (long long) vocab_size * layer1_size * sizeof(real));
604 if (syn1 == NULL) {
605 printf("Memory allocation failed\n");
606 exit(1);
607 }
608 a = posix_memalign((void **) &syn1_window, 128,
609 (long long) vocab_size * window_layer_size * sizeof(real));
610 if (syn1_window == NULL) {
611 printf("Memory allocation failed\n");
612 exit(1);
613 }
614 a = posix_memalign((void **) &syn_hidden_word, 128,
615 (long long) vocab_size * window_hidden_size * sizeof(real));
616 if (syn_hidden_word == NULL) {
617 printf("Memory allocation failed\n");
618 exit(1);
619 }
620
621 for (a = 0; a < vocab_size; a++)
622 for (b = 0; b < layer1_size; b++)
623 syn1[a * layer1_size + b] = 0;
624 for (a = 0; a < vocab_size; a++)
625 for (b = 0; b < window_layer_size; b++)
626 syn1_window[a * window_layer_size + b] = 0;
627 for (a = 0; a < vocab_size; a++)
628 for (b = 0; b < window_hidden_size; b++)
629 syn_hidden_word[a * window_hidden_size + b] = 0;
630 }
631 if (negative > 0) {
632 if (type == 0 || type == 1) {
633 a = posix_memalign((void **) &syn1neg, 128,
634 (long long) vocab_size * layer1_size * sizeof(real));
635 if (syn1neg == NULL) {
636 printf("Memory allocation failed\n");
637 exit(1);
638 }
639 for (a = 0; a < vocab_size; a++)
640 for (b = 0; b < layer1_size; b++)
641 syn1neg[a * layer1_size + b] = 0;
642 } else if (type == 2 || type == 3) {
643 a = posix_memalign((void **) &syn1neg_window, 128,
644 (long long) vocab_size * window_layer_size * sizeof(real));
645 if (syn1neg_window == NULL) {
646 printf("Memory allocation failed\n");
647 exit(1);
648 }
649 for (a = 0; a < vocab_size; a++)
650 for (b = 0; b < window_layer_size; b++)
651 syn1neg_window[a * window_layer_size + b] = 0;
652 } else if (type == 4) {
653 a = posix_memalign((void **) &syn_hidden_word_neg, 128,
654 (long long) vocab_size * window_hidden_size * sizeof(real));
655 if (syn_hidden_word_neg == NULL) {
656 printf("Memory allocation failed\n");
657 exit(1);
658 }
659 for (a = 0; a < vocab_size; a++)
660 for (b = 0; b < window_hidden_size; b++)
661 syn_hidden_word_neg[a * window_hidden_size + b] = 0;
662 }
663 }
664 if (nce > 0) {
665 a = posix_memalign((void **) &syn1nce, 128,
666 (long long) vocab_size * layer1_size * sizeof(real));
667 if (syn1nce == NULL) {
668 printf("Memory allocation failed\n");
669 exit(1);
670 }
671 a = posix_memalign((void **) &syn1nce_window, 128,
672 (long long) vocab_size * window_layer_size * sizeof(real));
673 if (syn1nce_window == NULL) {
674 printf("Memory allocation failed\n");
675 exit(1);
676 }
677 a = posix_memalign((void **) &syn_hidden_word_nce, 128,
678 (long long) vocab_size * window_hidden_size * sizeof(real));
679 if (syn_hidden_word_nce == NULL) {
680 printf("Memory allocation failed\n");
681 exit(1);
682 }
683
684 for (a = 0; a < vocab_size; a++)
685 for (b = 0; b < layer1_size; b++)
686 syn1nce[a * layer1_size + b] = 0;
687 for (a = 0; a < vocab_size; a++)
688 for (b = 0; b < window_layer_size; b++)
689 syn1nce_window[a * window_layer_size + b] = 0;
690 for (a = 0; a < vocab_size; a++)
691 for (b = 0; b < window_hidden_size; b++)
692 syn_hidden_word_nce[a * window_hidden_size + b] = 0;
693 }
694
695 if (type == 4) {
696 a = posix_memalign((void **) &syn_window_hidden, 128,
697 window_hidden_size * window_layer_size * sizeof(real));
698 if (syn_window_hidden == NULL) {
699 printf("Memory allocation failed\n");
700 exit(1);
701 }
702 for (a = 0; a < window_hidden_size * window_layer_size; a++) {
703 next_random = next_random * (unsigned long long) 25214903917 + 11;
704 syn_window_hidden[a] = (((next_random & 0xFFFF) / (real) 65536)
705 - 0.5) / (window_hidden_size * window_layer_size);
706 }
707 }
708
709 if (read_net_file[0] == 0) {
710 for (a = 0; a < vocab_size; a++)
711 for (b = 0; b < layer1_size; b++) {
712 next_random = next_random * (unsigned long long) 25214903917
713 + 11;
714 syn0[a * layer1_size + b] = (((next_random & 0xFFFF)
715 / (real) 65536) - 0.5) / layer1_size;
716 }
717 } else if ((type == 0 || type == 1) && negative > 0) {
718 FILE *fnet = fopen(read_net_file, "rb");
719 if (fnet == NULL) {
720 printf("Net parameter file not found\n");
721 exit(1);
722 }
723 printf("vocab-size: %lld, layer1_size: %lld\n",
724 vocab_size, layer1_size);
725 read = fread(syn0, sizeof(real), vocab_size * layer1_size, fnet);
726 if (read != vocab_size * layer1_size) {
727 fprintf(stderr, "read-net failed %lld\n", read);
728 exit(-1);
729 }
730 read = fread(syn1neg, sizeof(real),
731 vocab_size * layer1_size, fnet);
732 if (read != (long long) vocab_size * layer1_size) {
733 fprintf(stderr, "read-net failed, read %lld, expected: %lld\n",
734 read,
735    (long long) vocab_size * layer1_size);
736 exit(-1);
737 }
738 fgetc(fnet);
739 if (!feof(fnet)) {
740 fprintf(stderr,
741 "Remaining bytes in net-file after read-net. File position: %ld\n",
742 ftell(fnet));
743 exit(-1);
744 }
745 fclose(fnet);
746 } else if ((type == 2 || type == 3) && negative > 0) {
747 FILE *fnet = fopen(read_net_file, "rb");
748 if (fnet == NULL) {
749 printf("Net parameter file not found\n");
750 exit(1);
751 }
752 printf("vocab-size: %lld, layer1_size: %lld, window_layer_size %d\n",
753 vocab_size, layer1_size, window_layer_size);
754 read = fread(syn0, sizeof(real), vocab_size * layer1_size, fnet);
755 if (read != vocab_size * layer1_size) {
756 fprintf(stderr, "read-net failed %lld\n", read);
757 exit(-1);
758 }
759 read = fread(syn1neg_window, sizeof(real),
760 vocab_size * window_layer_size, fnet);
761 if (read != (long long) vocab_size * window_layer_size) {
762 fprintf(stderr, "read-net failed, read %lld, expected: %lld\n",
763 read,
764    (long long) vocab_size * window_layer_size);
765 exit(-1);
766 }
767 fgetc(fnet);
768 if (!feof(fnet)) {
769 fprintf(stderr,
770 "Remaining bytes in net-file after read-net. File position: %ld\n",
771 ftell(fnet));
772 exit(-1);
773 }
774 fclose(fnet);
775 } else {
776 fprintf(stderr,
777 "read-net only supported for type 3 with negative sampling\n");
778 exit(-1);
779 }
780
781 CreateBinaryTree();
782}
783
784char *currentDateTime(char *buf, real offset) {
785 time_t t;
786 time(&t);
787 t += (long) offset;
788 struct tm tstruct;
789 tstruct = *localtime(&t);
790 strftime(buf, 80, "%c", &tstruct);
791 return buf;
792}
793
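// Progress monitor: once per second, combines each worker's file position and remaining iterations
// into an overall completion estimate and prints learning rate, throughput, elapsed time, time to go
// and ETA. Terminates once every worker has set its threadPos entry to -1.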
794void *MonitorThread(void *id) {
795  char *timebuf = malloc(80);
796 int i, n=num_threads;
797 long long sum;
798 sleep(1);
799 while(n > 0) {
800 sleep(1);
801 sum = n = 0;
802 for(i=0; i < num_threads; i++) {
803 if(threadPos[i] >= 0) {
804 sum += (iter - threadIters[i]) * file_size / num_threads + threadPos[i] - (file_size / num_threads) * i;
805 n++;
806 } else {
807 sum += iter * file_size / num_threads;
808 }
809 }
810 if(n == 0)
811 break;
812 real finished_portion = (real) sum / (float) (file_size * iter);
813 long long now = time(NULL);
814 long long elapsed = (now - start);
815 long long ttg = ((1.0 / finished_portion) * (real) elapsed - elapsed);
816
817 printf("\rAlpha: %.3f Done: %.2f%% with %.2fKB/s TE: %llds TTG: %llds ETA: %s\033[K",
818 alpha,
819 finished_portion * 100,
820 (float) sum / elapsed / 1000,
821 elapsed,
822 ttg,
823 currentDateTime(timebuf, ttg)
824 );
825 fflush(stdout);
826 }
827 pthread_exit(NULL);
828}
829
830void *TrainModelThread(void *id) {
831 long long a, b, d, cw, word, last_word, sentence_length = 0,
832 sentence_position = 0;
833 long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
834 long long l1, l2, c, target, label, local_iter = iter;
835 unsigned long long next_random = (long long) id;
836 real f, g;
837 int input_len_1 = layer1_size;
838 int window_offset = -1;
839 if (type == 2 || type == 4) {
840 input_len_1 = window_layer_size;
841 }
842 real *neu1 = (real *) calloc(input_len_1, sizeof(real));
843 real *neu1e = (real *) calloc(input_len_1, sizeof(real));
844 threadIters[(long) id] = iter;
845
846 int input_len_2 = 0;
847 if (type == 4) {
848 input_len_2 = window_hidden_size;
849 }
850 real *neu2 = (real *) calloc(input_len_2, sizeof(real));
851 real *neu2e = (real *) calloc(input_len_2, sizeof(real));
852
853 FILE *fi = fopen(train_file, "rb");
854 long long start_pos = file_size / (long long) num_threads * (long long) id;
855 long long end_pos = file_size / (long long) num_threads * (long long) (id + 1) -1;
856 long long current_pos = start_pos;
857 long long last_pos = start_pos;
858 fseek(fi, start_pos, SEEK_SET);
859 while (1) {
860 if (word_count - last_word_count > 10000) {
861 // if ((current_pos - last_pos > 100000)) {
862 // PF: changed back, because it seems that alpha is not correctly adjusted otherwise.
863 word_count_actual += word_count - last_word_count;
864 last_pos = current_pos;
865 last_word_count = word_count;
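			// Decay the learning rate linearly with overall progress
			// (word_count_actual / (iter * train_words)), but never below 0.01% of the starting rate.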
866 alpha = starting_alpha
867 * (1 - word_count_actual / (real) (iter * train_words + 1));
868 if (alpha < starting_alpha * 0.0001)
869 alpha = starting_alpha * 0.0001;
870 }
871 if (sentence_length == 0) {
872 while (1) {
873 word = ReadWordIndex(fi);
874 if (feof(fi))
875 break;
876 if (word == -1)
877 continue;
878 word_count++;
879 if (word == 0)
880 break;
881    // The subsampling randomly discards frequent words while keeping the ranking the same
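				// With f = vocab[word].cn / train_words, a word is kept with probability
				// min(1, sqrt(sample / f) + sample / f); 'ran' below is exactly that value.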
882 if (sample > 0) {
883 real ran = (sqrt(vocab[word].cn / (sample * train_words))
884 + 1) * (sample * train_words) / vocab[word].cn;
885 next_random = next_random * (unsigned long long) 25214903917
886 + 11;
887 if (ran < (next_random & 0xFFFF) / (real) 65536) {
888 if (type == 3) // in structured skipgrams
889 word = -2; // keep the window position correct
890 else
891 continue;
892 }
893 }
894 sen[sentence_length] = word;
895 sentence_length++;
896 if (sentence_length >= MAX_SENTENCE_LENGTH)
897 break;
898 }
899 sentence_position = 0;
900 }
901 current_pos = threadPos[(long) id] = ftell(fi);
902 if (feof(fi) || current_pos >= end_pos ) {
903 word_count_actual += word_count - last_word_count;
904 threadIters[(long) id]--;
905 local_iter--;
906 if (local_iter == 0)
907 break;
908 if (magic_stop_file[0] && access(magic_stop_file, F_OK ) != -1) {
909 printf("Magic stop file %s found. Stopping traing ...\n", magic_stop_file);
910 break;
911 }
912 word_count = 0;
913 last_word_count = 0;
914 sentence_length = 0;
915 fseek(fi, file_size / (long long) num_threads * (long long) id,
916 SEEK_SET);
917 continue;
918 }
919 word = sen[sentence_position];
920 while (word == -2 && sentence_position<sentence_length)
921 word = sen[++sentence_position];
922 if (sentence_position>=sentence_length) {
923 sentence_length=0;
924 continue;
925 }
926 if (word < 0)
927 continue;
928 for (c = 0; c < input_len_1; c++)
929 neu1[c] = 0;
930 for (c = 0; c < input_len_1; c++)
931 neu1e[c] = 0;
932 for (c = 0; c < input_len_2; c++)
933 neu2[c] = 0;
934 for (c = 0; c < input_len_2; c++)
935 neu2e[c] = 0;
936 next_random = next_random * (unsigned long long) 25214903917 + 11;
937 b = next_random % window;
938 if (type == 0) { //train the cbow architecture
939 // in -> hidden
940 cw = 0;
941 for (a = b; a < window * 2 + 1 - b; a++)
942 if (a != window) {
943 c = sentence_position - window + a;
944 if (c < 0)
945 continue;
946 if (c >= sentence_length)
947 continue;
948 last_word = sen[c];
949 if (last_word == -1)
950 continue;
951 for (c = 0; c < layer1_size; c++)
952 neu1[c] += syn0[c + last_word * layer1_size];
953 cw++;
954 }
955 if (cw) {
956 for (c = 0; c < layer1_size; c++)
957 neu1[c] /= cw;
958 if (hs)
959 for (d = 0; d < vocab[word].codelen; d++) {
960 f = 0;
961 l2 = vocab[word].point[d] * layer1_size;
962 // Propagate hidden -> output
963 for (c = 0; c < layer1_size; c++)
964 f += neu1[c] * syn1[c + l2];
965 if (f <= -MAX_EXP)
966 continue;
967 else if (f >= MAX_EXP)
968 continue;
969 else
970 f = expTable[(int) ((f + MAX_EXP)
971 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
972 // 'g' is the gradient multiplied by the learning rate
973 g = (1 - vocab[word].code[d] - f) * alpha;
974 // Propagate errors output -> hidden
975 for (c = 0; c < layer1_size; c++)
976 neu1e[c] += g * syn1[c + l2];
977 // Learn weights hidden -> output
978 for (c = 0; c < layer1_size; c++)
979 syn1[c + l2] += g * neu1[c];
980 if (cap == 1)
981 for (c = 0; c < layer1_size; c++)
982 capParam(syn1, c + l2);
983 }
984 // NEGATIVE SAMPLING
985 if (negative > 0)
986 for (d = 0; d < negative + 1; d++) {
987 if (d == 0) {
988 target = word;
989 label = 1;
990 } else {
991 next_random = next_random
992 * (unsigned long long) 25214903917 + 11;
993 if (word_to_group != NULL
994 && word_to_group[word] != -1) {
995 target = word;
996 while (target == word) {
997 target = group_to_table[word_to_group[word]
998 * table_size
999 + (next_random >> 16) % table_size];
1000 next_random = next_random
1001 * (unsigned long long) 25214903917
1002 + 11;
1003 }
1004 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1005 } else {
1006 target =
1007 table[(next_random >> 16) % table_size];
1008 }
1009 if (target == 0)
1010 target = next_random % (vocab_size - 1) + 1;
1011 if (target == word)
1012 continue;
1013 label = 0;
1014 }
1015 l2 = target * layer1_size;
1016 f = 0;
1017 for (c = 0; c < layer1_size; c++)
1018 f += neu1[c] * syn1neg[c + l2];
1019 if (f > MAX_EXP)
1020 g = (label - 1) * alpha;
1021 else if (f < -MAX_EXP)
1022 g = (label - 0) * alpha;
1023 else
1024 g = (label
1025 - expTable[(int) ((f + MAX_EXP)
1026 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1027 * alpha;
1028 for (c = 0; c < layer1_size; c++)
1029 neu1e[c] += g * syn1neg[c + l2];
1030 for (c = 0; c < layer1_size; c++)
1031 syn1neg[c + l2] += g * neu1[c];
1032 if (cap == 1)
1033 for (c = 0; c < layer1_size; c++)
1034 capParam(syn1neg, c + l2);
1035 }
1036 // Noise Contrastive Estimation
1037 if (nce > 0)
1038 for (d = 0; d < nce + 1; d++) {
1039 if (d == 0) {
1040 target = word;
1041 label = 1;
1042 } else {
1043 next_random = next_random
1044 * (unsigned long long) 25214903917 + 11;
1045 if (word_to_group != NULL
1046 && word_to_group[word] != -1) {
1047 target = word;
1048 while (target == word) {
1049 target = group_to_table[word_to_group[word]
1050 * table_size
1051 + (next_random >> 16) % table_size];
1052 next_random = next_random
1053 * (unsigned long long) 25214903917
1054 + 11;
1055 }
1056 } else {
1057 target =
1058 table[(next_random >> 16) % table_size];
1059 }
1060 if (target == 0)
1061 target = next_random % (vocab_size - 1) + 1;
1062 if (target == word)
1063 continue;
1064 label = 0;
1065 }
1066 l2 = target * layer1_size;
1067 f = 0;
1068
1069 for (c = 0; c < layer1_size; c++)
1070 f += neu1[c] * syn1nce[c + l2];
1071 if (f > MAX_EXP)
1072 g = (label - 1) * alpha;
1073 else if (f < -MAX_EXP)
1074 g = (label - 0) * alpha;
1075 else {
1076 f = exp(f);
1077 g =
1078 (label
1079 - f
1080 / (noise_distribution[target]
1081 * nce + f)) * alpha;
1082 }
1083 for (c = 0; c < layer1_size; c++)
1084 neu1e[c] += g * syn1nce[c + l2];
1085 for (c = 0; c < layer1_size; c++)
1086 syn1nce[c + l2] += g * neu1[c];
1087 if (cap == 1)
1088 for (c = 0; c < layer1_size; c++)
1089 capParam(syn1nce, c + l2);
1090 }
1091 // hidden -> in
1092 for (a = b; a < window * 2 + 1 - b; a++)
1093 if (a != window) {
1094 c = sentence_position - window + a;
1095 if (c < 0)
1096 continue;
1097 if (c >= sentence_length)
1098 continue;
1099 last_word = sen[c];
1100 if (last_word == -1)
1101 continue;
1102 for (c = 0; c < layer1_size; c++)
1103 syn0[c + last_word * layer1_size] += neu1e[c];
1104 }
1105 }
1106 } else if (type == 1) { //train skip-gram
1107 for (a = b; a < window * 2 + 1 - b; a++)
1108 if (a != window) {
1109 c = sentence_position - window + a;
1110 if (c < 0)
1111 continue;
1112 if (c >= sentence_length)
1113 continue;
1114 last_word = sen[c];
1115 if (last_word == -1)
1116 continue;
1117 l1 = last_word * layer1_size;
1118 for (c = 0; c < layer1_size; c++)
1119 neu1e[c] = 0;
1120 // HIERARCHICAL SOFTMAX
1121 if (hs)
1122 for (d = 0; d < vocab[word].codelen; d++) {
1123 f = 0;
1124 l2 = vocab[word].point[d] * layer1_size;
1125 // Propagate hidden -> output
1126 for (c = 0; c < layer1_size; c++)
1127 f += syn0[c + l1] * syn1[c + l2];
1128 if (f <= -MAX_EXP)
1129 continue;
1130 else if (f >= MAX_EXP)
1131 continue;
1132 else
1133 f = expTable[(int) ((f + MAX_EXP)
1134 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1135 // 'g' is the gradient multiplied by the learning rate
1136 g = (1 - vocab[word].code[d] - f) * alpha;
1137 // Propagate errors output -> hidden
1138 for (c = 0; c < layer1_size; c++)
1139 neu1e[c] += g * syn1[c + l2];
1140 // Learn weights hidden -> output
1141 for (c = 0; c < layer1_size; c++)
1142 syn1[c + l2] += g * syn0[c + l1];
1143 if (cap == 1)
1144 for (c = 0; c < layer1_size; c++)
1145 capParam(syn1, c + l2);
1146 }
1147 // NEGATIVE SAMPLING
1148 if (negative > 0)
1149 for (d = 0; d < negative + 1; d++) {
1150 if (d == 0) {
1151 target = word;
1152 label = 1;
1153 } else {
1154 next_random = next_random
1155 * (unsigned long long) 25214903917 + 11;
1156 if (word_to_group != NULL
1157 && word_to_group[word] != -1) {
1158 target = word;
1159 while (target == word) {
1160 target =
1161 group_to_table[word_to_group[word]
1162 * table_size
1163 + (next_random >> 16)
1164 % table_size];
1165 next_random =
1166 next_random
1167 * (unsigned long long) 25214903917
1168 + 11;
1169 }
1170 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1171 } else {
1172 target = table[(next_random >> 16)
1173 % table_size];
1174 }
1175 if (target == 0)
1176 target = next_random % (vocab_size - 1) + 1;
1177 if (target == word)
1178 continue;
1179 label = 0;
1180 }
1181 l2 = target * layer1_size;
1182 f = 0;
1183 for (c = 0; c < layer1_size; c++)
1184 f += syn0[c + l1] * syn1neg[c + l2];
1185 if (f > MAX_EXP)
1186 g = (label - 1) * alpha;
1187 else if (f < -MAX_EXP)
1188 g = (label - 0) * alpha;
1189 else
1190 g =
1191 (label
1192 - expTable[(int) ((f + MAX_EXP)
1193 * (EXP_TABLE_SIZE
1194 / MAX_EXP / 2))])
1195 * alpha;
1196 for (c = 0; c < layer1_size; c++)
1197 neu1e[c] += g * syn1neg[c + l2];
1198 for (c = 0; c < layer1_size; c++)
1199 syn1neg[c + l2] += g * syn0[c + l1];
1200 if (cap == 1)
1201 for (c = 0; c < layer1_size; c++)
1202 capParam(syn1neg, c + l2);
1203 }
1204 //Noise Contrastive Estimation
1205 if (nce > 0)
1206 for (d = 0; d < nce + 1; d++) {
1207 if (d == 0) {
1208 target = word;
1209 label = 1;
1210 } else {
1211 next_random = next_random
1212 * (unsigned long long) 25214903917 + 11;
1213 if (word_to_group != NULL
1214 && word_to_group[word] != -1) {
1215 target = word;
1216 while (target == word) {
1217 target =
1218 group_to_table[word_to_group[word]
1219 * table_size
1220 + (next_random >> 16)
1221 % table_size];
1222 next_random =
1223 next_random
1224 * (unsigned long long) 25214903917
1225 + 11;
1226 }
1227 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1228 } else {
1229 target = table[(next_random >> 16)
1230 % table_size];
1231 }
1232 if (target == 0)
1233 target = next_random % (vocab_size - 1) + 1;
1234 if (target == word)
1235 continue;
1236 label = 0;
1237 }
1238 l2 = target * layer1_size;
1239 f = 0;
1240 for (c = 0; c < layer1_size; c++)
1241 f += syn0[c + l1] * syn1nce[c + l2];
1242 if (f > MAX_EXP)
1243 g = (label - 1) * alpha;
1244 else if (f < -MAX_EXP)
1245 g = (label - 0) * alpha;
1246 else {
1247 f = exp(f);
1248 g = (label
1249 - f
1250 / (noise_distribution[target]
1251 * nce + f)) * alpha;
1252 }
1253 for (c = 0; c < layer1_size; c++)
1254 neu1e[c] += g * syn1nce[c + l2];
1255 for (c = 0; c < layer1_size; c++)
1256 syn1nce[c + l2] += g * syn0[c + l1];
1257 if (cap == 1)
1258 for (c = 0; c < layer1_size; c++)
1259 capParam(syn1nce, c + l2);
1260 }
1261 // Learn weights input -> hidden
1262 for (c = 0; c < layer1_size; c++)
1263 syn0[c + l1] += neu1e[c];
1264 }
1265 } else if (type == 2) { //train the cwindow architecture
1266 // in -> hidden
1267 cw = 0;
1268 for (a = 0; a < window * 2 + 1; a++)
1269 if (a != window) {
1270 c = sentence_position - window + a;
1271 if (c < 0)
1272 continue;
1273 if (c >= sentence_length)
1274 continue;
1275 last_word = sen[c];
1276 if (last_word == -1)
1277 continue;
1278 window_offset = a * layer1_size;
1279 if (a > window)
1280 window_offset -= layer1_size;
1281 for (c = 0; c < layer1_size; c++)
1282 neu1[c + window_offset] += syn0[c
1283 + last_word * layer1_size];
1284 cw++;
1285 }
1286 if (cw) {
1287 if (hs)
1288 for (d = 0; d < vocab[word].codelen; d++) {
1289 f = 0;
1290 l2 = vocab[word].point[d] * window_layer_size;
1291 // Propagate hidden -> output
1292 for (c = 0; c < window_layer_size; c++)
1293 f += neu1[c] * syn1_window[c + l2];
1294 if (f <= -MAX_EXP)
1295 continue;
1296 else if (f >= MAX_EXP)
1297 continue;
1298 else
1299 f = expTable[(int) ((f + MAX_EXP)
1300 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1301 // 'g' is the gradient multiplied by the learning rate
1302 g = (1 - vocab[word].code[d] - f) * alpha;
1303 // Propagate errors output -> hidden
1304 for (c = 0; c < window_layer_size; c++)
1305 neu1e[c] += g * syn1_window[c + l2];
1306 // Learn weights hidden -> output
1307 for (c = 0; c < window_layer_size; c++)
1308 syn1_window[c + l2] += g * neu1[c];
1309 if (cap == 1)
1310 for (c = 0; c < window_layer_size; c++)
1311 capParam(syn1_window, c + l2);
1312 }
1313 // NEGATIVE SAMPLING
1314 if (negative > 0)
1315 for (d = 0; d < negative + 1; d++) {
1316 if (d == 0) {
1317 target = word;
1318 label = 1;
1319 } else {
1320 next_random = next_random
1321 * (unsigned long long) 25214903917 + 11;
1322 if (word_to_group != NULL
1323 && word_to_group[word] != -1) {
1324 target = word;
1325 while (target == word) {
1326 target = group_to_table[word_to_group[word]
1327 * table_size
1328 + (next_random >> 16) % table_size];
1329 next_random = next_random
1330 * (unsigned long long) 25214903917
1331 + 11;
1332 }
1333 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1334 } else {
1335 target =
1336 table[(next_random >> 16) % table_size];
1337 }
1338 if (target == 0)
1339 target = next_random % (vocab_size - 1) + 1;
1340 if (target == word)
1341 continue;
1342 label = 0;
1343 }
1344 l2 = target * window_layer_size;
1345 f = 0;
1346 for (c = 0; c < window_layer_size; c++)
1347 f += neu1[c] * syn1neg_window[c + l2];
1348 if (f > MAX_EXP)
1349 g = (label - 1) * alpha;
1350 else if (f < -MAX_EXP)
1351 g = (label - 0) * alpha;
1352 else
1353 g = (label
1354 - expTable[(int) ((f + MAX_EXP)
1355 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1356 * alpha;
1357 for (c = 0; c < window_layer_size; c++)
1358 neu1e[c] += g * syn1neg_window[c + l2];
1359 for (c = 0; c < window_layer_size; c++)
1360 syn1neg_window[c + l2] += g * neu1[c];
1361 if (cap == 1)
1362 for (c = 0; c < window_layer_size; c++)
1363 capParam(syn1neg_window, c + l2);
1364 }
1365 // Noise Contrastive Estimation
1366 if (nce > 0)
1367 for (d = 0; d < nce + 1; d++) {
1368 if (d == 0) {
1369 target = word;
1370 label = 1;
1371 } else {
1372 next_random = next_random
1373 * (unsigned long long) 25214903917 + 11;
1374 if (word_to_group != NULL
1375 && word_to_group[word] != -1) {
1376 target = word;
1377 while (target == word) {
1378 target = group_to_table[word_to_group[word]
1379 * table_size
1380 + (next_random >> 16) % table_size];
1381 next_random = next_random
1382 * (unsigned long long) 25214903917
1383 + 11;
1384 }
1385 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1386 } else {
1387 target =
1388 table[(next_random >> 16) % table_size];
1389 }
1390 if (target == 0)
1391 target = next_random % (vocab_size - 1) + 1;
1392 if (target == word)
1393 continue;
1394 label = 0;
1395 }
1396 l2 = target * window_layer_size;
1397 f = 0;
1398 for (c = 0; c < window_layer_size; c++)
1399 f += neu1[c] * syn1nce_window[c + l2];
1400 if (f > MAX_EXP)
1401 g = (label - 1) * alpha;
1402 else if (f < -MAX_EXP)
1403 g = (label - 0) * alpha;
1404 else {
1405 f = exp(f);
1406 g =
1407 (label
1408 - f
1409 / (noise_distribution[target]
1410 * nce + f)) * alpha;
1411 }
1412 for (c = 0; c < window_layer_size; c++)
1413 neu1e[c] += g * syn1nce_window[c + l2];
1414 for (c = 0; c < window_layer_size; c++)
1415 syn1nce_window[c + l2] += g * neu1[c];
1416 if (cap == 1)
1417 for (c = 0; c < window_layer_size; c++)
1418 capParam(syn1nce_window, c + l2);
1419 }
1420 // hidden -> in
1421 for (a = 0; a < window * 2 + 1; a++)
1422 if (a != window) {
1423 c = sentence_position - window + a;
1424 if (c < 0)
1425 continue;
1426 if (c >= sentence_length)
1427 continue;
1428 last_word = sen[c];
1429 if (last_word == -1)
1430 continue;
1431 window_offset = a * layer1_size;
1432 if (a > window)
1433 window_offset -= layer1_size;
1434 for (c = 0; c < layer1_size; c++)
1435 syn0[c + last_word * layer1_size] += neu1e[c
1436 + window_offset];
1437 }
1438 }
1439 } else if (type == 3) { //train structured skip-gram
1440 for (a = 0; a < window * 2 + 1; a++)
1441 if (a != window) {
1442 c = sentence_position - window + a;
1443 if (c < 0)
1444 continue;
1445 if (c >= sentence_length)
1446 continue;
1447 last_word = sen[c];
1448 if (last_word < 0)
1449 continue;
1450 l1 = last_word * layer1_size;
1451 window_offset = a * layer1_size;
1452 if (a > window)
1453 window_offset -= layer1_size;
1454 for (c = 0; c < layer1_size; c++)
1455 neu1e[c] = 0;
1456 // HIERARCHICAL SOFTMAX
1457 if (hs)
1458 for (d = 0; d < vocab[word].codelen; d++) {
1459 f = 0;
1460 l2 = vocab[word].point[d] * window_layer_size;
1461 // Propagate hidden -> output
1462 for (c = 0; c < layer1_size; c++)
1463 f += syn0[c + l1]
1464 * syn1_window[c + l2 + window_offset];
1465 if (f <= -MAX_EXP)
1466 continue;
1467 else if (f >= MAX_EXP)
1468 continue;
1469 else
1470 f = expTable[(int) ((f + MAX_EXP)
1471 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1472 // 'g' is the gradient multiplied by the learning rate
1473 g = (1 - vocab[word].code[d] - f) * alpha;
1474 // Propagate errors output -> hidden
1475 for (c = 0; c < layer1_size; c++)
1476 neu1e[c] += g
1477 * syn1_window[c + l2 + window_offset];
1478 // Learn weights hidden -> output
1479 for (c = 0; c < layer1_size; c++)
1480 syn1[c + l2 + window_offset] += g
1481 * syn0[c + l1];
1482 if (cap == 1)
1483 for (c = 0; c < layer1_size; c++)
1484 capParam(syn1, c + l2 + window_offset);
1485 }
1486 // NEGATIVE SAMPLING
1487 if (negative > 0)
1488 for (d = 0; d < negative + 1; d++) {
1489 if (d == 0) {
1490 target = word;
1491 label = 1;
1492 } else {
1493 next_random = next_random
1494 * (unsigned long long) 25214903917 + 11;
1495 if (word_to_group != NULL
1496 && word_to_group[word] != -1) {
1497 target = word;
1498 while (target == word) {
1499 target =
1500 group_to_table[word_to_group[word]
1501 * table_size
1502 + (next_random >> 16)
1503 % table_size];
1504 next_random =
1505 next_random
1506 * (unsigned long long) 25214903917
1507 + 11;
1508 }
1509 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1510 } else {
1511 target = table[(next_random >> 16)
1512 % table_size];
1513 }
1514 if (target == 0)
1515 target = next_random % (vocab_size - 1) + 1;
1516 if (target == word)
1517 continue;
1518 label = 0;
1519 }
1520 l2 = target * window_layer_size;
1521 f = 0;
1522 for (c = 0; c < layer1_size; c++)
1523 f +=
1524 syn0[c + l1]
1525 * syn1neg_window[c + l2
1526 + window_offset];
1527 if (f > MAX_EXP)
1528 g = (label - 1) * alpha;
1529 else if (f < -MAX_EXP)
1530 g = (label - 0) * alpha;
1531 else
1532 g =
1533 (label
1534 - expTable[(int) ((f + MAX_EXP)
1535 * (EXP_TABLE_SIZE
1536 / MAX_EXP / 2))])
1537 * alpha;
1538 if(debug_mode > 2 && ((long long) id) == 0) {
1539 printf("negative sampling %lld for input (word) %s (#%lld), target (last word) %s returned %s (#%lld), ", d, vocab[word].word, word, vocab[last_word].word, vocab[target].word, target);
1540 printf("label %lld, a %lld, gain %.4f\n", label, a-window, g);
1541 }
1542 for (c = 0; c < layer1_size; c++)
1543 neu1e[c] +=
1544 g
1545 * syn1neg_window[c + l2
1546 + window_offset];
1547 for (c = 0; c < layer1_size; c++)
1548 syn1neg_window[c + l2 + window_offset] += g
1549 * syn0[c + l1];
1550 if (cap == 1)
1551 for (c = 0; c < layer1_size; c++)
1552 capParam(syn1neg_window,
1553 c + l2 + window_offset);
1554 }
1555     // Noise Contrastive Estimation
1556 if (nce > 0)
1557 for (d = 0; d < nce + 1; d++) {
1558 if (d == 0) {
1559 target = word;
1560 label = 1;
1561 } else {
1562 next_random = next_random
1563 * (unsigned long long) 25214903917 + 11;
1564 if (word_to_group != NULL
1565 && word_to_group[word] != -1) {
1566 target = word;
1567 while (target == word) {
1568 target =
1569 group_to_table[word_to_group[word]
1570 * table_size
1571 + (next_random >> 16)
1572 % table_size];
1573 next_random =
1574 next_random
1575 * (unsigned long long) 25214903917
1576 + 11;
1577 }
1578 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1579 } else {
1580 target = table[(next_random >> 16)
1581 % table_size];
1582 }
1583 if (target == 0)
1584 target = next_random % (vocab_size - 1) + 1;
1585 if (target == word)
1586 continue;
1587 label = 0;
1588 }
1589 l2 = target * window_layer_size;
1590 f = 0;
1591 for (c = 0; c < layer1_size; c++)
1592 f +=
1593 syn0[c + l1]
1594 * syn1nce_window[c + l2
1595 + window_offset];
1596 if (f > MAX_EXP)
1597 g = (label - 1) * alpha;
1598 else if (f < -MAX_EXP)
1599 g = (label - 0) * alpha;
1600 else {
1601 f = exp(f);
1602 g = (label
1603 - f
1604 / (noise_distribution[target]
1605 * nce + f)) * alpha;
1606 }
1607 for (c = 0; c < layer1_size; c++)
1608 neu1e[c] +=
1609 g
1610 * syn1nce_window[c + l2
1611 + window_offset];
1612 for (c = 0; c < layer1_size; c++)
1613 syn1nce_window[c + l2 + window_offset] += g
1614 * syn0[c + l1];
1615 if (cap == 1)
1616 for (c = 0; c < layer1_size; c++)
1617 capParam(syn1nce_window,
1618 c + l2 + window_offset);
1619 }
1620 // Learn weights input -> hidden
1621 for (c = 0; c < layer1_size; c++) {
1622 syn0[c + l1] += neu1e[c];
1623 if (syn0[c + l1] > 50)
1624 syn0[c + l1] = 50;
1625 if (syn0[c + l1] < -50)
1626 syn0[c + l1] = -50;
1627 }
1628 }
1629 } else if (type == 4) { //training senna
1630 // in -> hidden
1631 cw = 0;
1632 for (a = 0; a < window * 2 + 1; a++)
1633 if (a != window) {
1634 c = sentence_position - window + a;
1635 if (c < 0)
1636 continue;
1637 if (c >= sentence_length)
1638 continue;
1639 last_word = sen[c];
1640 if (last_word == -1)
1641 continue;
1642 window_offset = a * layer1_size;
1643 if (a > window)
1644 window_offset -= layer1_size;
1645 for (c = 0; c < layer1_size; c++)
1646 neu1[c + window_offset] += syn0[c
1647 + last_word * layer1_size];
1648 cw++;
1649 }
1650 if (cw) {
1651 for (a = 0; a < window_hidden_size; a++) {
1652 c = a * window_layer_size;
1653 for (b = 0; b < window_layer_size; b++) {
1654 neu2[a] += syn_window_hidden[c + b] * neu1[b];
1655 }
1656 }
1657 if (hs)
1658 for (d = 0; d < vocab[word].codelen; d++) {
1659 f = 0;
1660 l2 = vocab[word].point[d] * window_hidden_size;
1661 // Propagate hidden -> output
1662 for (c = 0; c < window_hidden_size; c++)
1663 f += hardTanh(neu2[c]) * syn_hidden_word[c + l2];
1664 if (f <= -MAX_EXP)
1665 continue;
1666 else if (f >= MAX_EXP)
1667 continue;
1668 else
1669 f = expTable[(int) ((f + MAX_EXP)
1670 * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1671 // 'g' is the gradient multiplied by the learning rate
1672 g = (1 - vocab[word].code[d] - f) * alpha;
1673 // Propagate errors output -> hidden
1674 for (c = 0; c < window_hidden_size; c++)
1675 neu2e[c] += dHardTanh(neu2[c], g) * g
1676 * syn_hidden_word[c + l2];
1677 // Learn weights hidden -> output
1678 for (c = 0; c < window_hidden_size; c++)
1679 syn_hidden_word[c + l2] += dHardTanh(neu2[c], g) * g
1680 * neu2[c];
1681 }
1682 // NEGATIVE SAMPLING
1683 if (negative > 0)
1684 for (d = 0; d < negative + 1; d++) {
1685 if (d == 0) {
1686 target = word;
1687 label = 1;
1688 } else {
1689 next_random = next_random
1690 * (unsigned long long) 25214903917 + 11;
1691 if (word_to_group != NULL
1692 && word_to_group[word] != -1) {
1693 target = word;
1694 while (target == word) {
1695 target = group_to_table[word_to_group[word]
1696 * table_size
1697 + (next_random >> 16) % table_size];
1698 next_random = next_random
1699 * (unsigned long long) 25214903917
1700 + 11;
1701 }
1702 //printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
1703 } else {
1704 target =
1705 table[(next_random >> 16) % table_size];
1706 }
1707 if (target == 0)
1708 target = next_random % (vocab_size - 1) + 1;
1709 if (target == word)
1710 continue;
1711 label = 0;
1712 }
1713 l2 = target * window_hidden_size;
1714 f = 0;
1715 for (c = 0; c < window_hidden_size; c++)
1716 f += hardTanh(neu2[c])
1717 * syn_hidden_word_neg[c + l2];
1718 if (f > MAX_EXP)
1719 g = (label - 1) * alpha / negative;
1720 else if (f < -MAX_EXP)
1721 g = (label - 0) * alpha / negative;
1722 else
1723 g = (label
1724 - expTable[(int) ((f + MAX_EXP)
1725 * (EXP_TABLE_SIZE / MAX_EXP / 2))])
1726 * alpha / negative;
1727 for (c = 0; c < window_hidden_size; c++)
1728 neu2e[c] += dHardTanh(neu2[c], g) * g
1729 * syn_hidden_word_neg[c + l2];
1730 for (c = 0; c < window_hidden_size; c++)
1731 syn_hidden_word_neg[c + l2] += dHardTanh(neu2[c], g)
1732 * g * neu2[c];
1733 }
1734 for (a = 0; a < window_hidden_size; a++)
1735 for (b = 0; b < window_layer_size; b++)
1736 neu1e[b] += neu2e[a]
1737 * syn_window_hidden[a * window_layer_size + b];
1738 for (a = 0; a < window_hidden_size; a++)
1739 for (b = 0; b < window_layer_size; b++)
1740 syn_window_hidden[a * window_layer_size + b] += neu2e[a]
1741 * neu1[b];
1742 // hidden -> in
1743 for (a = 0; a < window * 2 + 1; a++)
1744 if (a != window) {
1745 c = sentence_position - window + a;
1746 if (c < 0)
1747 continue;
1748 if (c >= sentence_length)
1749 continue;
1750 last_word = sen[c];
1751 if (last_word == -1)
1752 continue;
1753 window_offset = a * layer1_size;
1754 if (a > window)
1755 window_offset -= layer1_size;
1756 for (c = 0; c < layer1_size; c++)
1757 syn0[c + last_word * layer1_size] += neu1e[c
1758 + window_offset];
1759 }
1760 }
1761 } else if(type == 5) {
1762 for (a = b; a < window * 2 + 1 - b; a++) if (a != window) {
1763 c = sentence_position - window + a;
1764 if (c < 0) continue;
1765 if (c >= sentence_length) continue;
1766 last_word = sen[c];
1767 if (last_word == -1) continue;
1768 inc_collocator(cdb, word, last_word, a - window);
1769 // printf("%2d: storing %s %s - %d\n", id, vocab[word].word, vocab[last_word].word, (int) a - window);
1770 // cw++;
1771 }
1772 } else {
1773 printf("unknown type %i", type);
1774 exit(0);
1775 }
1776 sentence_position++;
1777 if (sentence_position >= sentence_length) {
1778 sentence_length = 0;
1779 continue;
1780 }
1781 }
1782 fclose(fi);
1783 free(neu1);
1784 free(neu1e);
1785 threadPos[(long) id] = -1;
1786 pthread_exit(NULL);
1787}
1788
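// Diagnostic dump of the position-dependent output weights (syn1neg_window, types 2/3): for every
// word from index cc on, print the best-responding collocate at each window position, the collocate
// with the highest accumulated response, the single strongest response, and the top 10 responses
// together with their window positions.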
1789void ShowCollocations() {
1790 long a, b, c, d, e, window_offset, target, max_target = 0, maxmax_target;
1791 real f, max_f, maxmax_f;
1792 real *target_sums, bestf[MAX_CC], worstbest;
1793 long besti[MAX_CC];
1794 int N = 10, bestp[MAX_CC];
1795 a = posix_memalign((void **) &target_sums, 128, vocab_size * sizeof(real));
1796
1797 for (d = cc; d < vocab_size; d++) {
1798 for (b = 0; b < vocab_size; b++)
1799 target_sums[b] = 0;
1800 for (b = 0; b < N; b++)
1801 bestf[b] = -1;
1802 worstbest = -1;
1803
1804 maxmax_f = -1;
1805 maxmax_target = 0;
1806  for (a = window * 2; a >= 0; a--) {
1807 if (a != window) {
1808 max_f = -1;
1809 window_offset = a * layer1_size;
1810 if (a > window)
1811 window_offset -= layer1_size;
1812 for(target = 0; target < vocab_size; target ++) {
1813 if(target == d)
1814 continue;
1815 f = 0;
1816 for (c = 0; c < layer1_size; c++)
1817 f += syn0[d* layer1_size + c] * syn1neg_window[target * window_layer_size + window_offset + c];
1818 if (f < -MAX_EXP)
1819 continue;
1820 else if (f > MAX_EXP)
1821 continue;
1822 else
1823 f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
1824 if(f > max_f) {
1825 max_f = f;
1826 max_target = target;
1827 }
1828 target_sums[target] += (1-target_sums[target]) * f;
1829 if(f > worstbest) {
1830 for (b = 0; b < N; b++) {
1831 if (f > bestf[b]) {
1832 for (e = N - 1; e > b; e--) {
1833 bestf[e] = bestf[e - 1];
1834 besti[e] = besti[e - 1];
1835 bestp[e] = bestp[e - 1];
1836 }
1837 bestf[b] = f;
1838 besti[b] = target;
1839 bestp[b] = window-a;
1840 break;
1841 }
1842 }
1843 worstbest = bestf[N - 1];
1844 }
1845 }
1846 printf("%s (%.2f) ", vocab[max_target].word, max_f);
1847 if (max_f > maxmax_f) {
1848 maxmax_f = max_f;
1849 maxmax_target = max_target;
1850 }
1851 } else {
1852 printf("\x1b[1m%s\x1b[0m ", vocab[d].word);
1853 }
1854 }
1855 max_f = -1;
1856 for (b = 0; b < vocab_size; b++) {
1857 if (target_sums[b] > max_f) {
1858 max_f = target_sums[b];
1859 max_target = b;
1860 }
1861 }
1862 printf(" – max sum: %s (%.2f), max resp.: \x1b[1m%s\x1b[0m (%.2f)\n",
1863 vocab[max_target].word, max_f, vocab[maxmax_target].word,
1864 maxmax_f);
1865 for (b = 0; b < N && bestf[b] > -1; b++)
1866 printf("%-32s %.2f %d\n", vocab[besti[b]].word, bestf[b], bestp[b]);
1867 printf("\n");
1868 }
1869}
1870
1871void TrainModel() {
1872 long a, b, c, d;
1873 FILE *fo;
1874 pthread_t *pt = (pthread_t *) malloc((num_threads + 1) * sizeof(pthread_t)); // +1 slot for the monitor thread
1875 threadPos = malloc(num_threads * sizeof(long long));
1876 threadIters = malloc(num_threads * sizeof(int));
1877 char *timebuf = malloc(80);
1878 printf("Starting training using file %s\n", train_file);
1879 starting_alpha = alpha;
1880 if (read_vocab_file[0] != 0)
1881 ReadVocab();
1882 else
1883 LearnVocabFromTrainFile();
1884 if (save_vocab_file[0] != 0)
1885 SaveVocab();
1886 if (output_file[0] == 0)
1887 return;
1888 InitNet();
1889 if (cc > 0)
1890 ShowCollocations();
1891 if (negative > 0 || nce > 0)
1892 InitUnigramTable();
1893 if (negative_classes_file[0] != 0)
1894 InitClassUnigramTable();
1895 start = time(NULL);
1896 start_clock = clock();
1897 for (a = 0; a < num_threads; a++)
1898 pthread_create(&pt[a], NULL, TrainModelThread, (void *) a);
1899 if(debug_mode > 1)
1900 pthread_create(&pt[num_threads], NULL, MonitorThread, (void *) a);
1901 for (a = 0; a < num_threads; a++)
1902 pthread_join(pt[a], NULL);
1903 if(debug_mode > 1) {
1904 pthread_join(pt[num_threads], NULL);
1905 clock_t now = time(NULL);
1906 clock_t now_clock = clock();
1907 printf("\nFinished: %s - user: %lds - real: %lds\n", currentDateTime(timebuf, 0), (now_clock - start_clock) / CLOCKS_PER_SEC, now - start);
1908  if(type == 5) // don't save vectors for classic collocators
1909 return;
1910 printf("Saving vectors to %s ...", output_file);
1911 fflush(stdout);
1912 }
1913 fo = fopen(output_file, "wb");
1914 if (classes == 0) {
1915 // Save the word vectors
1916 fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
1917 for (a = 0; a < vocab_size; a++) {
1918 fprintf(fo, "%s ", vocab[a].word);
1919 if (binary)
1920 for (b = 0; b < layer1_size; b++)
1921 fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
1922 else
1923 for (b = 0; b < layer1_size; b++)
1924 fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
1925 fprintf(fo, "\n");
1926 }
1927 if(debug_mode > 1)
1928 fprintf(stderr, "\n");
1929 } else {
1930 // Run K-means on the word vectors
1931 int clcn = classes, iter = 10, closeid;
1932 int *centcn = (int *) malloc(classes * sizeof(int));
1933 int *cl = (int *) calloc(vocab_size, sizeof(int));
1934 real closev, x;
1935 real *cent = (real *) calloc(classes * layer1_size, sizeof(real));
1936 for (a = 0; a < vocab_size; a++)
1937 cl[a] = a % clcn;
1938 for (a = 0; a < iter; a++) {
1939 for (b = 0; b < clcn * layer1_size; b++)
1940 cent[b] = 0;
1941 for (b = 0; b < clcn; b++)
1942 centcn[b] = 1;
1943 for (c = 0; c < vocab_size; c++) {
1944 for (d = 0; d < layer1_size; d++)
1945 cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
1946 centcn[cl[c]]++;
1947 }
1948 for (b = 0; b < clcn; b++) {
1949 closev = 0;
1950 for (c = 0; c < layer1_size; c++) {
1951 cent[layer1_size * b + c] /= centcn[b];
1952 closev += cent[layer1_size * b + c]
1953 * cent[layer1_size * b + c];
1954 }
1955 closev = sqrt(closev);
1956 for (c = 0; c < layer1_size; c++)
1957 cent[layer1_size * b + c] /= closev;
1958 }
1959 for (c = 0; c < vocab_size; c++) {
1960 closev = -10;
1961 closeid = 0;
1962 for (d = 0; d < clcn; d++) {
1963 x = 0;
1964 for (b = 0; b < layer1_size; b++)
1965 x += cent[layer1_size * d + b]
1966 * syn0[c * layer1_size + b];
1967 if (x > closev) {
1968 closev = x;
1969 closeid = d;
1970 }
1971 }
1972 cl[c] = closeid;
1973 }
1974 }
1975 // Save the K-means classes
1976 for (a = 0; a < vocab_size; a++)
1977 fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
1978 free(centcn);
1979 free(cent);
1980 free(cl);
1981 }
1982 fclose(fo);
1983 if (save_net_file[0] != 0)
1984 SaveNet();
1985}
1986
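// ArgPos returns the argv index of option `str`, or -1 if the option is
// absent; it exits with an error if the option is the last token and thus
// has no value. Typical use:
//   if ((i = ArgPos((char *) "-size", argc, argv)) > 0)
//     layer1_size = atoi(argv[i + 1]);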
1987int ArgPos(char *str, int argc, char **argv) {
1988 int a;
1989 for (a = 1; a < argc; a++)
1990 if (!strcmp(str, argv[a])) {
1991 if (a == argc - 1) {
1992 printf("Argument missing for %s\n", str);
1993 exit(1);
1994 }
1995 return a;
1996 }
1997 return -1;
1998}
1999
2000void print_help() {
2001 printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
2002 printf("Options:\n");
2003 printf("Parameters for training:\n");
2004 printf("\t-train <file>\n");
2005 printf("\t\tUse text data from <file> to train the model\n");
2006 printf("\t-output <file>\n");
2007 printf(
2008 "\t\tUse <file> to save the resulting word vectors / word clusters\n");
2009 printf("\t-size <int>\n");
2010 printf("\t\tSet size of word vectors; default is 100\n");
2011 printf("\t-window <int>\n");
2012 printf("\t\tSet max skip length between words; default is 5\n");
2013 printf("\t-sample <float>\n");
2014 printf(
2015 "\t\tSet threshold for occurrence of words. Those that appear with higher frequency in the training data\n");
2016 printf(
2017 "\t\twill be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)\n");
2018 printf("\t-hs <int>\n");
2019 printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
2020 printf("\t-negative <int>\n");
2021 printf(
2022 "\t\tNumber of negative examples; default is 5, common values are 3 - 10 (0 = not used)\n");
2023 printf("\t-negative-classes <file>\n");
2024 printf("\t\tNegative classes to sample from\n");
2025 printf("\t-nce <int>\n");
2026 printf(
2027 "\t\tNumber of negative examples for nce; default is 0, common values are 3 - 10 (0 = not used)\n");
2028 printf("\t-threads <int>\n");
2029 printf("\t\tUse <int> threads (default 12)\n");
2030 printf("\t-iter <int>\n");
2031 printf("\t\tRun more training iterations (default 5)\n");
2032 printf("\t-min-count <int>\n");
2033 printf(
2034 "\t\tThis will discard words that appear less than <int> times; default is 5\n");
2035 printf("\t-alpha <float>\n");
2036 printf(
2037 "\t\tSet the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW\n");
2038 printf("\t-classes <int>\n");
2039 printf(
2040 "\t\tOutput word classes rather than word vectors; default number of classes is 0 (vectors are written)\n");
2041 printf("\t-debug <int>\n");
2042 printf(
2043 "\t\tSet the debug mode (default = 2 = more info during training)\n");
2044 printf("\t-binary <int>\n");
2045 printf(
2046    "\t\tSave the resulting vectors in binary mode; default is 0 (off)\n");
2047 printf("\t-save-vocab <file>\n");
2048 printf("\t\tThe vocabulary will be saved to <file>\n");
2049 printf("\t-read-vocab <file>\n");
2050 printf(
2051 "\t\tThe vocabulary will be read from <file>, not constructed from the training data\n");
2052 printf("\t-train-counts <int>\n");
2053 printf(
2054    "\t\tUse word counts of the actual corpus rather than vocabulary counts; default is 1 (on)\n");
2055 printf("\t-read-net <file>\n");
2056 printf(
2057 "\t\tThe net parameters will be read from <file>, not initialized randomly\n");
2058 printf("\t-save-net <file>\n");
2059 printf("\t\tThe net parameters will be saved to <file>\n");
2060 printf("\t-magic-stop-file <file>\n");
2061 printf("\t\tIf the magic file <file> exists, training will stop after the current cycle.\n");
2062 printf("\t-show-cc <int>\n");
2063 printf("\t\tShow words with their collocators starting from word rank <int>. Depends on -read-vocab and -read-net.\n");
2064 printf("\t-type <int>\n");
2065 printf(
2066    "\t\tType of embeddings (0 for cbow, 1 for skip-gram, 2 for cwindow, 3 for structured skip-gram, 4 for senna type, 5 to store positional bigrams)\n");
2067 printf("\t-cap <int>\n");
2068 printf(
2069    "\t\tLimit the parameter values to the range [-50, 50]; default is 0 (off)\n");
2070 printf("\nExamples:\n");
2071 printf(
2072 "./word2vec -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 -negative 5 -hs 0 -binary 0 -type 1 -iter 3\n\n");
2073}
2074
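// A hypothetical invocation of the collocation-database mode (-type 5);
// the file names are illustrative only:
//   ./word2vec -train corpus.txt -read-vocab vocab.txt -output colloc.db \
//              -type 5 -window 5 -threads 12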
2075int main(int argc, char **argv) {
2076 int i;
2077 setlocale(LC_ALL, "");
2078 if (argc == 1) {
2079 print_help();
2080 return 0;
2081 }
2082 output_file[0] = 0;
2083 save_vocab_file[0] = 0;
2084 read_vocab_file[0] = 0;
2085 save_net_file[0] = 0;
2086 read_net_file[0] = 0;
2087 negative_classes_file[0] = 0;
2088 if ((i = ArgPos((char *) "-h", argc, argv)) > 0) {
2089 print_help();
2090 return(0);
2091 }
2092 if ((i = ArgPos((char *) "-help", argc, argv)) > 0) {
2093 print_help();
2094 return(0);
2095 }
2096 if ((i = ArgPos((char *) "-size", argc, argv)) > 0)
2097 layer1_size = atoi(argv[i + 1]);
2098 if ((i = ArgPos((char *) "-train", argc, argv)) > 0)
2099 strcpy(train_file, argv[i + 1]);
2100 if ((i = ArgPos((char *) "-save-vocab", argc, argv)) > 0)
2101 strcpy(save_vocab_file, argv[i + 1]);
2102 if ((i = ArgPos((char *) "-read-vocab", argc, argv)) > 0)
2103 strcpy(read_vocab_file, argv[i + 1]);
2104 if ((i = ArgPos((char *) "-train-counts", argc, argv)) > 0)
2105 tc = atoi(argv[i + 1]);
2106 if ((i = ArgPos((char *) "-save-net", argc, argv)) > 0)
2107 strcpy(save_net_file, argv[i + 1]);
2108 if ((i = ArgPos((char *) "-read-net", argc, argv)) > 0)
2109 strcpy(read_net_file, argv[i + 1]);
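 // The magic stop file provides a graceful shutdown path: it must not exist
 // when training starts, and creating it later presumably makes the training
 // threads finish their current cycle and stop (see -magic-stop-file above).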
2110 if ((i = ArgPos((char *) "-magic-stop-file", argc, argv)) > 0) {
2111 strcpy(magic_stop_file, argv[i + 1]);
2112 if (access(magic_stop_file, F_OK ) != -1) {
2113 printf("ERROR: magic stop file %s must not exist at start.\n", magic_stop_file);
2114 exit(1);
2115 }
2116 }
2117 if ((i = ArgPos((char *) "-debug", argc, argv)) > 0)
2118 debug_mode = atoi(argv[i + 1]);
2119 if ((i = ArgPos((char *) "-binary", argc, argv)) > 0)
2120 binary = atoi(argv[i + 1]);
2121 if ((i = ArgPos((char *) "-show-cc", argc, argv)) > 0)
2122 cc = atoi(argv[i + 1]);
2123 if ((i = ArgPos((char *) "-type", argc, argv)) > 0)
2124 type = atoi(argv[i + 1]);
2125 if ((i = ArgPos((char *) "-output", argc, argv)) > 0)
2126 strcpy(output_file, argv[i + 1]);
2127 if ((i = ArgPos((char *) "-window", argc, argv)) > 0)
2128 window = atoi(argv[i + 1]);
2129 if ((i = ArgPos((char *) "-sample", argc, argv)) > 0)
2130 sample = atof(argv[i + 1]);
2131 if ((i = ArgPos((char *) "-hs", argc, argv)) > 0)
2132 hs = atoi(argv[i + 1]);
2133 if ((i = ArgPos((char *) "-negative", argc, argv)) > 0)
2134 negative = atoi(argv[i + 1]);
2135 if ((i = ArgPos((char *) "-negative-classes", argc, argv)) > 0)
2136 strcpy(negative_classes_file, argv[i + 1]);
2137 if ((i = ArgPos((char *) "-nce", argc, argv)) > 0)
2138 nce = atoi(argv[i + 1]);
2139 if ((i = ArgPos((char *) "-threads", argc, argv)) > 0)
2140 num_threads = atoi(argv[i + 1]);
2141 if ((i = ArgPos((char *) "-iter", argc, argv)) > 0)
2142 iter = atoi(argv[i + 1]);
2143 if ((i = ArgPos((char *) "-min-count", argc, argv)) > 0)
2144 min_count = atoi(argv[i + 1]);
2145 if ((i = ArgPos((char *) "-classes", argc, argv)) > 0)
2146 classes = atoi(argv[i + 1]);
2147 if ((i = ArgPos((char *) "-cap", argc, argv)) > 0)
2148 cap = atoi(argv[i + 1]);
2149 if (type == 0 || type == 2 || type == 4)
2150 alpha = 0.05;
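 // For -type 5 the model is not trained into vectors: subsampling is turned
 // off and output_file is opened as a collocator database
 // (open_collocatordb_for_write), which presumably receives positional
 // co-occurrence counts instead of embeddings.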
2151 if (type==5) {
2152 sample = 0;
2153 cdb = open_collocatordb_for_write(output_file);
2154 }
2155 if ((i = ArgPos((char *) "-alpha", argc, argv)) > 0)
2156 alpha = atof(argv[i + 1]);
2157 vocab = (struct vocab_word *) calloc(vocab_max_size,
2158 sizeof(struct vocab_word));
2159 vocab_hash = (int *) calloc(vocab_hash_size, sizeof(int));
2160 expTable = (real *) malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
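 // Precompute a sigmoid lookup table: entry i holds sigma(x) for x spaced
 // evenly over [-MAX_EXP, MAX_EXP]. As in the reference word2vec code, a
 // lookup is presumably done as
 //   expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]
 // during training.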
2161 for (i = 0; i < EXP_TABLE_SIZE; i++) {
2162 expTable[i] = exp((i / (real) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
2163 expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1)
2164 }
2165 SaveArgs(argc, argv);
2166 TrainModel();
2167 return 0;
2168}
2169