Blame - wordless2vec.c - ids-kl/dereko2vec

blob: e68bbee6182434b974ea11e12d340fa83e04a2b8 [file] [log] [blame]

Marc Kupietz	d6f9c71	2016-03-16 11:50:56 +0100	[diff] [blame^]	1	// Copyright 2013 Google Inc. All Rights Reserved.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#include <stdio.h>
				16	#include <stdlib.h>
				17	#include <string.h>
				18	#include <math.h>
				19	#include <pthread.h>
				20
				21	#define MAX_STRING 100
				22	#define EXP_TABLE_SIZE 1000
				23	#define MAX_EXP 6
				24	#define MAX_SENTENCE_LENGTH 1000
				25	#define MAX_CODE_LENGTH 40
				26
				27	const int vocab_hash_size = 30000000; // Maximum 30 * 0.7 = 21M words in the vocabulary
				28
				29	typedef float real; // Precision of float numbers
				30
				31	struct vocab_word {
				32	long long cn;
				33	int *point;
				34	char word, code, codelen;
				35	};
				36
				37	char train_file[MAX_STRING], output_file[MAX_STRING];
				38	char save_vocab_file[MAX_STRING], read_vocab_file[MAX_STRING];
				39	struct vocab_word *vocab;
				40	int binary = 0, type = 1, debug_mode = 2, window = 5, min_count = 5, num_threads = 12, min_reduce = 1;
				41	int *vocab_hash;
				42	long long vocab_max_size = 1000, vocab_size = 0, layer1_size = 100;
				43	long long train_words = 0, word_count_actual = 0, iter = 5, file_size = 0, classes = 0;
				44	real alpha = 0.025, starting_alpha, sample = 1e-3;
				45	real syn0, syn1, syn1neg, expTable, *tanhTable;
				46	clock_t start;
				47
				48	real syn1_window, syn1neg_window;
				49	int window_offset, window_layer_size;
				50
				51	int window_hidden_size = 500;
				52	real syn_window_hidden, syn_hidden_word, *syn_hidden_word_neg;
				53
				54	int hs = 0, negative = 5;
				55	const int table_size = 1e8;
				56	int *table;
				57
				58	//constrastive negative sampling
				59	char negative_classes_file[MAX_STRING];
				60	int *word_to_group;
				61	int group_to_table; //group_sizetable_size
				62	int class_number;
				63
				64	//char table
				65	int rep = 0;
				66	#define C_MAX_CODE 65536
				67	int c_state_size = 5;
				68	int c_cell_size = 5;
				69	int c_proj_size = 3;
				70	int c_params_number;
				71	int c_lstm_params_number;
				72	real *c_lookup;
				73
				74	//char lstm params
				75	real *f_init_state;
				76	real *f_init_cell;
				77	real *b_init_state;
				78	real *b_init_cell;
				79	real *f_b_params;
				80
				81	//short term memory
				82	real*syn0_initial;
				83	real*syn0_in_memory;
				84
				85	int batch_size = 100;
				86
				87	void printStates(real*states, int start){
				88	int s;
				89	printf("igate ");
				90	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				91	printf("fgate ");
				92	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				93	printf("c + tanh ");
				94	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				95	printf("cgate ");
				96	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				97	printf("ogate ");
				98	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				99	printf("cgate + tanh ");
				100	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				101	printf("state ");
				102	for(s = 0; s < c_state_size; s++){ printf("%f ", states[start++]);} printf("\n");
				103
				104	}
				105
				106	void lstmForwardBlock(real chars, int char_start, realstates, int next_start, int p){
				107	int i,s,si,sf,sc,sct,sctt,so,s1=next_start;
				108	int prev_cell_start = s1 - c_state_size*4;
				109	int prev_state_start = s1 - c_state_size;
				110	if(states[prev_cell_start]==0){
				111	// printf("crap! cell is zero\n");
				112	}
				113	if(states[prev_state_start]==0){
				114	// printf("crap! state is zero\n");
				115	}
				116	if(states[s1]!=0){
				117	// printf("crap! start not zero\n");
				118	}
				119	//igate
				120	si = s1;
				121	for(s = 0; s < c_state_size; s++){
				122	for(i = 0; i < c_proj_size; i++){
				123	states[s1]+=chars[char_start+i]*f_b_params[p++];
				124	}
				125	for(i = 0; i < c_cell_size; i++){
				126	states[s1]+=states[prev_cell_start+i]*f_b_params[p++];
				127	}
				128	for(i = 0; i < c_state_size; i++){
				129	states[s1]+=states[prev_state_start+i]*f_b_params[p++];
				130	}
				131	states[s1]+=f_b_params[p++];
				132	if(states[s1]>MAX_EXP){
				133	states[s1]=1;
				134	}
				135	else if(states[s1]<-MAX_EXP){
				136	states[s1]=0;
				137	}
				138	else{
				139	states[s1] = expTable[(int)((states[s1] + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				140	}
				141	s1++;
				142	}
				143
				144	//fgate
				145	sf=s1;
				146	for(s = 0; s < c_state_size; s++){
				147	for(i = 0; i < c_proj_size; i++){
				148	states[s1]+=chars[char_start+i]*f_b_params[p++];
				149	}
				150	for(i = 0; i < c_cell_size; i++){
				151	states[s1]+=states[prev_cell_start+i]*f_b_params[p++];
				152	}
				153	for(i = 0; i < c_state_size; i++){
				154	states[s1]+=states[prev_state_start+i]*f_b_params[p++];
				155	}
				156	states[s1]+=f_b_params[p++];
				157	if(states[s1]>MAX_EXP){
				158	states[s1]=1;
				159	}
				160	else if(states[s1]<-MAX_EXP){
				161	states[s1]=0;
				162	}
				163	else{
				164	states[s1] = expTable[(int)((states[s1] + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				165	}
				166	s1++;
				167	}
				168
				169	//c + tanh
				170	sct=s1;
				171	for(s = 0; s < c_state_size; s++){
				172	for(i = 0; i < c_proj_size; i++){
				173	states[s1]+=chars[char_start+i]*f_b_params[p++];
				174	}
				175	for(i = 0; i < c_state_size; i++){
				176	states[s1]+=states[prev_state_start+i]*f_b_params[p++];
				177	}
				178	states[s1]+=f_b_params[p++];
				179	if(states[s1]>MAX_EXP){
				180	states[s1]=1;
				181	}
				182	else if(states[s1]<-MAX_EXP){
				183	states[s1]=-1;
				184	}
				185	else{
				186	states[s1] = tanhTable[(int)((states[s1] + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				187	}
				188	s1++;
				189	}
				190
				191	//cgate
				192	sc=s1;
				193	for(s = 0; s < c_state_size; s++){
				194	states[s1]+=states[sct+s]states[si+s]+states[sf+s]states[prev_cell_start+s];
				195	s1++;
				196	}
				197
				198	//ogate
				199	so=s1;
				200	for(s = 0; s < c_state_size; s++){
				201	for(i = 0; i < c_proj_size; i++){
				202	states[s1]+=chars[char_start+i]*f_b_params[p++];
				203	}
				204	for(i = 0; i < c_cell_size; i++){
				205	states[s1]+=states[sc+s]*f_b_params[p++];
				206	}
				207	for(i = 0; i < c_state_size; i++){
				208	states[s1]+=states[prev_state_start+i]*f_b_params[p++];
				209	}
				210	states[s1]+=f_b_params[p++];
				211	if(states[s1]>MAX_EXP){
				212	states[s1]=1;
				213	}
				214	else if(states[s1]<-MAX_EXP){
				215	states[s1]=0;
				216	}
				217	else{
				218	states[s1] = expTable[(int)((states[s1] + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				219	}
				220	s1++;
				221	}
				222
				223	//cgate + tan
				224	sctt = s1;
				225	for(s = 0; s < c_state_size; s++){
				226	if(states[sc+s]>MAX_EXP){
				227	states[s1]=1;
				228	}
				229	else if(states[sc+s]<-MAX_EXP){
				230	states[s1]=-1;
				231	}
				232	else{
				233	states[s1] = tanhTable[(int)((states[sc+s] + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				234	}
				235	s1++;
				236	}
				237
				238	//next state
				239	if(states[s1]!=0){
				240	printf("crap! end not zero\n");
				241	}
				242	for(s = 0; s < c_state_size; s++){
				243	states[s1] = states[sctt+s] * states[so+s];
				244	s1++;
				245	}
				246
				247
				248	}
				249
				250	void lstmBackwardBlock(real chars, int char_start, realstates, int next_start, int pStart, realchars_e, realstates_e, real*lstm_params_e){
				251	int p=pStart+c_lstm_params_number-1;
				252	int i,s,si,sf,sc,sct,sctt,so,s1=next_start+c_state_size*7-1;
				253	int prev_cell_start = next_start - c_state_size*4;
				254	int prev_state_start = next_start - c_state_size;
				255
				256	real e;
				257	si = next_start;
				258	sf = next_start + c_state_size;
				259	sct = next_start + c_state_size*2;
				260	sc = next_start + c_state_size*3;
				261	so = next_start + c_state_size*4;
				262	sctt = next_start + c_state_size*5;
				263
				264	//next state
				265	for(s = c_state_size-1; s >= 0; s--){
				266	states_e[sctt+s] += states_e[s1]*states[so+s];
				267	states_e[so+s] += states_e[s1]*states[sctt+s];
				268	s1--;
				269	}
				270
				271
				272	//cgate + tan
				273	for(s = c_state_size-1; s >= 0; s--){
				274	states_e[sc+s] += states_e[s1](1-states[s1]states[s1]);
				275	s1--;
				276	}
				277
				278	//ogate
				279	for(s = c_state_size-1; s >= 0; s--){
				280	e = states[s1](1-states[s1])states_e[s1];
				281	for(i = c_proj_size-1; i >= 0; i--){
				282	chars_e[char_start+i] += e*f_b_params[p];
				283	lstm_params_e[p--] += e*chars[char_start+i];
				284	}
				285
				286	for(i = c_cell_size-1; i >= 0; i--){
				287	states_e[sc+s]+=e*f_b_params[p];
				288	lstm_params_e[p--] += e*states[sc+s];
				289	}
				290	for(i = c_state_size-1; i >= 0; i--){
				291	states_e[prev_state_start+i] += e*f_b_params[p];
				292	lstm_params_e[p--] += e*states_e[prev_state_start+i];
				293	}
				294	lstm_params_e[p--]+=e;
				295	s1--;
				296	}
				297
				298	//cgate
				299	for(s = c_state_size-1; s >= 0; s--){
				300	states_e[sct+s]+=states_e[s1]*states[si+s];
				301	states_e[si+s]+=states_e[s1]*states[sct+s];
				302	states_e[prev_cell_start+s]+=states_e[s1]*states[sf+s];
				303	states_e[sf+s]+=states_e[s1]*states[prev_cell_start+s];
				304	s1--;
				305	}
				306
				307	//c + tanh
				308	for(s = c_state_size-1; s >= 0; s--){
				309	e = (1-states[s1]states[s1])states_e[s1];
				310	for(i = c_proj_size-1; i >= 0; i--){
				311	chars_e[char_start+i] += e*f_b_params[p];
				312	lstm_params_e[p--] += e*chars[char_start+i];
				313	}
				314	for(i = c_state_size-1; i >= 0; i--){
				315	states_e[prev_state_start+i]+=e*f_b_params[p];
				316	lstm_params_e[p--] +=e*states[prev_state_start+i];
				317	}
				318	lstm_params_e[p--]+=e;
				319	s1--;
				320	}
				321
				322
				323	//fgate
				324	for(s = c_state_size-1; s >= 0; s--){
				325	e = states[s1](1-states[s1])states_e[s1];
				326	for(i = c_proj_size-1; i >= 0; i--){
				327	chars_e[char_start+i] += e*f_b_params[p];
				328	lstm_params_e[p--] += e*chars[char_start+i];
				329	}
				330	for(i = c_cell_size-1; i >= 0; i--){
				331	states_e[prev_cell_start+i]+=e*f_b_params[p];
				332	lstm_params_e[p--] +=e*states[prev_cell_start+i];
				333	}
				334	for(i = c_state_size-1; i >= 0; i--){
				335	states_e[prev_state_start+i]+=e*f_b_params[p];
				336	lstm_params_e[p--] +=e*states[prev_state_start+i];
				337	}
				338	lstm_params_e[p--]+=e;
				339	s1--;
				340	}
				341
				342	//igate
				343	for(s = c_state_size-1; s >= 0; s--){
				344	e = states[s1](1-states[s1])states_e[s1];
				345	for(i = c_proj_size-1; i >= 0; i--){
				346	chars_e[char_start+i] += e*f_b_params[p];
				347	lstm_params_e[p--] += e*chars[char_start+i];
				348	}
				349	for(i = c_cell_size-1; i >= 0; i--){
				350	states_e[prev_cell_start+i]+=e*f_b_params[p];
				351	lstm_params_e[p--] +=e*states[prev_cell_start+i];
				352	}
				353	for(i = c_state_size-1; i >= 0; i--){
				354	states_e[prev_state_start+i]+=e*f_b_params[p];
				355	lstm_params_e[p--] +=e*states[prev_state_start+i];
				356	}
				357	lstm_params_e[p--]+=e;
				358	s1--;
				359	}
				360
				361	if(p+1!=pStart){
				362	printf("crap! p!= %d p = %d\n",pStart,p+1);
				363	}
				364	if(s1+1!=next_start){
				365	printf("crap! s1!= %d s1 = %d\n",next_start,s1+1);
				366	}
				367	}
				368
				369	void lstmForward(char* word, int len, real* out, real f_states, real b_states, real *chars){
				370	//printf("%s\n",word);
				371	int i,s,c,p;
				372	for(s = 0; s < (len+1)(c_state_size7); s++){
				373	f_states[s]=0;
				374	b_states[s]=0;
				375	}
				376	for(s = 0; s < c_state_size; s++){
				377	f_states[c_state_size*3]=f_init_cell[s];
				378	f_states[c_state_size*6]=f_init_state[s];
				379	b_states[c_state_size*3]=b_init_cell[s];
				380	b_states[c_state_size*6]=b_init_state[s];
				381	}
				382	for(i = 0; i < len; i++){
				383	c = word[i];
				384	if(c>=C_MAX_CODE){c=C_MAX_CODE-1;}
				385	for(s = 0; s < c_proj_size; s++){
				386	chars[ic_proj_size+s] = c_lookup[cc_proj_size+s];
				387	}
				388	}
				389
				390	for(i = 0; i < len; i++){
				391	lstmForwardBlock(chars, ic_proj_size, f_states, (i+1)c_state_size*7, 0);
				392	}
				393	for(i = 0; i < len; i++){
				394	lstmForwardBlock(chars, (len-i-1)c_proj_size, b_states, (i+1)c_state_size*7, c_lstm_params_number);
				395	}
				396
				397	//printStates(f_states,c_state_size*7);
				398
				399	for(s = 0; s < layer1_size; s++){
				400	out[s]=0;
				401	}
				402	p=c_lstm_params_number*2;
				403	for(s = 0; s < layer1_size; s++){
				404	for(i = 0; i < c_state_size; i++){
				405	out[s]+=f_states[lenc_state_size7+c_state_size6 + i]f_b_params[p++];
				406	out[s]+=b_states[lenc_state_size7+c_state_size6 + i]f_b_params[p++];
				407	}
				408	// printf("%f ",out[s]);
				409	}
				410	// printf("\n");
				411	}
				412
				413	void lstmBackward(char* word, int len, real* out, real f_states, real b_states, real* chars, real* out_e, real f_states_e, real b_states_e, real* chars_e, real *lstm_params_e){
				414	int i,s,c=-1,p;
				415	for(s = 0; s < (len+1)c_state_size7; s++){
				416	f_states_e[s]=0;
				417	b_states_e[s]=0;
				418	}
				419	for(i = 0; i < len; i++){
				420	for(s = 0; s < c_proj_size; s++){
				421	chars_e[i*c_proj_size+s] = 0;
				422	}
				423	}
				424	for(i = 0; i < c_lstm_params_number*2; i++){
				425	lstm_params_e[i]=0;
				426	}
				427
				428	p=c_lstm_params_number*2;
				429	for(s = 0; s < layer1_size; s++){
				430	for(i = 0; i < c_state_size; i++){
				431	f_states_e[lenc_state_size7+c_state_size6 + i]+=out_e[s]f_b_params[p];
				432	f_b_params[p] += out_e[s] * f_states[lenc_state_size7+c_state_size*6 + i];
				433	p++;
				434	b_states_e[lenc_state_size7+c_state_size6 + i]+=out_e[s]f_b_params[p];
				435	f_b_params[p] += out_e[s] * b_states[lenc_state_size7+c_state_size*6 + i];
				436	p++;
				437	}
				438	}
				439
				440	for(i = len-1; i >=0; i--){
				441	lstmBackwardBlock(chars, ic_proj_size, b_states, (i+1)c_state_size*7, c_lstm_params_number, chars_e,b_states_e,lstm_params_e);
				442	}
				443
				444	for(i = len-1; i >=0; i--){
				445	lstmBackwardBlock(chars, (len-i-1)c_proj_size, f_states, (i+1)c_state_size*7, 0, chars_e,f_states_e,lstm_params_e);
				446	}
				447
				448	for(i = 0; i < len; i++){
				449	c = word[i];
				450	if(c>=C_MAX_CODE){c=C_MAX_CODE-1;}
				451	for(s = 0; s < c_proj_size; s++){
				452	c_lookup[cc_proj_size+s] += chars_e[ic_proj_size+s];
				453	}
				454	}
				455
				456	for(s = 0; s < c_state_size; s++){
				457	f_init_cell[s]+=f_states_e[c_state_size*3];
				458	f_init_state[s]+=f_states_e[c_state_size*6];
				459	b_init_cell[s]+=b_states_e[c_state_size*3];
				460	b_init_state[s]+=b_states_e[c_state_size*6];
				461	}
				462
				463	for(s = 0; s < c_lstm_params_number*2; s++){
				464	f_b_params[c]+=lstm_params_e[c];
				465	}
				466
				467	//printf("out\n");
				468	//printStates(f_states,(len)c_state_size7);
				469	//printf("err\n");
				470	//printStates(f_states_e,(len)c_state_size7);
				471
				472	}
				473
				474	void lstmFitting(char* word, int len, real* out, real f_states, real b_states, real* chars, real* out_expected, real* out_e, real f_states_e, real b_states_e, real* chars_e, real *lstm_params_e){
				475	int i;
				476	real g = 0;
				477	lstmForward(word, len, out, f_states, b_states, chars);
				478	for(i = 0; i < layer1_size; i++){
				479	if(out_expected[i]>out[i]){
				480	g += out_expected[i]-out[i];
				481	}
				482	else{
				483	g += -out_expected[i]+out[i];
				484	}
				485	out_e[i] = (out_expected[i]-out[i])*alpha;
				486	}
				487	printf("error before fitting = %f\n", g);
				488	lstmBackward(word, len, out, f_states, b_states, chars, out_e, f_states_e, b_states_e, chars_e, lstm_params_e);
				489	lstmForward(word, len, out, f_states, b_states, chars);
				490	g=0;
				491	for(i = 0; i < layer1_size; i++){
				492	if(out_expected[i]>out[i]){
				493	g += out_expected[i]-out[i];
				494	}
				495	else{
				496	g += -out_expected[i]+out[i];
				497	}
				498	out_e[i] = (out_expected[i]-out[i])*alpha;
				499	}
				500	printf("error after fitting = %f\n", g);
				501
				502	}
				503
				504	real hardTanh(real x){
				505	if(x>=1){
				506	return 1;
				507	}
				508	else if(x<=-1){
				509	return -1;
				510	}
				511	else{
				512	return x;
				513	}
				514	}
				515
				516	real dHardTanh(real x, real g){
				517	if(x > 1 && g > 0){
				518	return 0;
				519	}
				520	if(x < -1 && g < 0){
				521	return 0;
				522	}
				523	return 1;
				524	}
				525
				526	void InitUnigramTable() {
				527	int a, i;
				528	long long train_words_pow = 0;
				529	real d1, power = 0.75;
				530	table = (int )malloc(table_size sizeof(int));
				531	for (a = 0; a < vocab_size; a++) train_words_pow += pow(vocab[a].cn, power);
				532	i = 0;
				533	d1 = pow(vocab[i].cn, power) / (real)train_words_pow;
				534	for (a = 0; a < table_size; a++) {
				535	table[a] = i;
				536	if (a / (real)table_size > d1) {
				537	i++;
				538	d1 += pow(vocab[i].cn, power) / (real)train_words_pow;
				539	}
				540	if (i >= vocab_size) i = vocab_size - 1;
				541	}
				542	}
				543
				544	// Reads a single word from a file, assuming space + tab + EOL to be word boundaries
				545	void ReadWord(char word, FILE fin) {
				546	int a = 0, ch;
				547	while (!feof(fin)) {
				548	ch = fgetc(fin);
				549	if (ch == 13) continue;
				550	if ((ch == ' ') \|\| (ch == '\t') \|\| (ch == '\n')) {
				551	if (a > 0) {
				552	if (ch == '\n') ungetc(ch, fin);
				553	break;
				554	}
				555	if (ch == '\n') {
				556	strcpy(word, (char *)"</s>");
				557	return;
				558	} else continue;
				559	}
				560	word[a] = ch;
				561	a++;
				562	if (a >= MAX_STRING - 1) a--; // Truncate too long words
				563	}
				564	word[a] = 0;
				565	}
				566
				567	// Returns hash value of a word
				568	int GetWordHash(char *word) {
				569	unsigned long long a, hash = 0;
				570	for (a = 0; a < strlen(word); a++) hash = hash * 257 + word[a];
				571	hash = hash % vocab_hash_size;
				572	return hash;
				573	}
				574
				575	// Returns position of a word in the vocabulary; if the word is not found, returns -1
				576	int SearchVocab(char *word) {
				577	unsigned int hash = GetWordHash(word);
				578	while (1) {
				579	if (vocab_hash[hash] == -1) return -1;
				580	if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
				581	hash = (hash + 1) % vocab_hash_size;
				582	}
				583	return -1;
				584	}
				585
				586	// Reads a word and returns its index in the vocabulary
				587	int ReadWordIndex(FILE *fin) {
				588	char word[MAX_STRING];
				589	ReadWord(word, fin);
				590	if (feof(fin)) return -1;
				591	return SearchVocab(word);
				592	}
				593
				594	// Reads a word and returns its index in the vocabulary
				595	int ReadAndStoreWordIndex(FILE fin, char word) {
				596	ReadWord(word, fin);
				597	if (feof(fin)) return -1;
				598	return SearchVocab(word);
				599	}
				600
				601	// Adds a word to the vocabulary
				602	int AddWordToVocab(char *word) {
				603	unsigned int hash, length = strlen(word) + 1;
				604	if (length > MAX_STRING) length = MAX_STRING;
				605	vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
				606	strcpy(vocab[vocab_size].word, word);
				607	vocab[vocab_size].cn = 0;
				608	vocab_size++;
				609	// Reallocate memory if needed
				610	if (vocab_size + 2 >= vocab_max_size) {
				611	vocab_max_size += 1000;
				612	vocab = (struct vocab_word )realloc(vocab, vocab_max_size sizeof(struct vocab_word));
				613	}
				614	hash = GetWordHash(word);
				615	while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
				616	vocab_hash[hash] = vocab_size - 1;
				617	return vocab_size - 1;
				618	}
				619
				620	// Used later for sorting by word counts
				621	int VocabCompare(const void a, const void b) {
				622	return ((struct vocab_word )b)->cn - ((struct vocab_word )a)->cn;
				623	}
				624
				625	// Sorts the vocabulary by frequency using word counts
				626	void SortVocab() {
				627	int a, size;
				628	unsigned int hash;
				629	// Sort the vocabulary and keep </s> at the first position
				630	qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
				631	for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
				632	size = vocab_size;
				633	train_words = 0;
				634	for (a = 0; a < size; a++) {
				635	// Words occuring less than min_count times will be discarded from the vocab
				636	if ((vocab[a].cn < min_count) && (a != 0)) {
				637	vocab_size--;
				638	free(vocab[a].word);
				639	} else {
				640	// Hash will be re-computed, as after the sorting it is not actual
				641	hash=GetWordHash(vocab[a].word);
				642	while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
				643	vocab_hash[hash] = a;
				644	train_words += vocab[a].cn;
				645	}
				646	}
				647	vocab = (struct vocab_word )realloc(vocab, (vocab_size + 1) sizeof(struct vocab_word));
				648	// Allocate memory for the binary tree construction
				649	for (a = 0; a < vocab_size; a++) {
				650	vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
				651	vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
				652	}
				653	}
				654
				655	// Reduces the vocabulary by removing infrequent tokens
				656	void ReduceVocab() {
				657	int a, b = 0;
				658	unsigned int hash;
				659	for (a = 0; a < vocab_size; a++) if (vocab[a].cn > min_reduce) {
				660	vocab[b].cn = vocab[a].cn;
				661	vocab[b].word = vocab[a].word;
				662	b++;
				663	} else free(vocab[a].word);
				664	vocab_size = b;
				665	for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
				666	for (a = 0; a < vocab_size; a++) {
				667	// Hash will be re-computed, as it is not actual
				668	hash = GetWordHash(vocab[a].word);
				669	while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
				670	vocab_hash[hash] = a;
				671	}
				672	fflush(stdout);
				673	min_reduce++;
				674	}
				675
				676	// Create binary Huffman tree using the word counts
				677	// Frequent words will have short uniqe binary codes
				678	void CreateBinaryTree() {
				679	long long a, b, i, min1i, min2i, pos1, pos2, point[MAX_CODE_LENGTH];
				680	char code[MAX_CODE_LENGTH];
				681	long long count = (long long )calloc(vocab_size * 2 + 1, sizeof(long long));
				682	long long binary = (long long )calloc(vocab_size * 2 + 1, sizeof(long long));
				683	long long parent_node = (long long )calloc(vocab_size * 2 + 1, sizeof(long long));
				684	for (a = 0; a < vocab_size; a++) count[a] = vocab[a].cn;
				685	for (a = vocab_size; a < vocab_size * 2; a++) count[a] = 1e15;
				686	pos1 = vocab_size - 1;
				687	pos2 = vocab_size;
				688	// Following algorithm constructs the Huffman tree by adding one node at a time
				689	for (a = 0; a < vocab_size - 1; a++) {
				690	// First, find two smallest nodes 'min1, min2'
				691	if (pos1 >= 0) {
				692	if (count[pos1] < count[pos2]) {
				693	min1i = pos1;
				694	pos1--;
				695	} else {
				696	min1i = pos2;
				697	pos2++;
				698	}
				699	} else {
				700	min1i = pos2;
				701	pos2++;
				702	}
				703	if (pos1 >= 0) {
				704	if (count[pos1] < count[pos2]) {
				705	min2i = pos1;
				706	pos1--;
				707	} else {
				708	min2i = pos2;
				709	pos2++;
				710	}
				711	} else {
				712	min2i = pos2;
				713	pos2++;
				714	}
				715	count[vocab_size + a] = count[min1i] + count[min2i];
				716	parent_node[min1i] = vocab_size + a;
				717	parent_node[min2i] = vocab_size + a;
				718	binary[min2i] = 1;
				719	}
				720	// Now assign binary code to each vocabulary word
				721	for (a = 0; a < vocab_size; a++) {
				722	b = a;
				723	i = 0;
				724	while (1) {
				725	code[i] = binary[b];
				726	point[i] = b;
				727	i++;
				728	b = parent_node[b];
				729	if (b == vocab_size * 2 - 2) break;
				730	}
				731	vocab[a].codelen = i;
				732	vocab[a].point[0] = vocab_size - 2;
				733	for (b = 0; b < i; b++) {
				734	vocab[a].code[i - b - 1] = code[b];
				735	vocab[a].point[i - b] = point[b] - vocab_size;
				736	}
				737	}
				738	free(count);
				739	free(binary);
				740	free(parent_node);
				741	}
				742
				743	void LearnVocabFromTrainFile() {
				744	char word[MAX_STRING];
				745	FILE *fin;
				746	long long a, i;
				747	for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
				748	fin = fopen(train_file, "rb");
				749	if (fin == NULL) {
				750	printf("ERROR: training data file not found!\n");
				751	exit(1);
				752	}
				753	vocab_size = 0;
				754	AddWordToVocab((char *)"</s>");
				755	while (1) {
				756	ReadWord(word, fin);
				757	if (feof(fin)) break;
				758	train_words++;
				759	if ((debug_mode > 1) && (train_words % 100000 == 0)) {
				760	printf("%lldK%c", train_words / 1000, 13);
				761	fflush(stdout);
				762	}
				763	i = SearchVocab(word);
				764	if (i == -1) {
				765	a = AddWordToVocab(word);
				766	vocab[a].cn = 1;
				767	} else vocab[i].cn++;
				768	if (vocab_size > vocab_hash_size * 0.7) ReduceVocab();
				769	}
				770	SortVocab();
				771	if (debug_mode > 0) {
				772	printf("Vocab size: %lld\n", vocab_size);
				773	printf("Words in train file: %lld\n", train_words);
				774	}
				775	file_size = ftell(fin);
				776	fclose(fin);
				777	}
				778
				779	void SaveVocab() {
				780	long long i;
				781	FILE *fo = fopen(save_vocab_file, "wb");
				782	for (i = 0; i < vocab_size; i++) fprintf(fo, "%s %lld\n", vocab[i].word, vocab[i].cn);
				783	fclose(fo);
				784	}
				785
				786	void ReadVocab() {
				787	long long a, i = 0;
				788	char c;
				789	char word[MAX_STRING];
				790	FILE *fin = fopen(read_vocab_file, "rb");
				791	if (fin == NULL) {
				792	printf("Vocabulary file not found\n");
				793	exit(1);
				794	}
				795	for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
				796	vocab_size = 0;
				797	while (1) {
				798	ReadWord(word, fin);
				799	if (feof(fin)) break;
				800	a = AddWordToVocab(word);
				801	fscanf(fin, "%lld%c", &vocab[a].cn, &c);
				802	i++;
				803	}
				804	SortVocab();
				805	if (debug_mode > 0) {
				806	printf("Vocab size: %lld\n", vocab_size);
				807	printf("Words in train file: %lld\n", train_words);
				808	}
				809	fin = fopen(train_file, "rb");
				810	if (fin == NULL) {
				811	printf("ERROR: training data file not found!\n");
				812	exit(1);
				813	}
				814	fseek(fin, 0, SEEK_END);
				815	file_size = ftell(fin);
				816	fclose(fin);
				817	}
				818
				819	void InitClassUnigramTable() {
				820	long long a,c;
				821	printf("loading class unigrams \n");
				822	FILE *fin = fopen(negative_classes_file, "rb");
				823	if (fin == NULL) {
				824	printf("ERROR: class file not found!\n");
				825	exit(1);
				826	}
				827	word_to_group = (int )malloc(vocab_size sizeof(int));
				828	for(a = 0; a < vocab_size; a++) word_to_group[a] = -1;
				829	char class[MAX_STRING];
				830	char prev_class[MAX_STRING];
				831	prev_class[0] = 0;
				832	char word[MAX_STRING];
				833	class_number = -1;
				834	while (1) {
				835	if (feof(fin)) break;
				836	ReadWord(class, fin);
				837	ReadWord(word, fin);
				838	int word_index = SearchVocab(word);
				839	if (word_index != -1){
				840	if(strcmp(class, prev_class) != 0){
				841	class_number++;
				842	strcpy(prev_class, class);
				843	}
				844	word_to_group[word_index] = class_number;
				845	}
				846	ReadWord(word, fin);
				847	}
				848	class_number++;
				849	fclose(fin);
				850
				851	group_to_table = (int )malloc(table_size class_number * sizeof(int));
				852	long long train_words_pow = 0;
				853	real d1, power = 0.75;
				854
				855	for(c = 0; c < class_number; c++){
				856	long long offset = c * table_size;
				857	train_words_pow = 0;
				858	for (a = 0; a < vocab_size; a++) if(word_to_group[a] == c) train_words_pow += pow(vocab[a].cn, power);
				859	int i = 0;
				860	while(word_to_group[i]!=c && i < vocab_size) i++;
				861	d1 = pow(vocab[i].cn, power) / (real)train_words_pow;
				862	for (a = 0; a < table_size; a++) {
				863	//printf("index %lld , word %d\n", a, i);
				864	group_to_table[offset + a] = i;
				865	if (a / (real)table_size > d1) {
				866	i++;
				867	while(word_to_group[i]!=c && i < vocab_size) i++;
				868	d1 += pow(vocab[i].cn, power) / (real)train_words_pow;
				869	}
				870	if (i >= vocab_size) while(word_to_group[i]!=c && i >= 0) i--;
				871	}
				872	}
				873	}
				874
				875	void InitNet() {
				876	long long a, b;
				877	unsigned long long next_random = 1;
				878	window_layer_size = layer1_sizewindow2;
				879	a = posix_memalign((void *)&syn0, 128, (long long)vocab_size layer1_size * sizeof(real));
				880	if (syn0 == NULL) {printf("Memory allocation failed\n"); exit(1);}
				881
				882	if (hs) {
				883	a = posix_memalign((void *)&syn1, 128, (long long)vocab_size layer1_size * sizeof(real));
				884	if (syn1 == NULL) {printf("Memory allocation failed\n"); exit(1);}
				885	a = posix_memalign((void *)&syn1_window, 128, (long long)vocab_size window_layer_size * sizeof(real));
				886	if (syn1_window == NULL) {printf("Memory allocation failed\n"); exit(1);}
				887	a = posix_memalign((void *)&syn_hidden_word, 128, (long long)vocab_size window_hidden_size * sizeof(real));
				888	if (syn_hidden_word == NULL) {printf("Memory allocation failed\n"); exit(1);}
				889
				890	for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
				891	syn1[a * layer1_size + b] = 0;
				892	for (a = 0; a < vocab_size; a++) for (b = 0; b < window_layer_size; b++)
				893	syn1_window[a * window_layer_size + b] = 0;
				894	for (a = 0; a < vocab_size; a++) for (b = 0; b < window_hidden_size; b++)
				895	syn_hidden_word[a * window_hidden_size + b] = 0;
				896	}
				897	if (negative>0) {
				898	a = posix_memalign((void *)&syn1neg, 128, (long long)vocab_size layer1_size * sizeof(real));
				899	if (syn1neg == NULL) {printf("Memory allocation failed\n"); exit(1);}
				900	a = posix_memalign((void *)&syn1neg_window, 128, (long long)vocab_size window_layer_size * sizeof(real));
				901	if (syn1neg_window == NULL) {printf("Memory allocation failed\n"); exit(1);}
				902	a = posix_memalign((void *)&syn_hidden_word_neg, 128, (long long)vocab_size window_hidden_size * sizeof(real));
				903	if (syn_hidden_word_neg == NULL) {printf("Memory allocation failed\n"); exit(1);}
				904
				905	for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
				906	syn1neg[a * layer1_size + b] = 0;
				907	for (a = 0; a < vocab_size; a++) for (b = 0; b < window_layer_size; b++)
				908	syn1neg_window[a * window_layer_size + b] = 0;
				909	for (a = 0; a < vocab_size; a++) for (b = 0; b < window_hidden_size; b++)
				910	syn_hidden_word_neg[a * window_hidden_size + b] = 0;
				911	}
				912	for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++) {
				913	next_random = next_random * (unsigned long long)25214903917 + 11;
				914	syn0[a * layer1_size + b] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / layer1_size;
				915	}
				916
				917	a = posix_memalign((void *)&syn_window_hidden, 128, window_hidden_size window_layer_size * sizeof(real));
				918	if (syn_window_hidden == NULL) {printf("Memory allocation failed\n"); exit(1);}
				919	for (a = 0; a < window_hidden_size * window_layer_size; a++){
				920	next_random = next_random * (unsigned long long)25214903917 + 11;
				921	syn_window_hidden[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (window_hidden_size*window_layer_size);
				922	}
				923
				924	if(rep == 1 \|\| rep == 2){
				925	a = posix_memalign((void *)&c_lookup, 128, (long long)C_MAX_CODE c_proj_size * sizeof(real));
				926	if (c_lookup == NULL) {printf("Memory allocation failed\n"); exit(1);}
				927	for (a = 0; a < C_MAX_CODE * c_proj_size; a++){
				928	next_random = next_random * (unsigned long long)25214903917 + 11;
				929	c_lookup[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (c_proj_size);
				930	}
				931
				932	a = posix_memalign((void *)&f_init_state, 128, c_state_size sizeof(real));
				933	if (f_init_state == NULL) {printf("Memory allocation failed\n"); exit(1);}
				934	a = posix_memalign((void *)&f_init_cell, 128, c_state_size sizeof(real));
				935	if (f_init_cell == NULL) {printf("Memory allocation failed\n"); exit(1);}
				936	a = posix_memalign((void *)&b_init_state, 128, c_state_size sizeof(real));
				937	if (b_init_state == NULL) {printf("Memory allocation failed\n"); exit(1);}
				938	a = posix_memalign((void *)&b_init_cell, 128, c_state_size sizeof(real));
				939	if (b_init_cell == NULL) {printf("Memory allocation failed\n"); exit(1);}
				940
				941	for (a = 0; a < c_state_size; a++){
				942	next_random = next_random * (unsigned long long)25214903917 + 11;
				943	f_init_state[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (c_state_size);
				944	next_random = next_random * (unsigned long long)25214903917 + 11;
				945	f_init_cell[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (c_state_size);
				946	next_random = next_random * (unsigned long long)25214903917 + 11;
				947	b_init_state[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (c_state_size);
				948	next_random = next_random * (unsigned long long)25214903917 + 11;
				949	b_init_cell[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) / (c_state_size);
				950	}
				951
				952	c_lstm_params_number = /input/ (c_state_size+c_cell_size+c_proj_size+1)*c_state_size +
				953	/forget/ (c_state_size+c_cell_size+c_proj_size+1)*c_state_size +
				954	/cell/ (c_state_size+c_proj_size+1)*c_state_size +
				955	/output/ (c_state_size+c_cell_size+c_proj_size+1)*c_state_size;
				956
				957	c_params_number = ( c_lstm_params_number * 2 + (c_state_size2)layer1_size) ;
				958	a = posix_memalign((void *)&f_b_params, 128, c_params_number sizeof(real));
				959	if (f_b_params == NULL) {printf("Memory allocation failed\n"); exit(1);}
				960
				961	for (a = 0; a < c_params_number; a++){
				962	next_random = next_random * (unsigned long long)25214903917 + 11;
				963	f_b_params[a] = (((next_random & 0xFFFF) / (real)65536) - 0.5) ;
				964	}
				965	}
				966
				967	if(rep == 2){
				968	a = posix_memalign((void *)&syn0_initial, 128, (long long)vocab_size layer1_size * sizeof(real));
				969	if (syn0_initial == NULL) {printf("Memory allocation failed\n"); exit(1);}
				970	a = posix_memalign((void *)&syn0_in_memory, 128, (long long)vocab_size sizeof(real));
				971	if (syn0_in_memory == NULL) {printf("Memory allocation failed\n"); exit(1);}
				972	for(a = 0; a < vocab_size; a++){
				973	syn0_in_memory[a] = -1;
				974	}
				975	}
				976	CreateBinaryTree();
				977	}
				978
				979	void TrainModelThread(void id) {
				980	long long a, b, d, cw, word, last_word, sentence_length = 0, sentence_position = 0;
				981	long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
				982	long long l1, l2, c, target, label, local_iter = iter;
				983	char c_sen[(MAX_SENTENCE_LENGTH + 1) * MAX_STRING];
				984	unsigned long long next_random = (long long)id;
				985	real f, g, acc_g=0;
				986	clock_t now;
				987	int input_len_1 = layer1_size;
				988	if(type == 2 \|\| type == 4){
				989	input_len_1=window_layer_size;
				990	}
				991	real neu1 = (real )calloc(input_len_1, sizeof(real));
				992	real neu1e = (real )calloc(input_len_1, sizeof(real));
				993
				994	int input_len_2 = 0;
				995	if(type == 4){
				996	input_len_2 = window_hidden_size;
				997	}
				998	real neu2 = (real )calloc(input_len_2, sizeof(real));
				999	real neu2e = (real )calloc(input_len_2, sizeof(real));
				1000
				1001	FILE *fi = fopen(train_file, "rb");
				1002	fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
				1003
				1004	real f_states = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1005	real f_states_e = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1006	real b_states = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1007	real b_states_e = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1008	real chars = (real )calloc(c_proj_size * MAX_STRING, sizeof(real));
				1009	real chars_e = (real )calloc(c_proj_size * MAX_STRING, sizeof(real));
				1010	real lstm_params_e = (real )calloc(c_lstm_params_number*2, sizeof(real));
				1011
				1012	//short term memory vars
				1013	real global_divergence = -1;
				1014	int in_mem = 0;
				1015	int skip=0, non_skip=0;
				1016
				1017	while (1) {
				1018	if (word_count - last_word_count > 10000) {
				1019	word_count_actual += word_count - last_word_count;
				1020	last_word_count = word_count;
				1021	if ((debug_mode > 1)) {
				1022	now=clock();
				1023	printf("%cAlpha: %f Progress: %.2f%% Words/thread/sec: %.2fk : error %.4f", 13, alpha,
				1024	word_count_actual / (real)(iter * train_words + 1) * 100,
				1025	word_count_actual / ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000), acc_g);
				1026	if(rep == 2){
				1027	printf(" skiprate %f",skip/(real)(skip+non_skip));
				1028	}
				1029	acc_g=0;
				1030	skip=0;
				1031	non_skip=0;
				1032	fflush(stdout);
				1033	}
				1034	alpha = starting_alpha * (1 - word_count_actual / (real)(iter * train_words + 1));
				1035	if (alpha < starting_alpha * 0.0001) alpha = starting_alpha * 0.0001;
				1036	}
				1037	if (sentence_length == 0) {
				1038	while (1) {
				1039	word = ReadAndStoreWordIndex(fi, &c_sen[sentence_length*MAX_STRING]);
				1040	if (feof(fi)) break;
				1041	if (word == -1) continue;
				1042	word_count++;
				1043	if (word == 0) break;
				1044	// The subsampling randomly discards frequent words while keeping the ranking same
				1045	if (sample > 0) {
				1046	real ran = (sqrt(vocab[word].cn / (sample * train_words)) + 1) * (sample * train_words) / vocab[word].cn;
				1047	next_random = next_random * (unsigned long long)25214903917 + 11;
				1048	if (ran < (next_random & 0xFFFF) / (real)65536) continue;
				1049	}
				1050	sen[sentence_length] = word;
				1051	sentence_length++;
				1052	if (sentence_length >= MAX_SENTENCE_LENGTH) break;
				1053	}
				1054	sentence_position = 0;
				1055	}
				1056	if (feof(fi) \|\| (word_count > train_words / num_threads)) {
				1057	word_count_actual += word_count - last_word_count;
				1058	local_iter--;
				1059	if (local_iter == 0) break;
				1060	word_count = 0;
				1061	last_word_count = 0;
				1062	sentence_length = 0;
				1063	fseek(fi, file_size / (long long)num_threads * (long long)id, SEEK_SET);
				1064	continue;
				1065	}
				1066	word = sen[sentence_position];
				1067	if (word == -1) continue;
				1068	for (c = 0; c < input_len_1; c++) neu1[c] = 0;
				1069	for (c = 0; c < input_len_1; c++) neu1e[c] = 0;
				1070	for (c = 0; c < input_len_2; c++) neu2[c] = 0;
				1071	for (c = 0; c < input_len_2; c++) neu2e[c] = 0;
				1072	next_random = next_random * (unsigned long long)25214903917 + 11;
				1073	b = next_random % window;
				1074	if (type == 0) { //train the cbow architecture
				1075	// in -> hidden
				1076	cw = 0;
				1077	for (a = b; a < window * 2 + 1 - b; a++) if (a != window) {
				1078	c = sentence_position - window + a;
				1079	if (c < 0) continue;
				1080	if (c >= sentence_length) continue;
				1081	last_word = sen[c];
				1082	if (last_word == -1) continue;
				1083	for (c = 0; c < layer1_size; c++) neu1[c] += syn0[c + last_word * layer1_size];
				1084	cw++;
				1085	}
				1086	if (cw) {
				1087	for (c = 0; c < layer1_size; c++) neu1[c] /= cw;
				1088	if (hs) for (d = 0; d < vocab[word].codelen; d++) {
				1089	f = 0;
				1090	l2 = vocab[word].point[d] * layer1_size;
				1091	// Propagate hidden -> output
				1092	for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1[c + l2];
				1093	if (f <= -MAX_EXP) continue;
				1094	else if (f >= MAX_EXP) continue;
				1095	else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				1096	// 'g' is the gradient multiplied by the learning rate
				1097	g = (1 - vocab[word].code[d] - f) * alpha;
				1098	// Propagate errors output -> hidden
				1099	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
				1100	// Learn weights hidden -> output
				1101	for (c = 0; c < layer1_size; c++) syn1[c + l2] += g * neu1[c];
				1102	}
				1103	// NEGATIVE SAMPLING
				1104	if (negative > 0) for (d = 0; d < negative + 1; d++) {
				1105	if (d == 0) {
				1106	target = word;
				1107	label = 1;
				1108	} else {
				1109	next_random = next_random * (unsigned long long)25214903917 + 11;
				1110	if(word_to_group != NULL && word_to_group[word] != -1){
				1111	target = word;
				1112	while(target == word) {
				1113	target = group_to_table[word_to_group[word]*table_size + (next_random >> 16) % table_size];
				1114	next_random = next_random * (unsigned long long)25214903917 + 11;
				1115	}
				1116	//printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
				1117	}
				1118	else{
				1119	target = table[(next_random >> 16) % table_size];
				1120	}
				1121	if (target == 0) target = next_random % (vocab_size - 1) + 1;
				1122	if (target == word) continue;
				1123	label = 0;
				1124	}
				1125	l2 = target * layer1_size;
				1126	f = 0;
				1127	for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1neg[c + l2];
				1128	if (f > MAX_EXP) g = (label - 1) * alpha;
				1129	else if (f < -MAX_EXP) g = (label - 0) * alpha;
				1130	else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha;
				1131	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
				1132	for (c = 0; c < layer1_size; c++) syn1neg[c + l2] += g * neu1[c];
				1133	}
				1134	// hidden -> in
				1135	for (a = b; a < window * 2 + 1 - b; a++) if (a != window) {
				1136	c = sentence_position - window + a;
				1137	if (c < 0) continue;
				1138	if (c >= sentence_length) continue;
				1139	last_word = sen[c];
				1140	if (last_word == -1) continue;
				1141	for (c = 0; c < layer1_size; c++) syn0[c + last_word * layer1_size] += neu1e[c];
				1142	}
				1143	}
				1144	} else if(type==1) { //train skip-gram
				1145	for (a = b; a < window * 2 + 1 - b; a++) if (a != window) {
				1146	c = sentence_position - window + a;
				1147	if (c < 0) continue;
				1148	if (c >= sentence_length) continue;
				1149	last_word = sen[c];
				1150	if (last_word == -1) continue;
				1151	l1 = last_word * layer1_size;
				1152	for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
				1153	// HIERARCHICAL SOFTMAX
				1154	if (hs) for (d = 0; d < vocab[word].codelen; d++) {
				1155	f = 0;
				1156	l2 = vocab[word].point[d] * layer1_size;
				1157	// Propagate hidden -> output
				1158	for (c = 0; c < layer1_size; c++) f += syn0[c + l1] * syn1[c + l2];
				1159	if (f <= -MAX_EXP) continue;
				1160	else if (f >= MAX_EXP) continue;
				1161	else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				1162	// 'g' is the gradient multiplied by the learning rate
				1163	g = (1 - vocab[word].code[d] - f) * alpha;
				1164	// Propagate errors output -> hidden
				1165	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1[c + l2];
				1166	// Learn weights hidden -> output
				1167	for (c = 0; c < layer1_size; c++) syn1[c + l2] += g * syn0[c + l1];
				1168	}
				1169	// NEGATIVE SAMPLING
				1170	if (negative > 0) for (d = 0; d < negative + 1; d++) {
				1171	if (d == 0) {
				1172	target = word;
				1173	label = 1;
				1174	} else {
				1175	next_random = next_random * (unsigned long long)25214903917 + 11;
				1176	if(word_to_group != NULL && word_to_group[word] != -1){
				1177	target = word;
				1178	while(target == word) {
				1179	target = group_to_table[word_to_group[word]*table_size + (next_random >> 16) % table_size];
				1180	next_random = next_random * (unsigned long long)25214903917 + 11;
				1181	}
				1182	//printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
				1183	}
				1184	else{
				1185	target = table[(next_random >> 16) % table_size];
				1186	}
				1187	if (target == 0) target = next_random % (vocab_size - 1) + 1;
				1188	if (target == word) continue;
				1189	label = 0;
				1190	}
				1191	l2 = target * layer1_size;
				1192	f = 0;
				1193	for (c = 0; c < layer1_size; c++) f += syn0[c + l1] * syn1neg[c + l2];
				1194	if (f > MAX_EXP) g = (label - 1) * alpha;
				1195	else if (f < -MAX_EXP) g = (label - 0) * alpha;
				1196	else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha;
				1197	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg[c + l2];
				1198	for (c = 0; c < layer1_size; c++) syn1neg[c + l2] += g * syn0[c + l1];
				1199	}
				1200	// Learn weights input -> hidden
				1201	for (c = 0; c < layer1_size; c++) syn0[c + l1] += neu1e[c];
				1202	}
				1203	}
				1204	else if(type == 2){ //train the cwindow architecture
				1205	// in -> hidden
				1206	cw = 0;
				1207	for (a = 0; a < window * 2 + 1; a++) if (a != window) {
				1208	c = sentence_position - window + a;
				1209	if (c < 0) continue;
				1210	if (c >= sentence_length) continue;
				1211	last_word = sen[c];
				1212	if (last_word == -1) continue;
				1213	window_offset = a*layer1_size;
				1214	if (a > window) window_offset-=layer1_size;
				1215	for (c = 0; c < layer1_size; c++) neu1[c+window_offset] += syn0[c + last_word * layer1_size];
				1216	cw++;
				1217	}
				1218	if (cw) {
				1219	if (hs) for (d = 0; d < vocab[word].codelen; d++) {
				1220	f = 0;
				1221	l2 = vocab[word].point[d] * window_layer_size;
				1222	// Propagate hidden -> output
				1223	for (c = 0; c < window_layer_size; c++) f += neu1[c] * syn1_window[c + l2];
				1224	if (f <= -MAX_EXP) continue;
				1225	else if (f >= MAX_EXP) continue;
				1226	else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				1227	// 'g' is the gradient multiplied by the learning rate
				1228	g = (1 - vocab[word].code[d] - f) * alpha;
				1229	// Propagate errors output -> hidden
				1230	for (c = 0; c < window_layer_size; c++) neu1e[c] += g * syn1_window[c + l2];
				1231	// Learn weights hidden -> output
				1232	for (c = 0; c < window_layer_size; c++) syn1_window[c + l2] += g * neu1[c];
				1233	}
				1234	// NEGATIVE SAMPLING
				1235	if (negative > 0) for (d = 0; d < negative + 1; d++) {
				1236	if (d == 0) {
				1237	target = word;
				1238	label = 1;
				1239	} else {
				1240	next_random = next_random * (unsigned long long)25214903917 + 11;
				1241	if(word_to_group != NULL && word_to_group[word] != -1){
				1242	target = word;
				1243	while(target == word) {
				1244	target = group_to_table[word_to_group[word]*table_size + (next_random >> 16) % table_size];
				1245	next_random = next_random * (unsigned long long)25214903917 + 11;
				1246	}
				1247	//printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
				1248	}
				1249	else{
				1250	target = table[(next_random >> 16) % table_size];
				1251	}
				1252	if (target == 0) target = next_random % (vocab_size - 1) + 1;
				1253	if (target == word) continue;
				1254	label = 0;
				1255	}
				1256	l2 = target * window_layer_size;
				1257	f = 0;
				1258	for (c = 0; c < window_layer_size; c++) f += neu1[c] * syn1neg_window[c + l2];
				1259	if (f > MAX_EXP) g = (label - 1) * alpha;
				1260	else if (f < -MAX_EXP) g = (label - 0) * alpha;
				1261	else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha;
				1262	acc_g+=g;
				1263	for (c = 0; c < window_layer_size; c++) neu1e[c] += g * syn1neg_window[c + l2];
				1264	for (c = 0; c < window_layer_size; c++) syn1neg_window[c + l2] += g * neu1[c];
				1265	}
				1266	// hidden -> in
				1267	for (a = 0; a < window * 2 + 1; a++) if (a != window) {
				1268	c = sentence_position - window + a;
				1269	if (c < 0) continue;
				1270	if (c >= sentence_length) continue;
				1271	last_word = sen[c];
				1272	if (last_word == -1) continue;
				1273	window_offset = a * layer1_size;
				1274	if(a > window) window_offset -= layer1_size;
				1275	for (c = 0; c < layer1_size; c++) syn0[c + last_word * layer1_size] += neu1e[c + window_offset];
				1276	}
				1277	}
				1278	}
				1279	else if (type == 3){ //train structured skip-gram
				1280	char* c_word = &c_sen[sentence_position*MAX_STRING];
				1281	if(rep == 1){
				1282	lstmForward(c_word, strlen(c_word),neu1, f_states, b_states, chars);
				1283	}
				1284	else if(rep == 2){
				1285	l1 = word * layer1_size;
				1286	if(syn0_in_memory[word]==-1){
				1287	syn0_in_memory[word]=0;
				1288	lstmForward(c_word, strlen(c_word),&syn0_initial[l1], f_states, b_states, chars);
				1289	for (c = 0; c < layer1_size; c++) {syn0[c + l1] = syn0_initial[c + l1];neu1[c] += syn0[c + l1];}
				1290	in_mem = 1;
				1291	}
				1292	else{
				1293	for (c = 0; c < layer1_size; c++) neu1[c] += syn0[c + l1];
				1294	in_mem = 0;
				1295	}
				1296	}
				1297	else{
				1298	l1 = word * layer1_size;
				1299	for (c = 0; c < layer1_size; c++) neu1[c] += syn0[c + l1];
				1300	}
				1301
				1302	for (a = 0; a < window * 2 + 1; a++) if (a != window) {
				1303	c = sentence_position - window + a;
				1304	if (c < 0) continue;
				1305	if (c >= sentence_length) continue;
				1306	last_word = sen[c];
				1307	if (last_word == -1) continue;
				1308
				1309
				1310	window_offset = a * layer1_size;
				1311	if(a > window) window_offset -= layer1_size;
				1312	for (c = 0; c < layer1_size; c++) neu1e[c] = 0;
				1313	// HIERARCHICAL SOFTMAX
				1314	if (hs) for (d = 0; d < vocab[last_word].codelen; d++) {
				1315	f = 0;
				1316	l2 = vocab[last_word].point[d] * window_layer_size;
				1317	// Propagate hidden -> output
				1318	for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1_window[c + l2 + window_offset];
				1319	if (f <= -MAX_EXP) continue;
				1320	else if (f >= MAX_EXP) continue;
				1321	else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				1322	// 'g' is the gradient multiplied by the learning rate
				1323	g = (1 - vocab[last_word].code[d] - f) * alpha;
				1324	// Propagate errors output -> hidden
				1325	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1_window[c + l2 + window_offset];
				1326	// Learn weights hidden -> output
				1327	for (c = 0; c < layer1_size; c++) syn1[c + l2 + window_offset] += g * neu1[c];
				1328	}
				1329	// NEGATIVE SAMPLING
				1330	if (negative > 0) for (d = 0; d < negative + 1; d++) {
				1331	if (d == 0) {
				1332	target = last_word;
				1333	label = 1;
				1334	} else {
				1335	next_random = next_random * (unsigned long long)25214903917 + 11;
				1336	if(word_to_group != NULL && word_to_group[last_word] != -1){
				1337	target = last_word;
				1338	while(target == last_word) {
				1339	target = group_to_table[word_to_group[last_word]*table_size + (next_random >> 16) % table_size];
				1340	next_random = next_random * (unsigned long long)25214903917 + 11;
				1341	}
				1342	//printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
				1343	}
				1344	else{
				1345	target = table[(next_random >> 16) % table_size];
				1346	}
				1347	if (target == 0) target = next_random % (vocab_size - 1) + 1;
				1348	if (target == last_word) continue;
				1349	label = 0;
				1350	}
				1351	l2 = target * window_layer_size;
				1352	f = 0;
				1353	for (c = 0; c < layer1_size; c++) f += neu1[c] * syn1neg_window[c + l2 + window_offset];
				1354	if (f > MAX_EXP) g = (label - 1) * alpha;
				1355	else if (f < -MAX_EXP) g = (label - 0) * alpha;
				1356	else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha;
				1357	acc_g+=g;
				1358
				1359	for (c = 0; c < layer1_size; c++) neu1e[c] += g * syn1neg_window[c + l2 + window_offset];
				1360	for (c = 0; c < layer1_size; c++) syn1neg_window[c + l2 + window_offset] += g * neu1[c];
				1361
				1362	}
				1363
				1364	}
				1365	// Learn weights input -> hidden
				1366
				1367	if(rep == 1){
				1368	lstmBackward(c_word, strlen(c_word),neu1, f_states, b_states, chars, neu1e,f_states_e, b_states_e, chars_e, lstm_params_e);
				1369	}
				1370	else if(rep == 2){
				1371	g = 0;
				1372	l1 = word * layer1_size;
				1373	for (c = 0; c < layer1_size; c++) {
				1374	syn0[c + l1] += neu1e[c];
				1375	f = syn0[c + l1] - syn0_initial[c + l1];
				1376	if(f > 0){
				1377	g+=f;
				1378	}
				1379	else{
				1380	g-=f;
				1381	}
				1382	}
				1383	syn0_in_memory[word] = g;
				1384	if(global_divergence == -1){global_divergence = g;}
				1385	long skip_prob = vocab[word].cn-(log(vocab[word].cn)+1);
				1386	next_random = next_random * (unsigned long long)25214903917 + 11;
				1387
				1388	if(skip_prob < next_random%vocab[word].cn){
				1389	non_skip++;
				1390	if(in_mem == 0){
				1391	lstmFitting(c_word, strlen(c_word),neu1, f_states, b_states, chars,&syn0[c +l1], neu1e,f_states_e, b_states_e, chars_e, lstm_params_e);
				1392	}
				1393	else{
				1394	lstmBackward(c_word, strlen(c_word),neu1, f_states, b_states, chars, neu1e,f_states_e, b_states_e, chars_e, lstm_params_e);
				1395	}
				1396	syn0_in_memory[word]=-1;
				1397	}
				1398	else{
				1399	skip++;
				1400	}
				1401	global_divergence = global_divergence0.9 + g0.1;
				1402	}
				1403	else{
				1404	l1 = word * layer1_size;
				1405	for (c = 0; c < layer1_size; c++) syn0[c + l1] += neu1e[c];
				1406	}
				1407	}
				1408	else if(type == 4){ //training senna
				1409	// in -> hidden
				1410	cw = 0;
				1411	for (a = 0; a < window * 2 + 1; a++) if (a != window) {
				1412	c = sentence_position - window + a;
				1413	if (c < 0) continue;
				1414	if (c >= sentence_length) continue;
				1415	last_word = sen[c];
				1416	if (last_word == -1) continue;
				1417	window_offset = a*layer1_size;
				1418	if (a > window) window_offset-=layer1_size;
				1419	for (c = 0; c < layer1_size; c++) neu1[c+window_offset] += syn0[c + last_word * layer1_size];
				1420	cw++;
				1421	}
				1422	if (cw) {
				1423	for (a = 0; a < window_hidden_size; a++){
				1424	c = a*window_layer_size;
				1425	for(b = 0; b < window_layer_size; b++){
				1426	neu2[a] += syn_window_hidden[c + b] * neu1[b];
				1427	}
				1428	}
				1429	if (hs) for (d = 0; d < vocab[word].codelen; d++) {
				1430	f = 0;
				1431	l2 = vocab[word].point[d] * window_hidden_size;
				1432	// Propagate hidden -> output
				1433	for (c = 0; c < window_hidden_size; c++) f += hardTanh(neu2[c]) * syn_hidden_word[c + l2];
				1434	if (f <= -MAX_EXP) continue;
				1435	else if (f >= MAX_EXP) continue;
				1436	else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
				1437	// 'g' is the gradient multiplied by the learning rate
				1438	g = (1 - vocab[word].code[d] - f) * alpha;
				1439	// Propagate errors output -> hidden
				1440	for (c = 0; c < window_hidden_size; c++) neu2e[c] += dHardTanh(neu2[c],g) * g * syn_hidden_word[c + l2];
				1441	// Learn weights hidden -> output
				1442	for (c = 0; c < window_hidden_size; c++) syn_hidden_word[c + l2] += dHardTanh(neu2[c],g) * g * neu2[c];
				1443	}
				1444	// NEGATIVE SAMPLING
				1445	if (negative > 0) for (d = 0; d < negative + 1; d++) {
				1446	if (d == 0) {
				1447	target = word;
				1448	label = 1;
				1449	} else {
				1450	next_random = next_random * (unsigned long long)25214903917 + 11;
				1451	if(word_to_group != NULL && word_to_group[word] != -1){
				1452	target = word;
				1453	while(target == word) {
				1454	target = group_to_table[word_to_group[word]*table_size + (next_random >> 16) % table_size];
				1455	next_random = next_random * (unsigned long long)25214903917 + 11;
				1456	}
				1457	//printf("negative sampling %lld for word %s returned %s\n", d, vocab[word].word, vocab[target].word);
				1458	}
				1459	else{
				1460	target = table[(next_random >> 16) % table_size];
				1461	}
				1462	if (target == 0) target = next_random % (vocab_size - 1) + 1;
				1463	if (target == word) continue;
				1464	label = 0;
				1465	}
				1466	l2 = target * window_hidden_size;
				1467	f = 0;
				1468	for (c = 0; c < window_hidden_size; c++) f += hardTanh(neu2[c]) * syn_hidden_word_neg[c + l2];
				1469	if (f > MAX_EXP) g = (label - 1) * alpha / negative;
				1470	else if (f < -MAX_EXP) g = (label - 0) * alpha / negative;
				1471	else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha / negative;
				1472	for (c = 0; c < window_hidden_size; c++) neu2e[c] += dHardTanh(neu2[c],g) * g * syn_hidden_word_neg[c + l2];
				1473	for (c = 0; c < window_hidden_size; c++) syn_hidden_word_neg[c + l2] += dHardTanh(neu2[c],g) * g * neu2[c];
				1474	}
				1475	for (a = 0; a < window_hidden_size; a++)
				1476	for(b = 0; b < window_layer_size; b++)
				1477	neu1e[b] += neu2e[a] * syn_window_hidden[a*window_layer_size + b];
				1478	for (a = 0; a < window_hidden_size; a++)
				1479	for(b = 0; b < window_layer_size; b++)
				1480	syn_window_hidden[awindow_layer_size + b] += neu2e[a] neu1[b];
				1481	// hidden -> in
				1482	for (a = 0; a < window * 2 + 1; a++) if (a != window) {
				1483	c = sentence_position - window + a;
				1484	if (c < 0) continue;
				1485	if (c >= sentence_length) continue;
				1486	last_word = sen[c];
				1487	if (last_word == -1) continue;
				1488	window_offset = a * layer1_size;
				1489	if(a > window) window_offset -= layer1_size;
				1490	for (c = 0; c < layer1_size; c++) syn0[c + last_word * layer1_size] += neu1e[c + window_offset];
				1491	}
				1492	}
				1493	}
				1494	else{
				1495	printf("unknown type %i", type);
				1496	exit(0);
				1497	}
				1498	sentence_position++;
				1499	if (sentence_position >= sentence_length) {
				1500	sentence_length = 0;
				1501	continue;
				1502	}
				1503	}
				1504	fclose(fi);
				1505	free(neu1);
				1506	free(neu1e);
				1507	pthread_exit(NULL);
				1508	}
				1509
				1510	void TrainModel() {
				1511	long a, b, c, d;
				1512	FILE *fo;
				1513	pthread_t pt = (pthread_t )malloc(num_threads * sizeof(pthread_t));
				1514	printf("Starting training using file %s\n", train_file);
				1515	starting_alpha = alpha;
				1516	if (read_vocab_file[0] != 0) ReadVocab(); else LearnVocabFromTrainFile();
				1517	if (save_vocab_file[0] != 0) SaveVocab();
				1518	if (output_file[0] == 0) return;
				1519	InitNet();
				1520	if (negative > 0) InitUnigramTable();
				1521	if (negative_classes_file[0] != 0) InitClassUnigramTable();
				1522	start = clock();
				1523	for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
				1524	for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
				1525	fo = fopen(output_file, "wb");
				1526	if (classes == 0) {
				1527	// Save the word vectors
				1528	real f_states = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1529	real b_states = (real )calloc((c_state_size * 7) * (MAX_STRING + 1), sizeof(real));
				1530	real chars = (real )calloc(c_proj_size * MAX_STRING, sizeof(real));
				1531	real neu1 = (real )calloc(layer1_size * MAX_STRING, sizeof(real));
				1532
				1533	fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
				1534	for (a = 0; a < vocab_size; a++) {
				1535	fprintf(fo, "%s ", vocab[a].word);
				1536	if(rep == 1 \|\| rep == 2){
				1537	for (b = 0; b < layer1_size; b++) {neu1[b]=0;}
				1538	lstmForward(vocab[a].word, strlen(vocab[a].word),neu1, f_states,b_states,chars);
				1539	if (binary) for (b = 0; b < layer1_size; b++) fwrite(&neu1[b], sizeof(real), 1, fo);
				1540	else for (b = 0; b < layer1_size; b++) fprintf(fo, "%lf ", neu1[b]);
				1541	}
				1542	else{
				1543	if (binary) for (b = 0; b < layer1_size; b++) fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
				1544	else for (b = 0; b < layer1_size; b++) fprintf(fo, "%lf ", syn0[a * layer1_size + b]);
				1545	}
				1546	fprintf(fo, "\n");
				1547	}
				1548	} else {
				1549	// Run K-means on the word vectors
				1550	int clcn = classes, iter = 10, closeid;
				1551	int centcn = (int )malloc(classes * sizeof(int));
				1552	int cl = (int )calloc(vocab_size, sizeof(int));
				1553	real closev, x;
				1554	real cent = (real )calloc(classes * layer1_size, sizeof(real));
				1555	for (a = 0; a < vocab_size; a++) cl[a] = a % clcn;
				1556	for (a = 0; a < iter; a++) {
				1557	for (b = 0; b < clcn * layer1_size; b++) cent[b] = 0;
				1558	for (b = 0; b < clcn; b++) centcn[b] = 1;
				1559	for (c = 0; c < vocab_size; c++) {
				1560	for (d = 0; d < layer1_size; d++) cent[layer1_size * cl[c] + d] += syn0[c * layer1_size + d];
				1561	centcn[cl[c]]++;
				1562	}
				1563	for (b = 0; b < clcn; b++) {
				1564	closev = 0;
				1565	for (c = 0; c < layer1_size; c++) {
				1566	cent[layer1_size * b + c] /= centcn[b];
				1567	closev += cent[layer1_size * b + c] * cent[layer1_size * b + c];
				1568	}
				1569	closev = sqrt(closev);
				1570	for (c = 0; c < layer1_size; c++) cent[layer1_size * b + c] /= closev;
				1571	}
				1572	for (c = 0; c < vocab_size; c++) {
				1573	closev = -10;
				1574	closeid = 0;
				1575	for (d = 0; d < clcn; d++) {
				1576	x = 0;
				1577	for (b = 0; b < layer1_size; b++) x += cent[layer1_size * d + b] * syn0[c * layer1_size + b];
				1578	if (x > closev) {
				1579	closev = x;
				1580	closeid = d;
				1581	}
				1582	}
				1583	cl[c] = closeid;
				1584	}
				1585	}
				1586	// Save the K-means classes
				1587	for (a = 0; a < vocab_size; a++) fprintf(fo, "%s %d\n", vocab[a].word, cl[a]);
				1588	free(centcn);
				1589	free(cent);
				1590	free(cl);
				1591	}
				1592	fclose(fo);
				1593	}
				1594
				1595	int ArgPos(char str, int argc, char *argv) {
				1596	int a;
				1597	for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) {
				1598	if (a == argc - 1) {
				1599	printf("Argument missing for %s\n", str);
				1600	exit(1);
				1601	}
				1602	return a;
				1603	}
				1604	return -1;
				1605	}
				1606
				1607	int main(int argc, char **argv) {
				1608	int i;
				1609	if (argc == 1) {
				1610	printf("WORD VECTOR estimation toolkit v 0.1c\n\n");
				1611	printf("Options:\n");
				1612	printf("Parameters for training:\n");
				1613	printf("\t-train <file>\n");
				1614	printf("\t\tUse text data from <file> to train the model\n");
				1615	printf("\t-output <file>\n");
				1616	printf("\t\tUse <file> to save the resulting word vectors / word clusters\n");
				1617	printf("\t-size <int>\n");
				1618	printf("\t\tSet size of word vectors; default is 100\n");
				1619	printf("\t-window <int>\n");
				1620	printf("\t\tSet max skip length between words; default is 5\n");
				1621	printf("\t-sample <float>\n");
				1622	printf("\t\tSet threshold for occurrence of words. Those that appear with higher frequency in the training data\n");
				1623	printf("\t\twill be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)\n");
				1624	printf("\t-hs <int>\n");
				1625	printf("\t\tUse Hierarchical Softmax; default is 0 (not used)\n");
				1626	printf("\t-negative <int>\n");
				1627	printf("\t-negative-classes <file>\n");
				1628	printf("\t\tNumber of negative examples; default is 5, common values are 3 - 10 (0 = not used)\n");
				1629	printf("\t-threads <int>\n");
				1630	printf("\t\tUse <int> threads (default 12)\n");
				1631	printf("\t-iter <int>\n");
				1632	printf("\t\tRun more training iterations (default 5)\n");
				1633	printf("\t-min-count <int>\n");
				1634	printf("\t\tThis will discard words that appear less than <int> times; default is 5\n");
				1635	printf("\t-alpha <float>\n");
				1636	printf("\t\tSet the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW\n");
				1637	printf("\t-classes <int>\n");
				1638	printf("\t\tOutput word classes rather than word vectors; default number of classes is 0 (vectors are written)\n");
				1639	printf("\t-debug <int>\n");
				1640	printf("\t\tSet the debug mode (default = 2 = more info during training)\n");
				1641	printf("\t-binary <int>\n");
				1642	printf("\t\tSave the resulting vectors in binary moded; default is 0 (off)\n");
				1643	printf("\t-save-vocab <file>\n");
				1644	printf("\t\tThe vocabulary will be saved to <file>\n");
				1645	printf("\t-read-vocab <file>\n");
				1646	printf("\t\tThe vocabulary will be read from <file>, not constructed from the training data\n");
				1647	printf("\t-type <int>\n");
				1648	printf("\t\tType of embeddings (0 for cbow, 1 for skipngram, 2 for cwindow, 3 for structured skipngram, 4 for senna type)\n");
				1649	printf("\t-rep <int>\n");
				1650	printf("\t\tType of word rep (0 for word, 1 for character, 2 for character with short term memory\n");
				1651	printf("\t-char-state-dim <int>\n");
				1652	printf("\t\tcharacter state size\n");
				1653	printf("\t-char-proj-dim <int>\n");
				1654	printf("\t\tcharacter projection size\n");
				1655	printf("\nExamples:\n");
				1656	printf("./word2vec -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 -negative 5 -hs 0 -binary 0 -type 1 -iter 3\n\n");
				1657	return 0;
				1658	}
				1659	output_file[0] = 0;
				1660	save_vocab_file[0] = 0;
				1661	read_vocab_file[0] = 0;
				1662	negative_classes_file[0] = 0;
				1663	if ((i = ArgPos((char *)"-size", argc, argv)) > 0) layer1_size = atoi(argv[i + 1]);
				1664	if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]);
				1665	if ((i = ArgPos((char *)"-save-vocab", argc, argv)) > 0) strcpy(save_vocab_file, argv[i + 1]);
				1666	if ((i = ArgPos((char *)"-read-vocab", argc, argv)) > 0) strcpy(read_vocab_file, argv[i + 1]);
				1667	if ((i = ArgPos((char *)"-debug", argc, argv)) > 0) debug_mode = atoi(argv[i + 1]);
				1668	if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]);
				1669	if ((i = ArgPos((char *)"-type", argc, argv)) > 0) type = atoi(argv[i + 1]);
				1670	if (type==0 \|\| type==2 \|\| type==4) alpha = 0.05;
				1671	if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]);
				1672	if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]);
				1673	if ((i = ArgPos((char *)"-window", argc, argv)) > 0) window = atoi(argv[i + 1]);
				1674	if ((i = ArgPos((char *)"-sample", argc, argv)) > 0) sample = atof(argv[i + 1]);
				1675	if ((i = ArgPos((char *)"-hs", argc, argv)) > 0) hs = atoi(argv[i + 1]);
				1676	if ((i = ArgPos((char *)"-negative", argc, argv)) > 0) negative = atoi(argv[i + 1]);
				1677	if ((i = ArgPos((char *)"-negative-classes", argc, argv)) > 0) strcpy(negative_classes_file, argv[i + 1]);
				1678	if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]);
				1679	if ((i = ArgPos((char *)"-iter", argc, argv)) > 0) iter = atoi(argv[i + 1]);
				1680	if ((i = ArgPos((char *)"-min-count", argc, argv)) > 0) min_count = atoi(argv[i + 1]);
				1681	if ((i = ArgPos((char *)"-classes", argc, argv)) > 0) classes = atoi(argv[i + 1]);
				1682	if ((i = ArgPos((char *)"-rep", argc, argv)) > 0) rep = atoi(argv[i + 1]);
				1683	if ((i = ArgPos((char *)"-char-state-dim", argc, argv)) > 0) {c_state_size = atoi(argv[i + 1]); c_cell_size = c_state_size;}
				1684	if ((i = ArgPos((char *)"-char-proj-dim", argc, argv)) > 0) {c_proj_size = atoi(argv[i + 1]);}
				1685	vocab = (struct vocab_word *)calloc(vocab_max_size, sizeof(struct vocab_word));
				1686	vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int));
				1687	expTable = (real )malloc((EXP_TABLE_SIZE + 1) sizeof(real));
				1688	tanhTable = (real )malloc((EXP_TABLE_SIZE + 1) sizeof(real));
				1689	for (i = 0; i < EXP_TABLE_SIZE; i++) {
				1690	expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
				1691	expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1)
				1692	tanhTable[i] = tanh((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP);
				1693	}
				1694	TrainModel();
				1695	return 0;
				1696	}