w2v-server: show window/column normalized collocators
diff --git a/w2v-server.pl b/w2v-server.pl
index 0487d71..38b8cb8 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -189,9 +189,10 @@
long long wordi;
long position;
float activation;
- float activation_sum;
- float probability_sum;
float probability;
+ float activation_sum;
+ float conorm;
+ float max_activation;
} collocator;
typedef struct {
@@ -794,6 +795,9 @@
memcpy(best + b, &syn_nbs[0]->best[b], sizeof(collocator));
best[b].position = -1; // syn_nbs[0]->pos[b];
best[b].activation_sum = target_sums[syn_nbs[0]->best[b].wordi];
+ best[b].max_activation = 0.0;
+ best[b].conorm = 0.0;
+ best[b].probability = 0.0;
}
float best_window_sum[MAX_NEIGHBOURS];
@@ -804,6 +808,9 @@
if(best[i].wordi == syn_nbs[a]->best[b].wordi)
break;
if(i >= found_index) {
+ best[found_index].max_activation = 0.0;
+ best[found_index].conorm = 0.0;
+ best[found_index].probability = 0.0;
best[found_index++].wordi = syn_nbs[a]->best[b].wordi;
// printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
}
@@ -813,9 +820,9 @@
printf("window: %d - syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
int wpos;
for(i=0; i < found_index; i++) {
- best[i].activation = 0; best[i].probability = 0;
+ best[i].activation = best[i].probability = best[i].conorm = 0;
for(w=1; w < (1 << syn_threads); w++) { // loop through all possible windows
- float word_window_sum = 0, word_activation_sum = 0, total_window_sum = 0;
+ float word_window_sum = 0, word_window_conorm=0, word_activation_sum = 0, total_window_sum = 0;
int bits_set = 0;
for(a=0; a < syn_threads; a++) {
if((1 << a) & w) {
@@ -831,10 +838,13 @@
for(b=0; b < syn_nbs[a]->length; b++)
if(best[i].wordi == syn_nbs[a]->best[b].wordi) {
// word_window_sum += syn_nbs[a]->dist[b] * syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
-// word_window_sum += syn_nbs[a]->dist[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]); // syn_nbs[a]->norm[b];
- word_window_sum += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation; // conormalied activation sum
+ word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
+ word_window_conorm += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation; // conormalized activation sum
+ word_activation_sum += syn_nbs[a]->best[b].activation;
+ if(syn_nbs[a]->best[b].activation > best[i].max_activation)
+ best[i].max_activation = syn_nbs[a]->best[b].activation;
word_activation_sum += syn_nbs[a]->best[b].activation;
}
}
@@ -843,13 +853,17 @@
// word_activation_sum /= bits_set;
// word_window_sum /= bits_set;
// }
-// word_window_sum /= total_window_sum;
+ word_window_sum /= total_window_sum;
if(word_window_sum > best[i].probability) {
best[i].probability = word_window_sum;
- best[i].activation = word_activation_sum;
best[i].position = w;
}
+
+ if(word_window_conorm > best[i].conorm) {
+ best[i].conorm = word_window_conorm;
+ best[i].activation = word_activation_sum;
+ }
}
}
qsort(best, found_index, sizeof(collocator), cmp_probability);
@@ -927,9 +941,9 @@
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
- hv_store(hash, "dist", strlen("dist"), newSVnv(best[a].activation), 0);
- hv_store(hash, "norm", strlen("norm"), newSVnv(best[a].probability), 0);
- hv_store(hash, "sum", strlen("sum"), newSVnv(target_sums[best[a].wordi]), 0);
+ hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
+ hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
+ hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
av_push(array, newRV_noinc((SV*)hash));
}