w2v-server: add column and overall normalized sums
diff --git a/w2v-server.pl b/w2v-server.pl
index 38b8cb8..9c02dc4 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -189,6 +189,7 @@
long long wordi;
long position;
float activation;
+ float cprobability; // column-wise probability
float probability;
float activation_sum;
float conorm;
@@ -500,14 +501,15 @@
}
for (b = 0; b < N; b++)
if(best[b].position == window-a)
- best[b].probability = best[b].activation / wpos_sum;
+ best[b].cprobability = best[b].activation / wpos_sum;
} else {
printf("\x1b[1m%s\x1b[0m ", &vocab[d*max_w]);
}
pars->window_sums[a] = wpos_sum;
}
for (b = 0; b < pars->cutoff; b++)
- pars->target_sums[b] += (target_sums[b] / wpos_sum ) / (window * 2);
+ pars->target_sums[b] += target_sums[b]; //(target_sums[b] / wpos_sum ) / (window * 2);
+ printf("Target-Summe von 0: %f\n", pars->target_sums[150298]);
free(target_sums);
for(b=0; b<N && best[b].wordi >= 0; b++);; // THIS LOOP IS NEEDED (b...)
// printf("%d: best syn: %s %.2f %.5f\n", b, &vocab[best[b].wordi*max_w], best[b].activation, best[b].probability);
@@ -783,11 +785,16 @@
best[b].activation_sum = 0;
}
+ float total_activation = 0;
+
if (M2) {
printf("Waiting for syn threads to join\n");
fflush(stdout);
for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], (void *) &syn_nbs[a]);
- for (a = 0; a <= syn_threads; a++) printf("window pos: %d, sum: %f\n", a, window_sums[a]);
+ for (a = 0; a <= syn_threads; a++) {
+ total_activation += window_sums[a];
+ printf("window pos: %d, sum: %f\n", a, window_sums[a]);
+ }
printf("syn threads joint\n");
fflush(stdout);
@@ -798,6 +805,7 @@
best[b].max_activation = 0.0;
best[b].conorm = 0.0;
best[b].probability = 0.0;
+ best[b].cprobability = syn_nbs[0]->best[b].cprobability;
}
float best_window_sum[MAX_NEIGHBOURS];
@@ -811,11 +819,14 @@
best[found_index].max_activation = 0.0;
best[found_index].conorm = 0.0;
best[found_index].probability = 0.0;
+ best[found_index].cprobability = syn_nbs[a]->best[b].cprobability;
+ best[found_index].activation_sum = target_sums[syn_nbs[a]->best[b].wordi]; // syn_nbs[a]->best[b].activation_sum;
best[found_index++].wordi = syn_nbs[a]->best[b].wordi;
// printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
}
}
}
+ sort_by =0; // ALWAYS AUTO-FOCUS
if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
printf("window: %d - syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
int wpos;
@@ -837,15 +848,18 @@
bits_set++;
for(b=0; b < syn_nbs[a]->length; b++)
if(best[i].wordi == syn_nbs[a]->best[b].wordi) {
+// float acti = syn_nbs[a]->best[b].activation / total_window_sum;
// word_window_sum += syn_nbs[a]->dist[b] * syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]); // syn_nbs[a]->norm[b];
+
word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
- word_window_conorm += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation; // conormalied activation sum
+// word_window_sum += acti - (word_window_sum * acti); syn_nbs[a]->best[b].activation; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
+
+ word_window_conorm += syn_nbs[a]->best[b].activation - word_window_conorm * syn_nbs[a]->best[b].activation; // conormalized activation sum
word_activation_sum += syn_nbs[a]->best[b].activation;
if(syn_nbs[a]->best[b].activation > best[i].max_activation)
best[i].max_activation = syn_nbs[a]->best[b].activation;
- word_activation_sum += syn_nbs[a]->best[b].activation;
}
}
}
@@ -853,7 +867,8 @@
// word_activation_sum /= bits_set;
// word_window_sum /= bits_set;
// }
- word_window_sum /= total_window_sum;
+
+ word_window_sum /= total_window_sum;
if(word_window_sum > best[i].probability) {
best[i].probability = word_window_sum;
@@ -924,17 +939,19 @@
array = newAV();
for (a = 0, i=0; a < MAX_NEIGHBOURS && best[a].wordi >= 0; a++) {
long long c = best[a].wordi;
+/*
if (dedupe) {
int filtered=0;
for (j=0; j<i; j++)
if (strcasestr(&vocab[c * max_w], chosen[j]) ||
strcasestr(chosen[j], &vocab[c * max_w])) {
- printf("filtering %s %s\n", chosen[j], &vocab[c * max_w]);
- filtered = 1;
- }
+ printf("filtering %s %s\n", chosen[j], &vocab[c * max_w]);
+ filtered = 1;
+ }
if(filtered)
continue;
}
+*/
chosen[i++]=&vocab[c * max_w];
HV* hash = newHV();
SV* word = newSVpvf(&vocab[best[a].wordi * max_w], 0);
@@ -943,7 +960,9 @@
hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
+ hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability), 0);
hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
+ hv_store(hash, "overall", strlen("overall"), newSVnv(best[a].activation_sum/total_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
av_push(array, newRV_noinc((SV*)hash));
}