w2v-server: add auto focus as window/sort option
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 26b8eac..1e57de8 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -26,7 +26,11 @@
font-family: Arial, sans-serif;
font-size: 11pt;
}
-
+
+ .mono {
+ font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
+ }
+
.ui-tooltip-content {
font-size: 9pt;
color: #222222;
@@ -320,10 +324,11 @@
max. iterations: <input type="text" name="N" size="4" value="<%= $no_iterations %>">
SOM <input type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>>
% if($collocators) {
- <span> </span>sort collocators by
+ <span> </span>window/sort
<select name="sort">
- <option value="0" <%= ($sort!=1? "selected":"") %>>responsiveness</option>
- <option value="1" <%= ($sort==1? "selected":"") %>>mean p</option>
+ <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
+ <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
+ <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option>
</select>
% }
<span> </span><input type="submit" value="Show">
@@ -336,7 +341,7 @@
<tr>
<th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
% if($collocators) {
- <th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title=""Responsivenes" of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th>
+ <th title="The window around the target word that is considered for summation. Absolute value can be too low because of sub-sampling frequent words.">w</th><th align="right" title="&quot;Responsiveness&quot; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ comes from the corpus.">resp.</th><th title="Probability of the collocator at window location @." align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th>
% }
</tr>
% my $j=0; my @words; my @vecs; my @ranks; my @marked;
@@ -379,7 +384,7 @@
% }
% if($c) {
<td align="right">
- <%= $c->{pos} %>:
+ <span class="mono"><%= bitvec2window($c->{pos}) %></span>
</td>
<td align="right">
<%= sprintf("%.3f", $c->{dist}) %>
diff --git a/w2v-server.pl b/w2v-server.pl
index c22aa4c..a649820 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -5,6 +5,8 @@
#use Inline C => Config => CLEAN_AFTER_BUILD => 0, ccflags => $Config{ccflags}." -Ofast -march k8 -mtune k8 ";
use Mojolicious::Lite;
use Mojo::JSON qw(decode_json encode_json to_json);
+use base 'Mojolicious::Plugin';
+
use Encode qw(decode encode);
use Getopt::Std;
use Mojo::Server::Daemon;
@@ -137,6 +139,16 @@
}
};
+helper(bitvec2window => sub { # Template helper: render an integer bit mask as a short string of '+' (bit set) and '·' (bit clear) with an 'x' marker in the middle.
+ my ($self, $n) = @_; # $n: the bit mask to render; $self is not used in this helper
+ my $str = unpack("B32", pack("N", $n)); # 32-character string of 0/1 digits, most significant bit first
+ $str =~ s/^\d{22}//; # drop the 22 leading digits, keeping only the 10 lowest bits
+ $str =~ s/^(\d{5})/$1x/; # insert 'x' after the first 5 remaining digits -- presumably the target word position in a window of 5 per side; TODO confirm against the configured window size
+ $str =~ s/0/·/g; # cleared bits are shown as a middle dot
+ $str =~ s/1/+/g; # set bits are shown as a plus sign
+ return $str; # 11 characters: 5 bit symbols, 'x', 5 bit symbols
+ });
+
$daemon->run; # app->start;
exit;
@@ -484,9 +496,9 @@
for (b = 0; b < pars->cutoff; b++)
pars->target_sums[b] += (target_sums[b] / wpos_sum ) / (window * 2);
free(target_sums);
- for(b=0; b<N && besti[b] >= 0; b++) // THIS LOOP IS NEEDED (b...)
- printf("%s %.2f %d * ", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
- printf("\n");
+ for(b=0; b<N && besti[b] >= 0; b++); // THIS LOOP IS NEEDED (b...)
+// printf("%s %.2f %d * ", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
+// printf("\n");
nbs = malloc(sizeof(knn));
nbs->index = besti;
nbs->dist = bestf;
@@ -496,6 +508,7 @@
pthread_exit(nbs);
}
+
wordlist *getTargetWords(char *st1, int search_backw) {
wordlist *wl = malloc(sizeof(wordlist));
char st[100][max_size], sep[100];
@@ -760,13 +773,67 @@
besti[b] = syn_nbs[0]->index[b];
bestd[b] = syn_nbs[0]->dist[b];
bestn[b] = syn_nbs[0]->norm[b];
- bestp[b] = syn_nbs[0]->pos[b];
+ bestp[b] = -1; // syn_nbs[0]->pos[b];
bests[b] = target_sums[syn_nbs[0]->index[b]];
}
-
- if(sort_by != 1) { // sort by responsiveness
+
+ float best_window_sum[MAX_NEIGHBOURS];
+ int found_index=0, i=0, j, w;
+ if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
for(a=1; a < syn_threads; a++) {
for(b=0; b < syn_nbs[a]->length; b++) {
+ for(i=0; i < found_index; i++)
+ if(besti[i] == syn_nbs[a]->index[b])
+ break;
+ if(i >= found_index) {
+ besti[found_index++] = syn_nbs[a]->index[b];
+// printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
+ }
+ }
+ }
+ printf("window: %d - syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
+ for(i=0; i < found_index; i++) {
+ for(w=1; w < (1 << syn_threads); w++) { // loop through all possible windows
+ float word_window_sum = 0;
+ int bits_set = 0;
+ for(a=1; a < syn_threads; a++) {
+ if((1 << a) & w) {
+ bits_set++;
+ for(b=0; b < syn_nbs[a]->length; b++)
+ if(besti[i] == syn_nbs[a]->index[b])
+ word_window_sum += syn_nbs[a]->dist[b];
+ }
+ }
+ if(bits_set)
+ word_window_sum /= bits_set;
+ if(word_window_sum > bestd[i]) {
+ bestd[i] = word_window_sum;
+ bestp[i] = w;
+ }
+ }
+ }
+ for(i=0; i<found_index;i++) {
+ for(j=0;j<found_index-1;j++) {
+ if(bestd[j]<bestd[j+1]) {
+ float tempd=bestd[j];
+ bestd[j]=bestd[j+1];
+ bestd[j+1]=tempd;
+ int tempi=besti[j];
+ besti[j]=besti[j+1];
+ besti[j+1]=tempi;
+ int tempp=bestp[j];
+ bestp[j]=bestp[j+1];
+ bestp[j+1]=tempp;
+ }
+ }
+ }
+// for(i=0; i < found_index; i++) {
+// printf("found: %s - sum: %f - window: %d\n", &vocab[besti[i] * max_w], bestd[i], bestp[i]);
+// }
+
+ } else if(sort_by ==1) { // single window position
+ for(a=1; a < syn_threads; a++) {
+ for(b=0; b < syn_nbs[a]->length; b++) {
for(c=0; c < MAX_NEIGHBOURS; c++) {
if(syn_nbs[a]->dist[b] > bestd[c]) {
for(d=MAX_NEIGHBOURS-1; d>c; d--) {
@@ -778,7 +845,7 @@
besti[c] = syn_nbs[a]->index[b];
bestd[c] = syn_nbs[a]->dist[b];
bestn[c] = syn_nbs[a]->norm[b];
- bestp[c] = syn_nbs[a]->pos[b];
+ bestp[c] = 1 << (-syn_nbs[a]->pos[b]+window - (syn_nbs[a]->pos[b] < 0 ? 1:0));
break;
}
}
@@ -799,7 +866,7 @@
besti[c] = syn_nbs[a]->index[b];
bestd[c] = syn_nbs[a]->dist[b];
bestn[c] = syn_nbs[a]->norm[b];
- bestp[c] = syn_nbs[a]->pos[b];
+ bestp[c] = (1 << 2*window) - 1; // syn_nbs[a]->pos[b];
bests[c] = target_sums[syn_nbs[a]->index[b]];
break;
}