w2v-server: add option -M to mark (underline) words that appear in a file
diff --git a/w2v-server.pl b/w2v-server.pl
index a255b2f..edd04a4 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -13,13 +13,25 @@
our $opt_l = undef;
our $opt_p = 5676;
our $opt_m;
+our $opt_M;
our $opt_n = '';
our $opt_d;
+my %marked;
my $training_args="";
my $mergedEnd=0;
-getopt('d:il:p:m:');
+getopt('d:il:p:m:M:');
+
+if($opt_M) { # -M: file of whitespace-separated words to mark (underline) in the output
+  open(my $marked_fh, '<', $opt_M) or die "cannot open $opt_M: $!"; # 3-arg open: file name is never parsed as a mode
+  while(my $line = <$marked_fh>) {
+    foreach my $mw (split ' ', $line) { # split ' ' skips leading whitespace, so no empty word is stored
+      $marked{$mw}=1;
+    }
+  }
+  close($marked_fh);
+}
# -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 40 -binary 1 -iter 15
if(!$ARGV[0]) {
@@ -36,6 +48,7 @@
$mergedEnd = mergeVectors($opt_m);
}
+
if($opt_d) { # -d: dump vecs and exit
dump_vecs($opt_d);
exit;
@@ -73,7 +86,7 @@
}
}
$word =~ s/ *\| */ | /g;
- $c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, lists=> \@lists, collocators=> $res->{syntagmatic});
+ $c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
};
$daemon->run; # app->start;
@@ -755,9 +768,12 @@
fill: green;
}
+#first a:not(.marked) { /* exclude .marked: an ID selector would otherwise outrank the a.marked underline rule */
+ text-decoration: none;
+}
+
a.marked {
- color: orange;
- fill: orange;
+ text-decoration: underline;
}
a.target {
@@ -874,12 +890,16 @@
g.append("a")
.attr("xlink:href", function(word) {return "/?word="+word;})
.attr("class", function(d, i) {
+ var res="";
+ if(data.marked[i]) {
+ res="marked ";
+ }
if(data.target.indexOf(" "+d+" ") >= 0) {
- return "target";
- } else if(data.mergedEnd > 0 && data.ranks[i] < data.mergedEnd) {
- return "merged";
+ return res+"target";
+ } else if(data.ranks[i] < data.mergedEnd) {
+ return res+"merged";
} else {
- return ""
+ return res;
}
})
.attr("title", function(d, i) {
@@ -1033,7 +1053,7 @@
<th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title=""Responsivenes" of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th>
% }
</tr>
- % my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
+ % my $j=0; my @words; my @vecs; my @ranks; my @marked; for my $list (@$lists) {
% my $i=0; while($list) {
% my $item = (@$list)[$i];
% my $c = ($collocators? (@$collocators)[$i] : 0);
@@ -1047,15 +1067,16 @@
% push @vecs, $item->{vector};
% push @words, $item->{word};
% push @ranks, $item->{rank};
+ % push @marked, ($marked->{$item->{word}}? 1 : 0);
% }
<td align="right">
<%= sprintf("%.3f", $item->{dist}) %>
</td>
<td>
- % my $class = "";
+ % my $class = ($marked->{$item->{word}}? "marked " : "");
% my $r = $item->{rank};
% if($r < $mergedEnd) {
- % $class="merged";
+ % $class .= "merged";
% $r .= " (merged vocab)";
% } elsif($mergedEnd!=0 && $r > $mergedEnd) {
% $r -= $mergedEnd;
@@ -1092,7 +1113,7 @@
<script>
% use Mojo::ByteStream 'b';
$(window).load(function() {
- showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks})); %>);
+ showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked})); %>);
});
</script>
% }