w2v-server: add option -M to mark (underline) words that appear in a file

commit: 6ed81876a246bbe3396a5c12a7209d3ca4d10ba2 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Wed Apr 27 14:04:04 2016 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Wed Apr 27 14:04:04 2016 +0200
tree: 1dc09de520d8001eb79943bd353a592dc2dbaedf
parent: a2e645094b58d75beb90ca7bba269570ea4f3583 [diff] [blame]
diff --git a/w2v-server.pl b/w2v-server.pl
index a255b2f..edd04a4 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl

@@ -13,13 +13,25 @@
 our $opt_l = undef;
 our $opt_p = 5676;
 our $opt_m;
+our $opt_M;
 our $opt_n = '';
 our $opt_d;
 
+my %marked;
 my $training_args="";
 my $mergedEnd=0;
 
-getopt('d:il:p:m:'); 
+getopt('d:il:p:m:M:'); 
+
+if($opt_M) { # -M: file of whitespace-separated words to mark (underline) in the output
+  open(my $marked_fh, '<', $opt_M) or die "cannot open $opt_M: $!"; # 3-arg open: the file name is never parsed as a mode
+  while(my $line = <$marked_fh>) {
+    foreach my $mw (split ' ', $line) { # ' ' also skips leading whitespace, so no empty-string key is stored
+      $marked{$mw}=1;
+    }
+  }
+  close($marked_fh);
+}
 
 # -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 40 -binary 1 -iter 15
 if(!$ARGV[0]) {
@@ -36,6 +48,7 @@
   $mergedEnd = mergeVectors($opt_m);
 }
 
+
 if($opt_d) { # -d: dump  vecs and exit
 	dump_vecs($opt_d);
 	exit;
@@ -73,7 +86,7 @@
 		}
 	}
 	$word =~ s/ *\| */ | /g;
-	$c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, lists=> \@lists, collocators=> $res->{syntagmatic});
+	$c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
 };
 
 $daemon->run; # app->start;
@@ -755,9 +768,12 @@
   fill: green;
 }
 
+#first a {
+  text-decoration: none;
+}
+
 a.marked {
-  color: orange;
-  fill: orange;
+  text-decoration: underline;
 }
 
 a.target {
@@ -874,12 +890,16 @@
   g.append("a")
 	 .attr("xlink:href", function(word) {return "/?word="+word;})
    .attr("class", function(d, i) {
+     var res="";
+     if(data.marked[i]) {
+			  res="marked ";
+      }
 			if(data.target.indexOf(" "+d+" ") >= 0) {
-				return "target";
-			} else if(data.mergedEnd > 0 && data.ranks[i] < data.mergedEnd) {
-			  return "merged";
+				return res+"target";
+			} else if(data.ranks[i] < data.mergedEnd) {
+			  return res+"merged";
 			} else {
-				return ""
+				return res;
 			}
 	 })
 	 .attr("title", function(d, i) {
@@ -1033,7 +1053,7 @@
 				<th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title="&#34;Responsivenes&#34; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th>
 				% }
 			</tr>
-			% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
+			% my $j=0; my @words; my @vecs; my @ranks; my @marked; for my $list (@$lists) {
 			% my $i=0; while($list) {
       % my $item = (@$list)[$i];
       % my $c = ($collocators? (@$collocators)[$i] : 0);
@@ -1047,15 +1067,16 @@
         %   push @vecs, $item->{vector};
 			  %   push @words, $item->{word};
 			  %   push @ranks, $item->{rank};
+			  %   push @marked, ($marked->{$item->{word}}? 1 : 0);
         % }
 				<td align="right">
   				<%= sprintf("%.3f", $item->{dist}) %>
 				</td>
 				<td>
-          % my $class = ""; 
+          % my $class = ($marked->{$item->{word}}? "marked " : ""); 
           % my $r = $item->{rank}; 
           % if($r < $mergedEnd) {
-          %   $class="merged";
+          %   $class .= "merged";
           %   $r .= " (merged vocab)";
           % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
           %   $r -= $mergedEnd;
@@ -1092,7 +1113,7 @@
 		<script>
 		 % use Mojo::ByteStream 'b';
 		 $(window).load(function() {
-			 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks})); %>);
+			 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked})); %>);
 		 });
     </script>
 		% }
commit	6ed81876a246bbe3396a5c12a7209d3ca4d10ba2	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Apr 27 14:04:04 2016 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Apr 27 14:04:04 2016 +0200
tree	1dc09de520d8001eb79943bd353a592dc2dbaedf
parent	a2e645094b58d75beb90ca7bba269570ea4f3583 [diff] [blame]