w2v-server.pl: add tsne visualization
diff --git a/w2v-server.pl b/w2v-server.pl
index 897e7b1..f51f9da 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -1,11 +1,12 @@
 #!/usr/local/bin/perl
 use Inline C;
 use Mojolicious::Lite;
+use Mojo::JSON qw(decode_json encode_json to_json);
 use Encode qw(decode encode);
 use Mojo::Server::Daemon;
 
 # -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 40 -binary 1 -iter 15
-init_net("vectors15.bin");
+init_net("vectors14.bin");
 
 get '/' => sub {
   my $c    = shift;
@@ -43,7 +44,7 @@
 char *bestw[MAX_NEIGHBOURS];
 char file_name[max_size], st[100][max_size];
 float dist, len, bestd[MAX_NEIGHBOURS], vec[max_size];
-long long words, size, a, b, c, d, cn, bi[100];
+long long words, size, a, b, c, d, cn, bi[100], besti[MAX_NEIGHBOURS];
 char ch;
 float *M;
 char *vocab;
@@ -145,6 +146,9 @@
 					strcpy(bestw[d], bestw[d - 1]);
+					/* Move the vocabulary index down with its word so besti stays aligned with bestw. */
+					besti[d] = besti[d - 1];
 				}
 				bestd[a] = dist;
+				besti[a] = c; /* vocabulary index of the word stored in bestw[a] */
 				strcpy(bestw[a], &vocab[c * max_w]);
 				break;
 			}
@@ -155,6 +157,13 @@
 		HV* hash = newHV();
-		hv_store(hash, "word", strlen("word"), newSVpvf(bestw[a], 0), 0);
+		/* newSVpv, not newSVpvf: the word is data and must never be interpreted as a format string. */
+		hv_store(hash, "word", strlen("word"), newSVpv(bestw[a], 0), 0);
 		hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
+		/* Export the neighbour's vector: `size` consecutive floats of M starting at besti[a] * size. */
+		AV *vector = newAV();
+		for (b = 0; b < size; b++) {
+			av_push(vector, newSVnv(M[b + besti[a] * size]));
+		}
+		hv_store(hash, "vector", strlen("vector"), newRV_noinc((SV*)vector), 0);
 		av_push(array, newRV_noinc((SV*)hash));
 	}
  end:
@@ -167,7 +174,138 @@
 @@ index.html.ep
 <!DOCTYPE html>
 <html>
-<head><title>DeReKo-Word-Vector-Distances</title></head>
+<head>
+	<title>DeReKo-Word-Vector-Distances</title>
+	<script src="https://code.jquery.com/jquery-1.11.1.min.js"></script> <!-- pinned: jquery-latest is frozen at 1.11.1 and discouraged upstream -->
+	<script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
+	<script src="http://klinux10/word2vec/tsne.js"></script>
+<style>
+svg {
+  /* border: 1px solid #333; */
+  /* margin-right: 5px; */
+  /* margin-bottom: 5px; */
+}
+#wrapper {
+    width: 100%;
+    /* border: 1px solid red; */
+    overflow: hidden; /* will contain if #first is longer than #second */
+}
+#first {
+ width: 300px;
+ margin-right: 20px;
+    float:left; /* puts the table to the left of #second */
+    /* border: 1px solid green; */
+}
+#second {
+    border: 1px solid #333;
+    overflow: hidden; /* if you don't want #second to wrap below #first */
+}
+</style>
+<script>
+
+// Options handed to the tsnejs constructor.
+var opt = {epsilon: 1, perplexity: 8};
+var T = new tsnejs.tSNE(opt); // the single t-SNE instance used by this page
+var Y;
+var data; // object last passed to showMap(): {words: [...], vecs: [...]}
+// Move every word label to its current t-SNE position, applying zoom (ss) and pan (tx, ty).
+function updateEmbedding() {
+  var solution = T.getSolution();
+  function place(d, i) {
+    var x = (solution[i][0]*20*ss + tx) + 400;
+    var y = (solution[i][1]*20*ss + ty) + 400;
+    return "translate(" + x + "," + y + ")";
+  }
+  svg.selectAll('.u').data(data.words).attr("transform", place);
+}
+
+var svg; // root <svg> selection; assigned by drawEmbedding()
+// Rebuild the plot in #embed: a fresh 800x800 <svg> with one <g class="u"><text> label
+// per entry of data.words and zoom/pan handling; updateEmbedding() positions the labels.
+function drawEmbedding() {
+    $("#embed").empty();
+    var div = d3.select("#embed");
+
+    svg = div.append("svg") // svg is global
+    .attr("width", 800)
+    .attr("height", 800);
+
+    // The svg is new, so this selection is empty and enter() yields every word (selector was ".b").
+    var g = svg.selectAll(".u")
+      .data(data.words)
+      .enter().append("g")
+      .attr("class", "u");
+
+    g.append("text")
+      .attr("text-anchor", "start") // "top" is not a valid text-anchor value
+      .attr("font-size", 12)
+      .attr("fill", "#333")
+      .text(function(d) { return d; });
+
+    var zoomListener = d3.behavior.zoom()
+      .scaleExtent([0.1, 10])
+      .center([0,0])
+      .on("zoom", zoomHandler);
+    zoomListener(svg);
+}
+
+var tx=0, ty=0;  // current pan offset, added to every label position
+var ss=1;        // current zoom factor, multiplied into every label position
+var iter_id=-1;  // setInterval handle of the optimization timer; -1 until the first run
+// d3 zoom callback: record the new pan/zoom state and reposition the labels.
+function zoomHandler() {
+  var move = d3.event.translate;
+  tx = move[0]; ty = move[1];
+  ss = d3.event.scale;
+  updateEmbedding();
+}
+
+var stepnum = 0;
+// Cancel the interval timer that drives step().
+function stopStep() {
+  clearInterval(iter_id);
+}
+// Timer callback: advance the optimization once and redraw; stop after 1000 iterations.
+function step() {
+  var i = T.iter;
+  if (i >= 1000) {
+    stopStep();
+    return;
+  }
+  var cost = T.step(); // value is shown as the current cost
+  $("#cost").html("iteration " + i + ", cost: " + cost);
+  updateEmbedding();
+}
+
+// Entry point used by the result page: (re)start a t-SNE run for
+// j = {words: [...], vecs: [...]} and animate it inside #embed.
+function showMap(j) {
+  data = j;
+  T.iter = 0;
+  T.initDataRaw(data.vecs); // init embedding
+  drawEmbedding(); // draw initial embedding
+  // Stop the timer of a previous run, if any, before starting a new one.
+  if (iter_id >= 0) {
+    clearInterval(iter_id);
+  }
+  iter_id = setInterval(step, 1);
+}
+
+// Development aid: render a canned data set instead of a server result.
+// It used to be "disabled" by registering it through the non-existent
+// jQuery method xxload(), which threw a TypeError on every page load.
+// It is now a plain function that only runs when called explicitly,
+// e.g. from the browser console.
+function showDemoMap() {
+  $.getJSON( "http://klinux10/word2vec/dings.json", function( j ) {
+    // showMap() resets the iteration counter, initializes t-SNE from
+    // j.vecs, redraws the plot and (re)starts the optimization timer.
+    showMap(j);
+  });
+}
+
+</script>
+</head>
 <body>
 	<p>Word vector model based on  DeReKo-2015-II. Trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters:</p>
   <pre>
@@ -182,11 +320,14 @@
   <br>
 	% if($list) {
   <h3>Nearest neighbours of "<%= $word %>"</h3>
-  <table>
+<div id="wrapper">
+    <table id="first">
 		<tr>
      <th align="right">Pos.</th><th align="left">Word</th><th align="right">Cosine dist.</th>
    </tr>
-	% my $i=1; for my $item (@$list) {
+	% my $i=1; my @words; my @vecs; for my $item (@$list) {
+  % push @vecs, $item->{vector};
+  % push @words, $item->{word};
 		<tr>
      <td align="right">
   		<%= $i++ %>.
@@ -202,7 +343,18 @@
 		</tr>
 	% }
   </table>
+		<script>
+			% use Mojo::ByteStream 'b';
+$(window).on("load", function() { // .on("load") instead of the .load() shorthand, which jQuery deprecated and later removed
+			showMap(<%= b(Mojo::JSON::to_json({words => \@words, vecs => \@vecs})); %>);
+});
+    </script>
 	% }
-</body>
+<div id="second" style="width:800px; height:800px; font-family: arial;">
+<div id="embed"></div>
+<div id="cost" style="text-align:left; font-family: Impact;"></div>
+  </div>
+  </div>
+  </body>
 </html>