w2v-server: split perl/c and html/ep/js
diff --git a/templates/index.html.ep b/templates/index.html.ep
new file mode 100644
index 0000000..9183278
--- /dev/null
+++ b/templates/index.html.ep
@@ -0,0 +1,409 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
+ <link rel="stylesheet" href="//code.jquery.com/ui/1.11.4/themes/smoothness/jquery-ui.css">
+ <!-- jquery-latest is a deprecated alias frozen at 1.11.1; pin that version and load it protocol-relative like the other assets --> <script src="//code.jquery.com/jquery-1.11.1.min.js"></script>
+ <script src="//code.jquery.com/ui/1.11.4/jquery-ui.js"></script>
+ <script>
+ $(function() { // jQuery ready handler: enable jQuery UI tooltips for the whole document
+ $( document ).tooltip({
+ content: function() {
+ return $(this).attr('title'); // tooltip content is the element's own title attribute
+ }}
+ )
+ })
+ </script>
+ <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
+ <script src="http://klinux10/word2vec/tsne.js"></script>
+ <script src="http://klinux10/word2vec/som.js"></script>
+ <script src="http://klinux10/word2vec/labeler.js"></script>
+ <style>
+ body, input {
+ font-family: Arial, sans-serif;
+ font-size: 11pt;
+ }
+
+ .ui-tooltip-content {
+ font-size: 9pt;
+ color: #222222;
+ }
+
+ svg > .ui-tooltip-content {
+ font-size: 8pt;
+ color: #222222;
+ }
+
+ a.merged {
+ color: green;
+ fill: green;
+ }
+
+ #first a {
+ text-decoration: none;
+ }
+
+ a.marked, #first a.marked {
+ text-decoration: underline;
+ }
+
+ a.target {
+ color: red;
+ fill: red;
+ }
+
+ #collocators {
+ margin-bottom: 15px;
+ }
+
+ #wrapper {
+ width: 100%;
+ /* border: 1px solid red; */
+ overflow: hidden; /* will contain if #first is longer than #second */
+ }
+ #first {
+ margin-right: 20px;
+ float: left;
+ /* border: 1px solid green; */
+ }
+ #second {
+ border: 1px solid #333;
+ overflow: hidden; /* if you don't want #second to wrap below #first */
+ }
+ #som2 svg {
+ border: 1px solid #333;
+ }
+
+ #cost {
+ font-size: 8pt;
+ color: #222222;
+ margin-top: 4px;
+ margin-bottom: 12px;
+ }
+
+ #sominfo1, #sominfo {
+ font-size: 8pt;
+ color: #222222;
+ margin-top: 0px;
+ }
+
+ #somcolor1, #somcolor2, #somcolor3 {
+ display: inline-block;
+ height: 10px;
+ width: 10px;
+ }
+
+ #third {
+ border: 1px solid #333;
+ }
+
+ </style>
+ <script>
+
+ var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>}, // options passed to the tSNE constructor; values are filled in by the template
+ mapWidth = 800, // width map
+ mapHeight = 800, // height map
+ jitterRadius = 7; // radius r assigned to every label anchor in stopStep()
+
+ var T = new tsnejs.tSNE(opt); // create a tSNE instance
+
+ var Y; // NOTE(review): not assigned anywhere in the visible code; functions below declare their own local Y
+
+ var data; // payload handed to showMap(); read as data.words, data.vecs, data.ranks, data.marked, data.target, data.mergedEnd
+ var labeler; // d3.labeler instance, created in stopStep()
+
+ function applyJitter() {
+ svg.selectAll('.tsnet')
+ .data(labels)
+ .transition()
+ .duration(50)
+ .attr("transform", function(d, i) {
+ T.Y[i][0] = (d.x - mapWidth/2 - tx)/ss/20;
+ T.Y[i][1] = (d.y - mapHeight/2 - ty)/ss/20;
+ return "translate(" +
+ (d.x) + "," +
+ (d.y) + ")";
+ });
+ }
+
+ function updateEmbedding() {
+ var Y = T.getSolution();
+ svg.selectAll('.tsnet')
+ .data(data.words)
+ .attr("transform", function(d, i) {
+ return "translate(" +
+ ((Y[i][0]*20*ss + tx) + mapWidth/2) + "," +
+ ((Y[i][1]*20*ss + ty) + mapHeight/2) + ")"; });
+ }
+
+ var svg;
+ var labels = [];
+ var anchor_array = [];
+ var text;
+
+ function drawEmbedding() {
+ $("#embed").empty();
+ var div = d3.select("#embed");
+
+ // get min and max in each column of Y
+ var Y = T.Y;
+
+ svg = div.append("svg") // svg is global
+ .attr("width", mapWidth)
+ .attr("height", mapHeight);
+
+ var g = svg.selectAll(".b")
+ .data(data.words)
+ .enter().append("g")
+ .attr("class", "tsnet");
+
+ g.append("a")
+ .attr("xlink:href", function(word) {return "/?word="+word;})
+ .attr("class", function(d, i) {
+ var res="";
+ if(data.marked[i]) {
+ res="marked ";
+ }
+ if(data.target.indexOf(" "+d+" ") >= 0) {
+ return res+"target";
+ } else if(data.ranks[i] < data.mergedEnd) {
+ return res+"merged";
+ } else {
+ return res;
+ }
+ })
+ .attr("title", function(d, i) {
+ if(data.mergedEnd > 0) {
+ if(data.ranks[i] >= data.mergedEnd) {
+ return "rank: "+i +" "+"freq. rank: "+(data.ranks[i]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
+ } else {
+ return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",") + " (merged vocab)";
+ }
+ } else {
+ return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
+ }
+ })
+ .append("text")
+ .attr("text-anchor", "top")
+ .attr("font-size", 12)
+ .text(function(d) { return d; });
+
+ var zoomListener = d3.behavior.zoom()
+ .scaleExtent([0.1, 10])
+ .center([0,0])
+ .on("zoom", zoomHandler);
+ zoomListener(svg);
+ }
+
+ var tx=0, ty=0;
+ var ss=1;
+ var iter_id=-1;
+
+ function zoomHandler() {
+ tx = d3.event.translate[0];
+ ty = d3.event.translate[1];
+ ss = d3.event.scale;
+ updateEmbedding();
+ }
+
+ var stepnum = 0;
+
+ function stopStep() {
+ clearInterval(iter_id);
+ text = svg.selectAll("text");
+
+ // jitter function needs different data and co-ordinate representation
+ labels = d3.range(data.words.length).map(function(i) {
+ var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
+ var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
+ anchor_array.push({x: x, y: y, r: jitterRadius});
+ return {
+ x: x,
+ y: y,
+ name: data.words[i]
+ };
+ });
+
+ // get the actual label bounding boxes for the jitter function
+ var index = 0;
+ text.each(function() {
+ labels[index].width = this.getBBox().width;
+ labels[index].height = this.getBBox().height;
+ index += 1;
+ });
+
+
+ // setTimeout(updateEmbedding, 1);
+ // setTimeout(
+ labeler = d3.labeler()
+ .label(labels)
+ .anchor(anchor_array)
+ .width(mapWidth)
+ .height(mapHeight)
+ .update(applyJitter);
+ // .start(1000);
+
+ iter_id = setInterval(jitterStep, 1);
+ }
+
+ var jitter_i=0;
+
+ function jitterStep() {
+ if(jitter_i++ > 100) {
+ clearInterval(iter_id);
+ } else {
+ labeler.start2(10);
+ applyJitter();
+ }
+ }
+
+ var last_cost=1000;
+
+ function step() {
+ var i = T.iter;
+
+ if(i > <%= $no_iterations %>) {
+ stopStep();
+ } else {
+ var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
+ $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
+ if(i % 250 == 0 && cost >= last_cost) {
+ stopStep();
+ } else {
+ last_cost = cost;
+ updateEmbedding();
+ }
+ }
+ }
+
+ function showMap(j) {
+ data=j;
+ T.iter=0;
+ T.initDataRaw(data.vecs); // init embedding
+ drawEmbedding(); // draw initial embedding
+
+ if(iter_id >= 0) {
+ clearInterval(iter_id);
+ }
+ //T.debugGrad();
+ iter_id = setInterval(step, 1);
+ if(<%= $show_som %>) {
+ makeSOM(j, <%= $no_iterations %>);
+ }
+ }
+
+ </script>
+ </head>
+ <body>
+ <form action="<%=url_for('/')->to_abs%>" method="GET">
+ word(s):
+ <input type="text" name="word" size="20" value="<%= $word %>" title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word.">
+ % if($mergedEnd > 0) {
+ backw. <input type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
+ % }
+ max. neighbours: <input type="text" size="8" name="n" value="<%= $no_nbs %>">
+ max. iterations: <input type="text" name="N" size="8" value="<%= $no_iterations %>">
+ SOM <input type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>>
+ % if($collocators) {
+ <span> </span>sort collocators by
+ <select name="sort">
+ <option value="0" <%= ($sort!=1? "selected":"") %>>responsiveness</option>
+ <option value="1" <%= ($sort==1? "selected":"") %>>mean p</option>
+ </select>
+ % }
+ <span> </span><input type="submit" value="Show">
+ </form>
+ <br>
+ % if($lists) {
+ <div id="wrapper">
+ <table id="first">
+ <tr>
+ <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
+ % if($collocators) {
+ <th title="Position in window around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title="&quot;Responsiveness&quot; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @." align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th><%# quotes inside a title value are entity-encoded so the attribute is not cut short %>
+ % }
+ </tr>
+ % my $j=0; my @words; my @vecs; my @ranks; my @marked;
+ % for my $list (@$lists) {
+ % my $i=0; while($list) {
+ % my $item = (@$list)[$i];
+ % my $c = ($collocators? (@$collocators)[$i] : 0);
+ % last if(!$c && !$item);
+ <tr>
+ <td align="right">
+ <%= ++$i %>.
+ </td>
+ % if($item) {
+ % if(!grep{$_ eq $item->{word}} @words) {
+ % push @vecs, $item->{vector};
+ % push @words, $item->{word};
+ % push @ranks, $item->{rank};
+ % push @marked, ($marked->{$item->{word}}? 1 : 0);
+ % }
+ <td align="right">
+ <%= sprintf("%.3f", $item->{dist}) %>
+ </td>
+ <td>
+ % my $class = ($marked->{$item->{word}}? "marked " : "");
+ % my $r = $item->{rank};
+ % if($r < $mergedEnd) {
+ % $class .= "merged";
+ % $r .= " (merged vocab)";
+ % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
+ % $r -= $mergedEnd;
+ % }
+ <a class="<%= $class %>" title="freq. rank: <%= $r %>" href="/?word=<%= $item->{word} %>"><%= $item->{word} %></a>
+ </td>
+ % } else {
+ <td colspan="2"/>
+ % }
+ % if($c) {
+ <td align="right">
+ <%= $c->{pos} %>:
+ </td>
+ <td align="right">
+ <%= sprintf("%.3f", $c->{dist}) %>
+ </td>
+ <td align="right">
+ <%= sprintf("%.3e", $c->{norm}) %>
+ </td>
+ <td align="right">
+ <%= sprintf("%.3e", $c->{sum}) %>
+ </td>
+ <td align="left"><%# collocator word, linked to a query for that word %>
+ <a href="/?word=<%= $c->{word} %>">
+ <%= $c->{word} %></a>
+ </td>
+ % } else {
+ <td colspan="5"/>
+ % }
+ </tr>
+ % }
+ % }
+ </table>
+ <script>
+ % use Mojo::ByteStream 'b';
+ $(window).load(function() {
+ showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked})); %>);
+ });
+ </script>
+ % }
+ <div id="second" style="width:800px; height:800px; font-family: arial;">
+ <div id="embed">
+ </div>
+ </div>
+ <div id="cost"></div>
+ % if($show_som) {
+ <div id="som2">
+ </div>
+ <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
+ <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
+ % }
+ </div>
+ % if($training_args) {
+ <p>
+ Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
+ </p>
+ % }
+ </body>
+</html>