w2v-server: move js to folder
diff --git a/js/som.js b/js/som.js
new file mode 100644
index 0000000..ca16d28
--- /dev/null
+++ b/js/som.js
@@ -0,0 +1,286 @@
+// Javascript implementation pf Kohonen's Self Organizing Map
+// Based on http://www.ai-junkie.com/ann/som/som1.html
+
+var mapWidth = 800,
+    mapHeight = 800;
+
+function getDistance(weight, inputVector) {
+	var distance = 0;
+	for (var i = 0; i <weight.length; i++) {
+		distance += (inputVector[i] - weight[i]) * (inputVector[i] - weight[i]);
+	}
+	return Math.sqrt(distance);
+}
+
+function makeRandomWeights(vSize, eSize) {
+	var weights = [];
+  if(typeof ArrayBuffer === 'undefined') {
+    // lacking browser support
+	  while (weights.length < vSize) {
+      var arr = new Array(eSize);
+      for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
+      weights.push(arr);
+    }
+  } else {
+	  while (weights.length < vSize) {
+      var arr = new Float64Array(eSize);
+      for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
+      weights.push(arr);
+    }
+  }
+  return weights;
+}
+
+function getBMUIndex(weights, target) {
+	var BMUIndex = 0;
+	var bestScore = 99999;
+	
+	for (i=0; i < weights.length; i++) {
+		distance = getDistance(weights[i], target);
+		if (distance < bestScore) {
+			bestScore = distance;
+			BMUIndex = i;
+		}
+	}
+	return BMUIndex;
+}
+
+function convertIndexToXY(idx, dimW) {
+	var x = parseInt(idx % dimW,10);
+	var y = parseInt((idx / dimW),10);
+	return [x,y];
+}
+
+
+function getEucledianDistance(coord1, coord2) {
+    return (coord1[0] - coord2[0]) * (coord1[0] - coord2[0]) + (coord1[1] - coord2[1]) * (coord1[1] - coord2[1]);
+}
+
+// utilitity that creates contiguous vector of zeros of size n
+  var zeros = function(n) {
+    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
+    if(typeof ArrayBuffer === 'undefined') {
+      // lacking browser support
+      var arr = new Array(n);
+      for(var i=0;i<n;i++) { arr[i]= 0; }
+      return arr;
+    } else {
+      return new Float64Array(n); // typed arrays are faster
+    }
+  }
+
+// compute L2 distance between two vectors
+var L2 = function(x1, x2) {
+  var D = x1.length;
+  var d = 0;
+  for(var i=0;i<D;i++) { 
+    var x1i = x1[i];
+    var x2i = x2[i];
+    d += (x1i-x2i)*(x1i-x2i);
+  }
+  return d;
+}
+
+// compute pairwise distance in all vectors in X
+var xtod = function(X) {
+    var N = X.length;
+  var dist = zeros(N * N); // allocate contiguous array
+  for(var i=0;i<N;i++) {
+    for(var j=i+1;j<N;j++) {
+      var d = L2(X[i], X[j]);
+      dist[i*N+j] = d;
+      dist[j*N+i] = d;
+      }
+  }
+  return dist;
+}
+
+function dotproduct(a,b) {
+    var n = 0, lim = Math.min(a.length,b.length);
+    for (var i = 0; i < lim; i++) n += a[i] * b[i];
+    return n;
+ }
+
+function vecsum(a,b) {
+  var lim = a.length;
+  var sum = new Array(lim);
+  for (var i = 0; i < lim; i++) sum[i] = a[i] + b[i];
+  return sum;
+ }
+
+function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);}
+
+function cosine_sim(x, y) {
+    xnorm = norm2(x);
+    if(!xnorm) return 0;
+    ynorm = norm2(y);
+    if(!ynorm) return 0;
+    return dotproduct(x, y) / (xnorm * ynorm);
+}
+
+function makeSOM(data, training_iterations) {
+	var dimW = 6;
+	var dimH = 6;
+  
+	var radius = (dimW * dimH) / 2;
+	var learning_rate = 1;
+	var time_constant = training_iterations / Math.log(radius);
+	var inputs = xtod(data.vecs);
+  var dimI = data.vecs.length;
+	var weights = makeRandomWeights(dimW * dimH, dimI);
+	var radius_decaying = 0;
+	var learning_rate_decaying = 0;
+	var svg;
+  var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
+//  var avg, avgsim1, avgsim2, minsim;
+  var refIndex;
+  var colorScale;
+  
+  if(no_targets > 1) {
+    refIndex=1;
+    colorScale = d3.scale.linear()
+      .range(['green', 'yellow', 'red']) // or use hex values
+      .domain([-1, 0, 1]);
+    
+    // avg = vecsum(inputs.slice(0, dimI), inputs.slice(dimI, 2*dimI));
+    // avgsim1 = cosine_sim(inputs.slice(0, dimI), avg);
+    // avgsim2 = cosine_sim(inputs.slice(dimI, 2*dimI), avg);
+
+    $("#somcolor2").css("background-color", colorScale(0));
+    $("#somcolor1").css("background-color", colorScale(-1));
+    $("#somcolor3").css("background-color", colorScale(1));
+  } else {
+    refIndex = data.words.length-1;
+    colorScale = d3.scale.linear()
+      .range(['white', 'red'])
+      .domain([-1, 1]);
+    $("#somcolor1").css("background-color", colorScale(1));
+    $("#somcolor3").css("background-color", colorScale(-1));
+  }
+  
+  $("#somword1").html(data.words[0]);
+  $("#somword2").html(data.words[refIndex]);
+  minsim = cosine_sim(inputs.slice(0, dimI), inputs.slice(refIndex*dimI, (refIndex+1)*dimI));
+
+  var itdom = document.getElementById("iterations");
+
+  var div = d3.select("#som2");
+
+	data.coords = [];
+  for(var i=0; i< data.words.length; i++) {
+    data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)];
+  }
+  
+  svg = div.append("svg")
+					 .attr("width", mapWidth)
+					 .attr("height", mapHeight);
+
+  var rects = svg.selectAll(".r")
+      .data(weights)
+      .enter().append("rect")
+			.attr("class", "r")
+      .attr("width", mapWidth/dimW)
+      .attr("height", mapHeight/dimH)
+      .attr("fill", "white")
+      .attr("z-index", "-1")
+      .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW);})
+      .attr("y", function(d, i) { return (Math.floor(i / dimW) * (mapWidth/dimW)); })
+            
+
+  var g = svg.selectAll(".b")
+			.data(data.words)
+			.enter().append("g")
+			.attr("class", "u");
+	g.append("a")
+	  .attr("xlink:href", function(word) {return data.urlprefix+word;})
+	  .attr("title", function(d, i) {
+		  return "rank: "+i +"  "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
+	  })
+		.append("text")
+    .attr("text-anchor", "bottom")
+    .attr("font-size", 12)
+    .attr("fill", function(d) {
+			if(data.target.indexOf(" "+d+" ") >= 0) {
+				return "blue";
+			} else {
+				return "#333"
+			}
+	  })
+    .text(function(d) { return d; });
+  
+  var som_interval = setInterval(somStep, 0);
+  var it=0;
+  
+  function updateSOM() {
+    var oc = [];
+    for(var x = 0; x < dimW; x++) {
+      for(var y = 0; y < dimH; y++) {
+        oc[y*dimW+x]=1;
+      }
+    }
+    svg.selectAll('.u')
+      .data(data.coords)
+      .transition()
+      .attr("transform", function(d, i) { 
+			  return "translate(" +
+          (d[0]*(mapWidth/dimW)+4) + "," +
+          (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")"; });
+
+    var colorFun = function(d, i) {
+      var sim1=cosine_sim(d, inputs.slice(0, dimI));
+      var sim2=cosine_sim(d, inputs.slice(dimI, 2*dimI));
+      var col;
+//      col = (sim1-avgsim1)/(1-avgsim1)-(sim2-avgsim2)/(1-avgsim2);
+      col = (sim2-sim1)/(1-minsim);
+//      console.log(Math.floor(i/dimW)+","+i%dimW+":"+(sim1-minsim)/(1-minsim)+ " " + (sim2-minsim)/(1-minsim) + "--> "+ col);
+      if(col > 1) col=1;
+      if(col < -1) col=-1;
+      return colorScale(col);
+    };
+    
+    if(it>training_iterations*.6) {
+      svg.selectAll(".r")
+        .data(weights)
+        .transition()
+        .attr("fill", colorFun);
+    }
+  }
+  
+  function somStep() {
+	  if(it++ >= training_iterations) {
+      updateSOM();
+      clearInterval(som_interval);
+      return;
+    }
+	  itdom.innerHTML = it;
+	  radius_decaying = radius * Math.exp(-it/time_constant);
+    learning_rate_decaying = learning_rate * Math.exp(-it/time_constant);
+    //learning_rate_decaying = learning_rate * Math.exp(-it/training_iterations);
+    
+    //pick a random input to train
+    var current=Math.floor(Math.random()*dimI)
+    var iv = inputs.slice(current*dimI, (current+1)*dimI);
+    // Determine the BMU
+    BMUIdx = getBMUIndex(weights, iv);
+    var coord1 = convertIndexToXY(BMUIdx, dimW);
+    data.coords[current] = coord1;
+    var widthSq = radius_decaying * radius_decaying;
+    for (var v in weights) {
+      var coord2 = convertIndexToXY(v, dimW);
+      var dist = getEucledianDistance(coord1, coord2);
+      // Determine if the weight is within the training radius
+      if (dist < widthSq) {
+        // console.log(dist, learning_rate_decaying, radius_decaying, it);
+        influence = Math.exp(-dist/(2*widthSq));
+        for (vidx = 0;vidx<weights[v].length;vidx++) {
+          weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]);
+        }
+      }
+    }
+//  }
+    if(it % 10 == 0) {
+      updateSOM();
+    }
+	}
+
+}
diff --git a/js/tsne.js b/js/tsne.js
new file mode 100644
index 0000000..57b3878
--- /dev/null
+++ b/js/tsne.js
@@ -0,0 +1,371 @@
+// create main global object
+var tsnejs = tsnejs || { REVISION: 'ALPHA' };
+
+(function(global) {
+  "use strict";
+
+  // utility function
+  var assert = function(condition, message) {
+    if (!condition) { throw message || "Assertion failed"; }
+  }
+
+  // syntax sugar
+  var getopt = function(opt, field, defaultval) {
+    if(opt.hasOwnProperty(field)) {
+      return opt[field];
+    } else {
+      return defaultval;
+    }
+  }
+
+  // return 0 mean unit standard deviation random number
+  var return_v = false;
+  var v_val = 0.0;
+  var gaussRandom = function() {
+    if(return_v) { 
+      return_v = false;
+      return v_val; 
+    }
+    var u = 2*Math.random()-1;
+    var v = 2*Math.random()-1;
+    var r = u*u + v*v;
+    if(r == 0 || r > 1) return gaussRandom();
+    var c = Math.sqrt(-2*Math.log(r)/r);
+    v_val = v*c; // cache this for next function call for efficiency
+    return_v = true;
+    return u*c;
+  }
+
+  // return random normal number
+  var randn = function(mu, std){ return mu+gaussRandom()*std; }
+
+  // utilitity that creates contiguous vector of zeros of size n
+  var zeros = function(n) {
+    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
+    if(typeof ArrayBuffer === 'undefined') {
+      // lacking browser support
+      var arr = new Array(n);
+      for(var i=0;i<n;i++) { arr[i]= 0; }
+      return arr;
+    } else {
+      return new Float64Array(n); // typed arrays are faster
+    }
+  }
+
+  // utility that returns 2d array filled with random numbers
+  // or with value s, if provided
+  var randn2d = function(n,d,s) {
+    var uses = typeof s !== 'undefined';
+    var x = [];
+    for(var i=0;i<n;i++) {
+      var xhere = [];
+      for(var j=0;j<d;j++) { 
+        if(uses) {
+          xhere.push(s); 
+        } else {
+          xhere.push(randn(0.0, 1e-4)); 
+        }
+      }
+      x.push(xhere);
+    }
+    return x;
+  }
+
+  // compute L2 distance between two vectors
+  var L2 = function(x1, x2) {
+    var D = x1.length;
+    var d = 0;
+    for(var i=0;i<D;i++) { 
+      var x1i = x1[i];
+      var x2i = x2[i];
+      d += (x1i-x2i)*(x1i-x2i);
+    }
+    return d;
+  }
+
+  // compute pairwise distance in all vectors in X
+  var xtod = function(X) {
+    var N = X.length;
+    var dist = zeros(N * N); // allocate contiguous array
+    for(var i=0;i<N;i++) {
+      for(var j=i+1;j<N;j++) {
+        var d = L2(X[i], X[j]);
+        dist[i*N+j] = d;
+        dist[j*N+i] = d;
+      }
+    }
+    return dist;
+  }
+
+  // compute (p_{i|j} + p_{j|i})/(2n)
+  var d2p = function(D, perplexity, tol) {
+    var Nf = Math.sqrt(D.length); // this better be an integer
+    var N = Math.floor(Nf);
+    assert(N === Nf, "D should have square number of elements.");
+    var Htarget = Math.log(perplexity); // target entropy of distribution
+    var P = zeros(N * N); // temporary probability matrix
+
+    var prow = zeros(N); // a temporary storage compartment
+    for(var i=0;i<N;i++) {
+      var betamin = -Infinity;
+      var betamax = Infinity;
+      var beta = 1; // initial value of precision
+      var done = false;
+      var maxtries = 50;
+
+      // perform binary search to find a suitable precision beta
+      // so that the entropy of the distribution is appropriate
+      var num = 0;
+      while(!done) {
+        //debugger;
+
+        // compute entropy and kernel row with beta precision
+        var psum = 0.0;
+        for(var j=0;j<N;j++) {
+          var pj = Math.exp(- D[i*N+j] * beta);
+          if(i===j) { pj = 0; } // we dont care about diagonals
+          prow[j] = pj;
+          psum += pj;
+        }
+        // normalize p and compute entropy
+        var Hhere = 0.0;
+        for(var j=0;j<N;j++) {
+          var pj = prow[j] / psum;
+          prow[j] = pj;
+          if(pj > 1e-7) Hhere -= pj * Math.log(pj);
+        }
+
+        // adjust beta based on result
+        if(Hhere > Htarget) {
+          // entropy was too high (distribution too diffuse)
+          // so we need to increase the precision for more peaky distribution
+          betamin = beta; // move up the bounds
+          if(betamax === Infinity) { beta = beta * 2; }
+          else { beta = (beta + betamax) / 2; }
+
+        } else {
+          // converse case. make distrubtion less peaky
+          betamax = beta;
+          if(betamin === -Infinity) { beta = beta / 2; }
+          else { beta = (beta + betamin) / 2; }
+        }
+
+        // stopping conditions: too many tries or got a good precision
+        num++;
+        if(Math.abs(Hhere - Htarget) < tol) { done = true; }
+        if(num >= maxtries) { done = true; }
+      }
+
+      // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
+      // copy over the final prow to P at row i
+      for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
+
+    } // end loop over examples i
+
+    // symmetrize P and normalize it to sum to 1 over all ij
+    var Pout = zeros(N * N);
+    var N2 = N*2;
+    for(var i=0;i<N;i++) {
+      for(var j=0;j<N;j++) {
+        Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
+      }
+    }
+
+    return Pout;
+  }
+
+  // helper function
+  function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }
+
+  var tSNE = function(opt) {
+    var opt = opt || {};
+    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
+    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
+    this.epsilon = getopt(opt, "epsilon", 10); // learning rate
+
+    this.iter = 0;
+  }
+
+  tSNE.prototype = {
+
+    // this function takes a set of high-dimensional points
+    // and creates matrix P from them using gaussian kernel
+    initDataRaw: function(X) {
+      var N = X.length;
+      var D = X[0].length;
+      assert(N > 0, " X is empty? You must have some data!");
+      assert(D > 0, " X[0] is empty? Where is the data?");
+      var dists = xtod(X); // convert X to distances using gaussian kernel
+      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
+      this.N = N; // back up the size of the dataset
+      this.initSolution(); // refresh this
+    },
+
+    // this function takes a given distance matrix and creates
+    // matrix P from them.
+    // D is assumed to be provided as a list of lists, and should be symmetric
+    initDataDist: function(D) {
+      var N = D.length;
+      assert(N > 0, " X is empty? You must have some data!");
+      // convert D to a (fast) typed array version
+      var dists = zeros(N * N); // allocate contiguous array
+      for(var i=0;i<N;i++) {
+        for(var j=i+1;j<N;j++) {
+          var d = D[i][j];
+          dists[i*N+j] = d;
+          dists[j*N+i] = d;
+        }
+      }
+      this.P = d2p(dists, this.perplexity, 1e-4);
+      this.N = N;
+      this.initSolution(); // refresh this
+    },
+
+    // (re)initializes the solution to random
+    initSolution: function() {
+      // generate random solution to t-SNE
+      this.Y = randn2d(this.N, this.dim); // the solution
+      this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
+      this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
+      this.iter = 0;
+    },
+
+    // return pointer to current solution
+    getSolution: function() {
+      return this.Y;
+    },
+
+    // perform a single step of optimization to improve the embedding
+    step: function() {
+      this.iter += 1;
+      var N = this.N;
+
+      var cg = this.costGrad(this.Y); // evaluate gradient
+      var cost = cg.cost;
+      var grad = cg.grad;
+
+      // perform gradient step
+      var ymean = zeros(this.dim);
+      for(var i=0;i<N;i++) {
+        for(var d=0;d<this.dim;d++) {
+          var gid = grad[i][d];
+          var sid = this.ystep[i][d];
+          var gainid = this.gains[i][d];
+
+          // compute gain update
+          var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
+          if(gainid < 0.01) gainid = 0.01; // clamp
+          this.gains[i][d] = newgain; // store for next turn
+
+          // compute momentum step direction
+          var momval = this.iter < 250 ? 0.5 : 0.8;
+          var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
+          this.ystep[i][d] = newsid; // remember the step we took
+
+          // step!
+          this.Y[i][d] += newsid; 
+
+          ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
+        }
+      }
+
+      // reproject Y to be zero mean
+      for(var i=0;i<N;i++) {
+        for(var d=0;d<this.dim;d++) {
+          this.Y[i][d] -= ymean[d]/N;
+        }
+      }
+
+      //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
+      return cost; // return current cost
+    },
+
+    // for debugging: gradient check
+    debugGrad: function() {
+      var N = this.N;
+
+      var cg = this.costGrad(this.Y); // evaluate gradient
+      var cost = cg.cost;
+      var grad = cg.grad;
+
+      var e = 1e-5;
+      for(var i=0;i<N;i++) {
+        for(var d=0;d<this.dim;d++) {
+          var yold = this.Y[i][d];
+
+          this.Y[i][d] = yold + e;
+          var cg0 = this.costGrad(this.Y);
+
+          this.Y[i][d] = yold - e;
+          var cg1 = this.costGrad(this.Y);
+          
+          var analytic = grad[i][d];
+          var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
+          console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);
+
+          this.Y[i][d] = yold;
+        }
+      }
+    },
+
+    // return cost and gradient, given an arrangement
+    costGrad: function(Y) {
+      var N = this.N;
+      var dim = this.dim; // dim of output space
+      var P = this.P;
+
+      var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima
+
+      // compute current Q distribution, unnormalized first
+      var Qu = zeros(N * N);
+      var qsum = 0.0;
+      for(var i=0;i<N;i++) {
+        for(var j=i+1;j<N;j++) {
+          var dsum = 0.0;
+          for(var d=0;d<dim;d++) {
+            var dhere = Y[i][d] - Y[j][d];
+            dsum += dhere * dhere;
+          }
+          var qu = 1.0 / (1.0 + dsum); // Student t-distribution
+          Qu[i*N+j] = qu;
+          Qu[j*N+i] = qu;
+          qsum += 2 * qu;
+        }
+      }
+      // normalize Q distribution to sum to 1
+      var NN = N*N;
+      var Q = zeros(NN);
+      for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }
+
+      var cost = 0.0;
+      var grad = [];
+      for(var i=0;i<N;i++) {
+        var gsum = new Array(dim); // init grad for point i
+        for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
+        for(var j=0;j<N;j++) {
+          cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
+          var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
+          for(var d=0;d<dim;d++) {
+            gsum[d] += premult * (Y[i][d] - Y[j][d]);
+          }
+        }
+        grad.push(gsum);
+      }
+
+      return {cost: cost, grad: grad};
+    }
+  }
+
+  global.tSNE = tSNE; // export tSNE class
+})(tsnejs);
+
+
+// export the library to window, or to module in nodejs
+(function(lib) {
+  "use strict";
+  if (typeof module === "undefined" || typeof module.exports === "undefined") {
+    window.tsnejs = lib; // in ordinary browser attach library to window
+  } else {
+    module.exports = lib; // in nodejs
+  }
+})(tsnejs);