// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };

(function(global) {
  "use strict";

  // utility function
  var assert = function(condition, message) {
    if (!condition) { throw message || "Assertion failed"; }
  };

  // syntax sugar
  var getopt = function(opt, field, defaultval) {
    if(opt.hasOwnProperty(field)) {
      return opt[field];
    } else {
      return defaultval;
    }
  };

  // return a zero-mean, unit-standard-deviation random number
  // (Marsaglia polar method; the second sample is cached in v_val)
  var return_v = false;
  var v_val = 0.0;
  var gaussRandom = function() {
    if(return_v) {
      return_v = false;
      return v_val;
    }
    var u = 2*Math.random()-1;
    var v = 2*Math.random()-1;
    var r = u*u + v*v;
    if(r === 0 || r > 1) return gaussRandom(); // reject samples outside the unit circle
    var c = Math.sqrt(-2*Math.log(r)/r);
    v_val = v*c; // cache this for the next call for efficiency
    return_v = true;
    return u*c;
  };

  // return a random number drawn from N(mu, std^2)
  var randn = function(mu, std){ return mu+gaussRandom()*std; };

  // utility that creates a contiguous vector of zeros of size n
  var zeros = function(n) {
    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
    if(typeof ArrayBuffer === 'undefined') {
      // lacking browser support
      var arr = new Array(n);
      for(var i=0;i<n;i++) { arr[i]= 0; }
      return arr;
    } else {
      return new Float64Array(n); // typed arrays are faster
    }
  };

  // utility that returns a 2d array filled with small random numbers,
  // or with the value s if provided
  var randn2d = function(n,d,s) {
    var uses = typeof s !== 'undefined';
    var x = [];
    for(var i=0;i<n;i++) {
      var xhere = [];
      for(var j=0;j<d;j++) {
        if(uses) {
          xhere.push(s);
        } else {
          xhere.push(randn(0.0, 1e-4));
        }
      }
      x.push(xhere);
    }
    return x;
  };

  // compute squared L2 distance between two vectors
  var L2 = function(x1, x2) {
    var D = x1.length;
    var d = 0;
    for(var i=0;i<D;i++) {
      var x1i = x1[i];
      var x2i = x2[i];
      d += (x1i-x2i)*(x1i-x2i);
    }
    return d;
  };

  // compute pairwise squared distances between all vectors in X
  var xtod = function(X) {
    var N = X.length;
    var dist = zeros(N * N); // allocate contiguous array, row-major: dist[i*N+j]
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var d = L2(X[i], X[j]);
        dist[i*N+j] = d;
        dist[j*N+i] = d;
      }
    }
    return dist;
  };
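
  // Illustrative example (made-up data, not part of the library's tests):
  // for X = [[0,0],[3,4]], xtod returns a length-4 flattened matrix equivalent
  // to [0, 25, 25, 0] -- the squared distance 3*3 + 4*4 = 25 stored symmetrically,
  // with the diagonal left at 0.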

  // compute the symmetrized affinities P, i.e. (p_{i|j} + p_{j|i})/(2N),
  // from a flattened matrix D of pairwise squared distances
  var d2p = function(D, perplexity, tol) {
    var Nf = Math.sqrt(D.length); // this better be an integer
    var N = Math.floor(Nf);
    assert(N === Nf, "D should have square number of elements.");
    var Htarget = Math.log(perplexity); // target entropy of the distribution
    var P = zeros(N * N); // temporary probability matrix

    var prow = zeros(N); // a temporary storage compartment
    for(var i=0;i<N;i++) {
      var betamin = -Infinity;
      var betamax = Infinity;
      var beta = 1; // initial value of the precision
      var done = false;
      var maxtries = 50;

      // perform a binary search for a precision beta such that
      // the entropy of the distribution matches the target
      var num = 0;
      while(!done) {

        // compute the kernel row and its entropy at precision beta
        var psum = 0.0;
        for(var j=0;j<N;j++) {
          var pj = Math.exp(- D[i*N+j] * beta);
          if(i===j) { pj = 0; } // we don't care about the diagonal
          prow[j] = pj;
          psum += pj;
        }
        // normalize p and compute its entropy
        var Hhere = 0.0;
        for(var j=0;j<N;j++) {
          var pj = prow[j] / psum;
          prow[j] = pj;
          if(pj > 1e-7) Hhere -= pj * Math.log(pj);
        }

        // adjust beta based on the result
        if(Hhere > Htarget) {
          // entropy was too high (distribution too diffuse),
          // so increase the precision for a more peaky distribution
          betamin = beta; // move up the lower bound
          if(betamax === Infinity) { beta = beta * 2; }
          else { beta = (beta + betamax) / 2; }

        } else {
          // converse case: make the distribution less peaky
          betamax = beta;
          if(betamin === -Infinity) { beta = beta / 2; }
          else { beta = (beta + betamin) / 2; }
        }

        // stopping conditions: too many tries or a good enough precision
        num++;
        if(Math.abs(Hhere - Htarget) < tol) { done = true; }
        if(num >= maxtries) { done = true; }
      }

      // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
      // copy the final prow into row i of P
      for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }

    } // end loop over examples i

    // symmetrize P and normalize it to sum to 1 over all ij
    var Pout = zeros(N * N);
    var N2 = N*2;
    for(var i=0;i<N;i++) {
      for(var j=0;j<N;j++) {
        Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
      }
    }

    return Pout;
  };
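
  // Worked note (illustrative): with the default perplexity of 30 the target
  // entropy is Htarget = log(30), roughly 3.4 nats, so each point's Gaussian
  // bandwidth is tuned until its neighbor distribution has about 30 effective
  // neighbors.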

  // helper function
  function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

  var tSNE = function(opt) {
    opt = opt || {};
    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
    this.epsilon = getopt(opt, "epsilon", 10); // learning rate

    this.iter = 0;
  };

  tSNE.prototype = {

    // this function takes a set of high-dimensional points
    // and creates matrix P from them using a Gaussian kernel
    initDataRaw: function(X) {
      var N = X.length;
      var D = X[0].length;
      assert(N > 0, " X is empty? You must have some data!");
      assert(D > 0, " X[0] is empty? Where is the data?");
      var dists = xtod(X); // convert X to pairwise squared distances
      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
      this.N = N; // back up the size of the dataset
      this.initSolution(); // refresh this
    },

    // this function takes a given distance matrix and creates
    // matrix P from it.
    // D is assumed to be provided as a list of lists, and should be symmetric
    initDataDist: function(D) {
      var N = D.length;
      assert(N > 0, " D is empty? You must have some data!");
      // convert D to a (fast) typed array version
      var dists = zeros(N * N); // allocate contiguous array
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var d = D[i][j];
          dists[i*N+j] = d;
          dists[j*N+i] = d;
        }
      }
      this.P = d2p(dists, this.perplexity, 1e-4);
      this.N = N;
      this.initSolution(); // refresh this
    },

    // (re)initializes the solution to random
    initSolution: function() {
      // generate a random initial solution for t-SNE
      this.Y = randn2d(this.N, this.dim); // the solution
      this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
      this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
      this.iter = 0;
    },

    // return pointer to current solution
    getSolution: function() {
      return this.Y;
    },

    // perform a single step of optimization to improve the embedding
    step: function() {
      this.iter += 1;
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      // perform gradient step
      var ymean = zeros(this.dim);
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var gid = grad[i][d];
          var sid = this.ystep[i][d];
          var gainid = this.gains[i][d];

          // compute gain update
          var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
          if(newgain < 0.01) newgain = 0.01; // clamp
          this.gains[i][d] = newgain; // store for next turn

          // compute momentum step direction
          var momval = this.iter < 250 ? 0.5 : 0.8;
          var newsid = momval * sid - this.epsilon * newgain * gid;
          this.ystep[i][d] = newsid; // remember the step we took

          // step!
          this.Y[i][d] += newsid;

          ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
        }
      }

      // reproject Y to be zero mean
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          this.Y[i][d] -= ymean[d]/N;
        }
      }

      //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
      return cost; // return current cost
    },

    // for debugging: gradient check
    debugGrad: function() {
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      var e = 1e-5;
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var yold = this.Y[i][d];

          this.Y[i][d] = yold + e;
          var cg0 = this.costGrad(this.Y);

          this.Y[i][d] = yold - e;
          var cg1 = this.costGrad(this.Y);

          var analytic = grad[i][d];
          var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
          console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

          this.Y[i][d] = yold;
        }
      }
    },
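
    // Note on debugGrad: costGrad's analytic gradient includes the early-exaggeration
    // factor pmul (4 while this.iter < 100) but the returned cost does not, so the
    // check is only expected to match closely once this.iter >= 100.
    // A minimal sketch with made-up data (illustrative only):
    //   var model = new tSNE({ perplexity: 2 });
    //   model.initDataRaw([[0,0],[1,0],[0,1],[1,1]]);
    //   model.iter = 100; // turn off early exaggeration so analytic and numerical agree
    //   model.debugGrad();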

    // return cost and gradient, given an arrangement Y
    costGrad: function(Y) {
      var N = this.N;
      var dim = this.dim; // dimensionality of the output space
      var P = this.P;

      var pmul = this.iter < 100 ? 4 : 1; // early exaggeration: a trick that helps with local optima

      // compute the current Q distribution, unnormalized first
      var Qu = zeros(N * N);
      var qsum = 0.0;
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var dsum = 0.0;
          for(var d=0;d<dim;d++) {
            var dhere = Y[i][d] - Y[j][d];
            dsum += dhere * dhere;
          }
          var qu = 1.0 / (1.0 + dsum); // Student t-distribution kernel (1 degree of freedom)
          Qu[i*N+j] = qu;
          Qu[j*N+i] = qu;
          qsum += 2 * qu;
        }
      }
      // normalize the Q distribution to sum to 1
      var NN = N*N;
      var Q = zeros(NN);
      for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

      var cost = 0.0;
      var grad = [];
      for(var i=0;i<N;i++) {
        var gsum = new Array(dim); // init grad for point i
        for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
        for(var j=0;j<N;j++) {
          cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion, at least)
          var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
          for(var d=0;d<dim;d++) {
            gsum[d] += premult * (Y[i][d] - Y[j][d]);
          }
        }
        grad.push(gsum);
      }

      return {cost: cost, grad: grad};
    }
  };

  global.tSNE = tSNE; // export the tSNE class
})(tsnejs);


// export the library to window, or to module in nodejs
(function(lib) {
  "use strict";
  if (typeof module === "undefined" || typeof module.exports === "undefined") {
    window.tsnejs = lib; // in an ordinary browser, attach the library to window
  } else {
    module.exports = lib; // in nodejs
  }
})(tsnejs);
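
// Usage sketch (illustrative only; the file path, data, and iteration count below
// are assumptions, not part of the library):
//
//   var tsnejs = require("./tsne");   // or use window.tsnejs in the browser
//   var model = new tsnejs.tSNE({ perplexity: 10, dim: 2, epsilon: 10 });
//   model.initDataRaw(X);             // X: array of N high-dimensional points,
//                                     // or use initDataDist(D) with a distance matrix
//   for (var k = 0; k < 500; k++) { model.step(); }
//   var Y = model.getSolution();      // N x 2 array of embedded coordinates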