w2v-server: move js to folder
diff --git a/js/som.js b/js/som.js
new file mode 100644
index 0000000..ca16d28
--- /dev/null
+++ b/js/som.js
@@ -0,0 +1,286 @@
+// JavaScript implementation of Kohonen's Self-Organizing Map
+// Based on http://www.ai-junkie.com/ann/som/som1.html
+
+// Pixel dimensions of the SVG canvas the map is drawn into.
+var mapWidth = 800;
+var mapHeight = 800;
+
+// Euclidean distance between a weight vector and an input vector.
+// Only the first `weight.length` components of `inputVector` are read.
+function getDistance(weight, inputVector) {
+  var sumOfSquares = 0;
+  var k = 0;
+  while (k < weight.length) {
+    var diff = inputVector[k] - weight[k];
+    sumOfSquares += diff * diff;
+    k += 1;
+  }
+  return Math.sqrt(sumOfSquares);
+}
+
+// Builds `vSize` weight vectors, each holding `eSize` values drawn from
+// Math.random(). Vectors are Float64Arrays when typed arrays are available
+// and plain Arrays otherwise; the outer container is always a plain Array.
+function makeRandomWeights(vSize, eSize) {
+  var hasTypedArrays = typeof ArrayBuffer !== 'undefined';
+  var weights = [];
+  while (weights.length < vSize) {
+    var vector = hasTypedArrays ? new Float64Array(eSize) : new Array(eSize);
+    for (var k = 0; k < eSize; k++) {
+      vector[k] = Math.random();
+    }
+    weights.push(vector);
+  }
+  return weights;
+}
+
+// Returns the index of the weight vector closest to `target` (the Best
+// Matching Unit), as measured by getDistance(). Ties go to the lowest index,
+// and 0 is returned when `weights` is empty.
+function getBMUIndex(weights, target) {
+  var BMUIndex = 0;
+  // Start from Infinity so that arbitrarily large distances still compete;
+  // a finite sentinel would silently pick index 0 for far-away inputs.
+  var bestScore = Infinity;
+
+  // `i` and `distance` are declared locally so no globals are created.
+  for (var i = 0; i < weights.length; i++) {
+    var distance = getDistance(weights[i], target);
+    if (distance < bestScore) {
+      bestScore = distance;
+      BMUIndex = i;
+    }
+  }
+  return BMUIndex;
+}
+
+// Converts a flat, row-major cell index into [x, y] grid coordinates for a
+// grid that is `dimW` cells wide. `idx` may be a number or a numeric string;
+// the arithmetic operators coerce it to a number.
+function convertIndexToXY(idx, dimW) {
+  // Truncate numerically (toward zero). parseInt() stringifies its argument
+  // first, so a small quotient such as 5e-7 would be parsed as 5.
+  var truncate = function(value) {
+    return value < 0 ? Math.ceil(value) : Math.floor(value);
+  };
+  var x = truncate(idx % dimW);
+  var y = truncate(idx / dimW);
+  return [x, y];
+}
+
+
+// Squared Euclidean distance between two [x, y] coordinates.
+// Note: no square root is taken, so the result is in squared units.
+function getEucledianDistance(coord1, coord2) {
+  var dx = coord1[0] - coord2[0];
+  var dy = coord1[1] - coord2[1];
+  return dx * dx + dy * dy;
+}
+
+// Utility that returns a zero-filled vector of length n: a Float64Array when
+// typed arrays are available, a plain Array otherwise. An undefined or NaN
+// length yields an empty plain Array.
+var zeros = function(n) {
+  if (typeof n === 'undefined' || isNaN(n)) {
+    return [];
+  }
+  if (typeof ArrayBuffer !== 'undefined') {
+    // typed arrays are faster and start out zero-filled
+    return new Float64Array(n);
+  }
+  // lacking browser support
+  var filled = new Array(n);
+  var k = 0;
+  while (k < n) {
+    filled[k] = 0;
+    k++;
+  }
+  return filled;
+};
+
+// Sum of squared component differences between x1 and x2, i.e. the squared
+// L2 distance (no square root is taken). Iterates over x1.length components.
+var L2 = function(x1, x2) {
+  var total = 0;
+  for (var k = 0, len = x1.length; k < len; k++) {
+    var delta = x1[k] - x2[k];
+    total += delta * delta;
+  }
+  return total;
+};
+
+// Builds the symmetric N x N matrix of pairwise L2() values between the rows
+// of X, stored row-major in one flat vector obtained from zeros(). The
+// diagonal is left at zero.
+var xtod = function(X) {
+  var count = X.length;
+  var matrix = zeros(count * count); // one contiguous allocation
+  for (var row = 0; row < count; row++) {
+    for (var col = row + 1; col < count; col++) {
+      // the measure is symmetric: compute once, mirror across the diagonal
+      var value = L2(X[row], X[col]);
+      matrix[row * count + col] = value;
+      matrix[col * count + row] = value;
+    }
+  }
+  return matrix;
+};
+
+// Dot product of a and b, taken over the length of the shorter vector.
+function dotproduct(a, b) {
+  var count = Math.min(a.length, b.length);
+  var acc = 0;
+  var k = 0;
+  while (k < count) {
+    acc += a[k] * b[k];
+    k++;
+  }
+  return acc;
+}
+
+// Element-wise sum of a and b, returned as a new plain Array with a.length
+// entries.
+function vecsum(a, b) {
+  var size = a.length;
+  var total = new Array(size);
+  var k = 0;
+  while (k < size) {
+    total[k] = a[k] + b[k];
+    k++;
+  }
+  return total;
+}
+
+// Euclidean (L2) norm of vector a: the square root of the sum of squares.
+function norm2(a) {
+  var squares = 0;
+  var k = 0;
+  while (k < a.length) {
+    squares += a[k] * a[k];
+    k += 1;
+  }
+  return Math.sqrt(squares);
+}
+
+// Cosine similarity of x and y: dotproduct(x, y) / (|x| * |y|).
+// Returns 0 when either norm is falsy (zero or NaN), so the division never
+// sees a zero denominator.
+function cosine_sim(x, y) {
+  // Norms are kept local so that no xnorm/ynorm globals are created
+  // (assigning to an undeclared name also throws in strict mode).
+  var xnorm = norm2(x);
+  if (!xnorm) return 0;
+  var ynorm = norm2(y);
+  if (!ynorm) return 0;
+  return dotproduct(x, y) / (xnorm * ynorm);
+}
+
+// Trains a 6x6 self-organizing map on `data` and renders it with d3 into the
+// #som2 element, moving the word labels as training progresses.
+//
+// data: object read for the fields vecs, words, target, ranks and urlprefix;
+//       a `coords` array (one [x, y] cell per word) is written onto it.
+// training_iterations: number of training steps to run before the timer
+//       stops itself.
+//
+// Relies on the page providing d3 (the d3.scale.linear API), jQuery ($) and
+// the elements #som2, #iterations, #somword1/2 and #somcolor1/2/3.
+// Training runs asynchronously on a setInterval timer; nothing is returned.
+function makeSOM(data, training_iterations) {
+  var dimW = 6;
+  var dimH = 6;
+
+  var radius = (dimW * dimH) / 2;
+  var learning_rate = 1;
+  var time_constant = training_iterations / Math.log(radius);
+  // Each training input is one row of the pairwise matrix built by xtod(),
+  // so an input vector has as many components as there are data vectors.
+  var inputs = xtod(data.vecs);
+  var dimI = data.vecs.length;
+  var weights = makeRandomWeights(dimW * dimH, dimI);
+  var radius_decaying = 0;
+  var learning_rate_decaying = 0;
+  var svg;
+  var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
+  var refIndex;
+  var colorScale;
+  var it = 0;
+
+  if (no_targets > 1) {
+    refIndex = 1;
+    colorScale = d3.scale.linear()
+      .range(['green', 'yellow', 'red']) // or use hex values
+      .domain([-1, 0, 1]);
+
+    $("#somcolor2").css("background-color", colorScale(0));
+    $("#somcolor1").css("background-color", colorScale(-1));
+    $("#somcolor3").css("background-color", colorScale(1));
+  } else {
+    refIndex = data.words.length-1;
+    colorScale = d3.scale.linear()
+      .range(['white', 'red'])
+      .domain([-1, 1]);
+    $("#somcolor1").css("background-color", colorScale(1));
+    $("#somcolor3").css("background-color", colorScale(-1));
+  }
+
+  // .text() rather than .html(): the words are data, not markup.
+  $("#somword1").text(data.words[0]);
+  $("#somword2").text(data.words[refIndex]);
+
+  // `inputs` is never modified, so the rows used for colouring are sliced
+  // once here instead of on every cell of every redraw.
+  var firstInput = inputs.slice(0, dimI);
+  var secondInput = inputs.slice(dimI, 2*dimI);
+  // Similarity between the first word and the reference word; used to
+  // normalise the cell colours in colorFun.
+  var minsim = cosine_sim(firstInput, inputs.slice(refIndex*dimI, (refIndex+1)*dimI));
+
+  var itdom = document.getElementById("iterations");
+
+  var div = d3.select("#som2");
+
+  // Every word starts in the centre cell of the grid.
+  data.coords = [];
+  for (var i = 0; i < data.words.length; i++) {
+    data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)];
+  }
+
+  svg = div.append("svg")
+    .attr("width", mapWidth)
+    .attr("height", mapHeight);
+
+  // One rectangle per map cell, laid out row-major.
+  svg.selectAll(".r")
+    .data(weights)
+    .enter().append("rect")
+    .attr("class", "r")
+    .attr("width", mapWidth/dimW)
+    .attr("height", mapHeight/dimH)
+    .attr("fill", "white")
+    .attr("z-index", "-1")
+    .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW); })
+    // rows advance by the cell height, not the cell width
+    .attr("y", function(d, i) { return Math.floor(i / dimW) * (mapHeight/dimH); });
+
+  // One linked text label per word; the groups carry class "u" so that
+  // updateSOM can select and move them.
+  var g = svg.selectAll(".u")
+    .data(data.words)
+    .enter().append("g")
+    .attr("class", "u");
+  g.append("a")
+    .attr("xlink:href", function(word) { return data.urlprefix+word; })
+    .attr("title", function(d, i) {
+      // the regex inserts thousands separators into the frequency rank
+      return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
+    })
+    .append("text")
+    .attr("text-anchor", "bottom")
+    .attr("font-size", 12)
+    .attr("fill", function(d) {
+      // target words are highlighted in blue
+      if (data.target.indexOf(" "+d+" ") >= 0) {
+        return "blue";
+      } else {
+        return "#333";
+      }
+    })
+    .text(function(d) { return d; });
+
+  // Maps a cell's weight vector to a colour from the difference between its
+  // similarity to the second and to the first input, clamped to [-1, 1].
+  function colorFun(d) {
+    var sim1 = cosine_sim(d, firstInput);
+    var sim2 = cosine_sim(d, secondInput);
+    var col = (sim2-sim1)/(1-minsim);
+    if (col > 1) col = 1;
+    if (col < -1) col = -1;
+    return colorScale(col);
+  }
+
+  // Moves every word label to its current cell and, late in training,
+  // recolours the cells.
+  function updateSOM() {
+    // Per-cell counter (starting at 1) used to stack labels sharing a cell.
+    var oc = [];
+    for (var x = 0; x < dimW; x++) {
+      for (var y = 0; y < dimH; y++) {
+        oc[y*dimW+x] = 1;
+      }
+    }
+    svg.selectAll('.u')
+      .data(data.coords)
+      .transition()
+      .attr("transform", function(d) {
+        return "translate(" +
+          (d[0]*(mapWidth/dimW)+4) + "," +
+          (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")";
+      });
+
+    // Cell colours are only drawn once training is more than 60% done.
+    if (it > training_iterations*.6) {
+      svg.selectAll(".r")
+        .data(weights)
+        .transition()
+        .attr("fill", colorFun);
+    }
+  }
+
+  // Runs a single training step; stops the timer once all steps are done.
+  function somStep() {
+    if (it++ >= training_iterations) {
+      updateSOM();
+      clearInterval(som_interval);
+      return;
+    }
+    itdom.innerHTML = it;
+    radius_decaying = radius * Math.exp(-it/time_constant);
+    learning_rate_decaying = learning_rate * Math.exp(-it/time_constant);
+
+    // pick a random input to train
+    var current = Math.floor(Math.random()*dimI);
+    var iv = inputs.slice(current*dimI, (current+1)*dimI);
+    // Determine the BMU
+    var BMUIdx = getBMUIndex(weights, iv);
+    var coord1 = convertIndexToXY(BMUIdx, dimW);
+    data.coords[current] = coord1;
+    var widthSq = radius_decaying * radius_decaying;
+    // Indexed loop: for...in would hand out string keys.
+    for (var v = 0; v < weights.length; v++) {
+      var coord2 = convertIndexToXY(v, dimW);
+      // getEucledianDistance returns a squared distance, hence the
+      // comparison against the squared radius.
+      var dist = getEucledianDistance(coord1, coord2);
+      // Determine if the weight is within the training radius
+      if (dist < widthSq) {
+        var influence = Math.exp(-dist/(2*widthSq));
+        for (var vidx = 0; vidx < weights[v].length; vidx++) {
+          weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]);
+        }
+      }
+    }
+    // Redraw only every 10th step.
+    if (it % 10 === 0) {
+      updateSOM();
+    }
+  }
+
+  // Start training only after all state above has been initialised.
+  var som_interval = setInterval(somStep, 0);
+}