| // JavaScript implementation of Kohonen's Self-Organizing Map |
| // Based on http://www.ai-junkie.com/ann/som/som1.html |
| |
// Pixel width and height of the SVG canvas the map is rendered into.
var mapWidth = 800;
var mapHeight = 800;
| |
// Euclidean distance between a weight vector and an input vector.
// Only the indices of `weight` are visited; `inputVector` is read at the
// same positions.
function getDistance(weight, inputVector) {
  var sumOfSquares = 0;
  var k = 0;
  while (k < weight.length) {
    var delta = inputVector[k] - weight[k];
    sumOfSquares += delta * delta;
    k += 1;
  }
  return Math.sqrt(sumOfSquares);
}
| |
// Builds `vSize` weight vectors, each holding `eSize` values drawn from
// Math.random(). Rows are Float64Arrays when typed arrays are available and
// plain Arrays otherwise.
function makeRandomWeights(vSize, eSize) {
  var hasTypedArrays = typeof ArrayBuffer !== 'undefined';
  var weights = [];
  for (var row = 0; row < vSize; row++) {
    var vec = hasTypedArrays ? new Float64Array(eSize) : new Array(eSize);
    for (var col = 0; col < eSize; col++) {
      vec[col] = Math.random();
    }
    weights.push(vec);
  }
  return weights;
}
| |
// Returns the index of the best matching unit (BMU): the weight vector with
// the smallest getDistance() to `target`. Ties go to the lowest index, and
// 0 is returned when `weights` is empty.
function getBMUIndex(weights, target) {
  var BMUIndex = 0;
  // Start from Infinity so that arbitrarily large distances still compete;
  // a finite sentinel would silently pick index 0 for far-away inputs.
  var bestScore = Infinity;

  // `i` and `distance` are declared locally: as undeclared assignments they
  // leak into the global scope and throw in strict mode / ES modules.
  for (var i = 0; i < weights.length; i++) {
    var distance = getDistance(weights[i], target);
    if (distance < bestScore) {
      bestScore = distance;
      BMUIndex = i;
    }
  }
  return BMUIndex;
}
| |
// Converts a flat, row-major cell index into [x, y] grid coordinates for a
// grid that is `dimW` cells wide. `idx` may be a number or a numeric string
// (the arithmetic operators coerce it).
function convertIndexToXY(idx, dimW) {
  var col = idx % dimW;
  var row = idx / dimW;
  // Truncate toward zero arithmetically. parseInt() would first stringify the
  // number, which misreads exponent notation (e.g. 1e-7 -> "1e-7" -> 1).
  var x = col < 0 ? Math.ceil(col) : Math.floor(col);
  var y = row < 0 ? Math.ceil(row) : Math.floor(row);
  return [x, y];
}
| |
| |
// Squared Euclidean distance between two [x, y] coordinates.
// Note: no square root is taken, so the result is the squared distance.
function getEucledianDistance(coord1, coord2) {
  var dx = coord1[0] - coord2[0];
  var dy = coord1[1] - coord2[1];
  return dx * dx + dy * dy;
}
| |
// Utility that creates a contiguous vector of n zeros.
// Returns an empty plain array when n is undefined or not a number.
var zeros = function(n) {
  var unusable = typeof n === 'undefined' || isNaN(n);
  if (unusable) {
    return [];
  }
  if (typeof ArrayBuffer !== 'undefined') {
    // Typed arrays are zero-initialised and faster.
    return new Float64Array(n);
  }
  // No typed-array support: fill a plain array by hand.
  var plain = new Array(n);
  for (var k = 0; k < n; k++) {
    plain[k] = 0;
  }
  return plain;
};
| |
// Squared L2 distance between two vectors (sum of squared differences; no
// square root is applied). Iterates over the length of x1.
var L2 = function(x1, x2) {
  var total = 0;
  for (var k = 0, len = x1.length; k < len; k++) {
    var gap = x1[k] - x2[k];
    total += gap * gap;
  }
  return total;
};
| |
// Computes the pairwise L2() value for every pair of vectors in X.
// The result is a flat, row-major N*N vector (N = X.length): entry
// [i*N + j] holds L2(X[i], X[j]); the diagonal stays 0.
var xtod = function(X) {
  var count = X.length;
  var table = zeros(count * count); // one contiguous allocation
  for (var row = 0; row < count; row++) {
    for (var col = row + 1; col < count; col++) {
      // The matrix is symmetric, so compute once and mirror.
      var value = L2(X[row], X[col]);
      table[row * count + col] = value;
      table[col * count + row] = value;
    }
  }
  return table;
};
| |
// Dot product of two vectors; when the lengths differ only the leading
// elements shared by both are used.
function dotproduct(a, b) {
  var limit = Math.min(a.length, b.length);
  var acc = 0;
  var k = 0;
  while (k < limit) {
    acc += a[k] * b[k];
    k += 1;
  }
  return acc;
}
| |
// Element-wise sum of two vectors, returned as a new plain Array with the
// length of `a`.
function vecsum(a, b) {
  var size = a.length;
  var out = new Array(size);
  var k = 0;
  while (k < size) {
    out[k] = a[k] + b[k];
    k += 1;
  }
  return out;
}
| |
// Euclidean (L2) norm of a vector: the square root of the sum of squares.
function norm2(a) {
  var total = 0;
  var k = 0;
  while (k < a.length) {
    total += a[k] * a[k];
    k += 1;
  }
  return Math.sqrt(total);
}
| |
// Cosine similarity of two vectors: dotproduct(x, y) divided by the product
// of their norms. Returns 0 when either norm is falsy (zero or NaN), which
// avoids a division by zero.
function cosine_sim(x, y) {
  // Declared locally: as undeclared assignments these leak into the global
  // scope and throw in strict mode / ES modules.
  var xnorm = norm2(x);
  if (!xnorm) return 0;
  var ynorm = norm2(y);
  if (!ynorm) return 0;
  return dotproduct(x, y) / (xnorm * ynorm);
}
| |
// Trains a self-organizing map on `data` and renders it with D3 inside the
// "#som2" element.
//
// Parameters:
//   data                - object read via data.vecs (input vectors),
//                         data.words (labels), data.ranks (shown in the
//                         tooltip) and data.target (string used to count
//                         targets and to highlight labels). data.coords is
//                         (re)created here and updated during training.
//   training_iterations - number of training steps to run.
//
// The input fed to the map is the flattened pairwise matrix produced by
// xtod(data.vecs): row k of that matrix is the input vector for word k.
// Training runs asynchronously, one step per setInterval tick, and redraws
// every 10th step. Nothing is returned.
//
// Relies on globals defined outside this function: mapWidth, mapHeight, d3,
// jQuery ($), queryKorAPCII and the vector helpers of this file.
function makeSOM(data, training_iterations) {
  var dimW = 6;
  var dimH = 6;
  var cellW = mapWidth / dimW;
  var cellH = mapHeight / dimH;

  var radius = (dimW * dimH) / 2;
  var learning_rate = 1;
  var time_constant = training_iterations / Math.log(radius);
  var inputs = xtod(data.vecs);
  var dimI = data.vecs.length;
  var weights = makeRandomWeights(dimW * dimH, dimI);
  var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
  var refIndex;
  var colorScale;

  // Link prefix: the current query string with "word" reset to an empty
  // value at the end, so that a word can be appended directly.
  // NOTE(review): the word is appended without URL-encoding - confirm that
  // words never contain characters such as "&" or "#".
  var urlprefix = new URLSearchParams(window.location.search);
  urlprefix.delete("word");
  urlprefix.append("word","");

  if (no_targets > 1) {
    refIndex = 1;
    colorScale = d3.scale.linear()
      .range(['green', 'yellow', 'red']) // or use hex values
      .domain([-1, 0, 1]);

    $("#somcolor2").css("background-color", colorScale(0));
    $("#somcolor1").css("background-color", colorScale(-1));
    $("#somcolor3").css("background-color", colorScale(1));
  } else {
    refIndex = data.words.length-1;
    colorScale = d3.scale.linear()
      .range(['white', 'red'])
      .domain([-1, 1]);
    $("#somcolor1").css("background-color", colorScale(1));
    $("#somcolor3").css("background-color", colorScale(-1));
  }

  $("#somword1").html(data.words[0]);
  $("#somword2").html(data.words[refIndex]);

  // The two reference rows used for colouring never change during training,
  // so they are sliced once instead of once per cell per redraw.
  // NOTE(review): the second row is always input 1, while minsim is computed
  // against refIndex; these differ in the single-target branch - confirm
  // that this is intended.
  var firstInput = inputs.slice(0, dimI);
  var secondInput = inputs.slice(dimI, 2*dimI);
  // Declared locally (was an implicit global).
  var minsim = cosine_sim(firstInput, inputs.slice(refIndex*dimI, (refIndex+1)*dimI));

  var itdom = document.getElementById("iterations");

  var div = d3.select("#som2");

  // Every word starts in the centre cell of the grid.
  data.coords = [];
  for (var w = 0; w < data.words.length; w++) {
    data.coords[w] = [Math.floor(dimW/2), Math.floor(dimH/2)];
  }

  var svg = div.append("svg")
    .attr("width", mapWidth)
    .attr("height", mapHeight);

  // One rectangle per map cell, laid out row-major.
  svg.selectAll(".r")
    .data(weights)
    .enter().append("rect")
    .attr("class", "r")
    .attr("width", cellW)
    .attr("height", cellH)
    .attr("fill", "white")
    .attr("z-index", "-1")
    .attr("x", function(d, i) { return (i % dimW) * cellW; })
    // Rows advance by the cell height (the cell width was used here before,
    // which is only correct for square cells).
    .attr("y", function(d, i) { return Math.floor(i / dimW) * cellH; });

  // One labelled group per word. The SVG was just created, so the selection
  // is empty and every word lands in the enter selection.
  var g = svg.selectAll(".u")
    .data(data.words)
    .enter().append("g")
    .attr("class", "u");

  g.append("svg:title")
    .text(function(d, i) {
      return "distance rank: "+ i +" "+"\nfrequency rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
    });

  g.append("a")
    .attr("xlink:href", function(word) { return "?"+urlprefix+word; })
    .append("text")
    .attr("text-anchor", "bottom")
    .attr("font-size", 12)
    .attr("fill", function(d) {
      // Words that occur in the target string are drawn in blue.
      if (data.target.indexOf(" "+d+" ") >= 0) {
        return "blue";
      }
      return "#333";
    })
    .text(function(d) { return d; });

  // Middle mouse button on a label: run queryKorAPCII for that word instead
  // of the default action.
  $('g.u a, g.tsnet a').on('mousedown', function(e) {
    if (e.which === 2) {
      e.preventDefault();
      e.stopPropagation();
      console.log("middle button clicked " + this.childNodes["0"].textContent);
      queryKorAPCII(this.childNodes["0"].textContent);
      return false;
    }
  });

  var it = 0;
  var som_interval = setInterval(somStep, 0);

  // Moves every word label to its current cell and, late in training,
  // recolours the cells.
  function updateSOM() {
    // Per-cell slot counter (starting at 1) so that several labels in the
    // same cell are stacked 14px apart instead of overlapping.
    var oc = [];
    for (var c = 0; c < dimW * dimH; c++) {
      oc[c] = 1;
    }
    svg.selectAll('.u')
      .data(data.coords)
      .transition()
      .attr("transform", function(d) {
        var slot = oc[d[1]*dimW + d[0]]++;
        return "translate(" +
          (d[0]*cellW + 4) + "," +
          (d[1]*cellH + slot*14 + 4) + ")";
      });

    // Cell colour: difference of the cell's cosine similarity to the two
    // reference rows, scaled by (1 - minsim) and clamped to [-1, 1].
    var colorFun = function(d) {
      var sim1 = cosine_sim(d, firstInput);
      var sim2 = cosine_sim(d, secondInput);
      var col = (sim2 - sim1) / (1 - minsim);
      if (col > 1) col = 1;
      if (col < -1) col = -1;
      return colorScale(col);
    };

    // Cells are only coloured once more than 60% of the iterations are done.
    if (it > training_iterations*.6) {
      svg.selectAll(".r")
        .data(weights)
        .transition()
        .attr("fill", colorFun);
    }
  }

  // One training step: pick a random input, find its best matching unit and
  // pull the weights inside the current neighbourhood radius towards it.
  function somStep() {
    if (it++ >= training_iterations) {
      updateSOM();
      clearInterval(som_interval);
      return;
    }
    itdom.innerHTML = it;

    // Radius and learning rate share the same exponential decay.
    var decay = Math.exp(-it/time_constant);
    var radius_decaying = radius * decay;
    var learning_rate_decaying = learning_rate * decay;

    // Pick a random input to train on.
    var current = Math.floor(Math.random()*dimI);
    var iv = inputs.slice(current*dimI, (current+1)*dimI);

    // Determine the BMU and remember it as this word's cell.
    var BMUIdx = getBMUIndex(weights, iv);
    var coord1 = convertIndexToXY(BMUIdx, dimW);
    data.coords[current] = coord1;

    // getEucledianDistance() yields a squared distance, so it is compared
    // against the squared radius.
    var widthSq = radius_decaying * radius_decaying;
    // Index loop rather than for...in: the latter yields string keys and
    // would also visit any enumerable properties added to the array.
    for (var v = 0; v < weights.length; v++) {
      var coord2 = convertIndexToXY(v, dimW);
      var dist = getEucledianDistance(coord1, coord2);
      // Only weights within the training radius are adjusted.
      if (dist < widthSq) {
        var influence = Math.exp(-dist/(2*widthSq));
        var weight = weights[v];
        for (var vidx = 0; vidx < weight.length; vidx++) {
          weight[vidx] += influence * learning_rate_decaying * (iv[vidx] - weight[vidx]);
        }
      }
    }

    if (it % 10 === 0) {
      updateSOM();
    }
  }
}