Marc Kupietz | adaa163 | 2017-07-04 14:10:29 +0200 | [diff] [blame] | 1 | // Javascript implementation pf Kohonen's Self Organizing Map |
| 2 | // Based on http://www.ai-junkie.com/ann/som/som1.html |
| 3 | |
| 4 | var mapWidth = 800, |
| 5 | mapHeight = 800; |
| 6 | |
| 7 | function getDistance(weight, inputVector) { |
| 8 | var distance = 0; |
| 9 | for (var i = 0; i <weight.length; i++) { |
| 10 | distance += (inputVector[i] - weight[i]) * (inputVector[i] - weight[i]); |
| 11 | } |
| 12 | return Math.sqrt(distance); |
| 13 | } |
| 14 | |
| 15 | function makeRandomWeights(vSize, eSize) { |
| 16 | var weights = []; |
| 17 | if(typeof ArrayBuffer === 'undefined') { |
| 18 | // lacking browser support |
| 19 | while (weights.length < vSize) { |
| 20 | var arr = new Array(eSize); |
| 21 | for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); } |
| 22 | weights.push(arr); |
| 23 | } |
| 24 | } else { |
| 25 | while (weights.length < vSize) { |
| 26 | var arr = new Float64Array(eSize); |
| 27 | for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); } |
| 28 | weights.push(arr); |
| 29 | } |
| 30 | } |
| 31 | return weights; |
| 32 | } |
| 33 | |
| 34 | function getBMUIndex(weights, target) { |
| 35 | var BMUIndex = 0; |
| 36 | var bestScore = 99999; |
| 37 | |
| 38 | for (i=0; i < weights.length; i++) { |
| 39 | distance = getDistance(weights[i], target); |
| 40 | if (distance < bestScore) { |
| 41 | bestScore = distance; |
| 42 | BMUIndex = i; |
| 43 | } |
| 44 | } |
| 45 | return BMUIndex; |
| 46 | } |
| 47 | |
| 48 | function convertIndexToXY(idx, dimW) { |
| 49 | var x = parseInt(idx % dimW,10); |
| 50 | var y = parseInt((idx / dimW),10); |
| 51 | return [x,y]; |
| 52 | } |
| 53 | |
| 54 | |
| 55 | function getEucledianDistance(coord1, coord2) { |
| 56 | return (coord1[0] - coord2[0]) * (coord1[0] - coord2[0]) + (coord1[1] - coord2[1]) * (coord1[1] - coord2[1]); |
| 57 | } |
| 58 | |
| 59 | // utilitity that creates contiguous vector of zeros of size n |
| 60 | var zeros = function(n) { |
| 61 | if(typeof(n)==='undefined' || isNaN(n)) { return []; } |
| 62 | if(typeof ArrayBuffer === 'undefined') { |
| 63 | // lacking browser support |
| 64 | var arr = new Array(n); |
| 65 | for(var i=0;i<n;i++) { arr[i]= 0; } |
| 66 | return arr; |
| 67 | } else { |
| 68 | return new Float64Array(n); // typed arrays are faster |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | // compute L2 distance between two vectors |
| 73 | var L2 = function(x1, x2) { |
| 74 | var D = x1.length; |
| 75 | var d = 0; |
| 76 | for(var i=0;i<D;i++) { |
| 77 | var x1i = x1[i]; |
| 78 | var x2i = x2[i]; |
| 79 | d += (x1i-x2i)*(x1i-x2i); |
| 80 | } |
| 81 | return d; |
| 82 | } |
| 83 | |
| 84 | // compute pairwise distance in all vectors in X |
| 85 | var xtod = function(X) { |
| 86 | var N = X.length; |
| 87 | var dist = zeros(N * N); // allocate contiguous array |
| 88 | for(var i=0;i<N;i++) { |
| 89 | for(var j=i+1;j<N;j++) { |
| 90 | var d = L2(X[i], X[j]); |
| 91 | dist[i*N+j] = d; |
| 92 | dist[j*N+i] = d; |
| 93 | } |
| 94 | } |
| 95 | return dist; |
| 96 | } |
| 97 | |
| 98 | function dotproduct(a,b) { |
| 99 | var n = 0, lim = Math.min(a.length,b.length); |
| 100 | for (var i = 0; i < lim; i++) n += a[i] * b[i]; |
| 101 | return n; |
| 102 | } |
| 103 | |
| 104 | function vecsum(a,b) { |
| 105 | var lim = a.length; |
| 106 | var sum = new Array(lim); |
| 107 | for (var i = 0; i < lim; i++) sum[i] = a[i] + b[i]; |
| 108 | return sum; |
| 109 | } |
| 110 | |
| 111 | function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);} |
| 112 | |
| 113 | function cosine_sim(x, y) { |
| 114 | xnorm = norm2(x); |
| 115 | if(!xnorm) return 0; |
| 116 | ynorm = norm2(y); |
| 117 | if(!ynorm) return 0; |
| 118 | return dotproduct(x, y) / (xnorm * ynorm); |
| 119 | } |
| 120 | |
| 121 | function makeSOM(data, training_iterations) { |
| 122 | var dimW = 6; |
| 123 | var dimH = 6; |
| 124 | |
| 125 | var radius = (dimW * dimH) / 2; |
| 126 | var learning_rate = 1; |
| 127 | var time_constant = training_iterations / Math.log(radius); |
| 128 | var inputs = xtod(data.vecs); |
| 129 | var dimI = data.vecs.length; |
| 130 | var weights = makeRandomWeights(dimW * dimH, dimI); |
| 131 | var radius_decaying = 0; |
| 132 | var learning_rate_decaying = 0; |
| 133 | var svg; |
| 134 | var no_targets = (data.target.match(/.[ |]+./g) || []).length+1; |
| 135 | // var avg, avgsim1, avgsim2, minsim; |
| 136 | var refIndex; |
| 137 | var colorScale; |
Marc Kupietz | 8f9c86a | 2017-12-04 17:17:13 +0100 | [diff] [blame] | 138 | var urlprefix = new URLSearchParams(window.location.search); |
| 139 | urlprefix.delete("word"); |
| 140 | urlprefix.append("word",""); |
| 141 | |
Marc Kupietz | adaa163 | 2017-07-04 14:10:29 +0200 | [diff] [blame] | 142 | if(no_targets > 1) { |
| 143 | refIndex=1; |
| 144 | colorScale = d3.scale.linear() |
| 145 | .range(['green', 'yellow', 'red']) // or use hex values |
| 146 | .domain([-1, 0, 1]); |
| 147 | |
| 148 | // avg = vecsum(inputs.slice(0, dimI), inputs.slice(dimI, 2*dimI)); |
| 149 | // avgsim1 = cosine_sim(inputs.slice(0, dimI), avg); |
| 150 | // avgsim2 = cosine_sim(inputs.slice(dimI, 2*dimI), avg); |
| 151 | |
| 152 | $("#somcolor2").css("background-color", colorScale(0)); |
| 153 | $("#somcolor1").css("background-color", colorScale(-1)); |
| 154 | $("#somcolor3").css("background-color", colorScale(1)); |
| 155 | } else { |
| 156 | refIndex = data.words.length-1; |
| 157 | colorScale = d3.scale.linear() |
| 158 | .range(['white', 'red']) |
| 159 | .domain([-1, 1]); |
| 160 | $("#somcolor1").css("background-color", colorScale(1)); |
| 161 | $("#somcolor3").css("background-color", colorScale(-1)); |
| 162 | } |
| 163 | |
| 164 | $("#somword1").html(data.words[0]); |
| 165 | $("#somword2").html(data.words[refIndex]); |
| 166 | minsim = cosine_sim(inputs.slice(0, dimI), inputs.slice(refIndex*dimI, (refIndex+1)*dimI)); |
| 167 | |
| 168 | var itdom = document.getElementById("iterations"); |
| 169 | |
| 170 | var div = d3.select("#som2"); |
| 171 | |
| 172 | data.coords = []; |
| 173 | for(var i=0; i< data.words.length; i++) { |
| 174 | data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)]; |
| 175 | } |
| 176 | |
| 177 | svg = div.append("svg") |
| 178 | .attr("width", mapWidth) |
| 179 | .attr("height", mapHeight); |
| 180 | |
| 181 | var rects = svg.selectAll(".r") |
| 182 | .data(weights) |
| 183 | .enter().append("rect") |
| 184 | .attr("class", "r") |
| 185 | .attr("width", mapWidth/dimW) |
| 186 | .attr("height", mapHeight/dimH) |
| 187 | .attr("fill", "white") |
| 188 | .attr("z-index", "-1") |
| 189 | .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW);}) |
| 190 | .attr("y", function(d, i) { return (Math.floor(i / dimW) * (mapWidth/dimW)); }) |
| 191 | |
| 192 | |
| 193 | var g = svg.selectAll(".b") |
| 194 | .data(data.words) |
| 195 | .enter().append("g") |
| 196 | .attr("class", "u"); |
Marc Kupietz | 6c8b59a | 2018-03-12 12:26:25 +0100 | [diff] [blame] | 197 | |
| 198 | g.append("svg:title") |
| 199 | .text(function(d, i) { |
| 200 | return "distance rank: "+ i +" "+"\nfrequency rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ","); |
| 201 | }); |
| 202 | |
| 203 | g.append("a") |
Marc Kupietz | 8f9c86a | 2017-12-04 17:17:13 +0100 | [diff] [blame] | 204 | .attr("xlink:href", function(word) {return "?"+urlprefix+word;}) |
Marc Kupietz | adaa163 | 2017-07-04 14:10:29 +0200 | [diff] [blame] | 205 | .append("text") |
| 206 | .attr("text-anchor", "bottom") |
| 207 | .attr("font-size", 12) |
Marc Kupietz | 4750d87 | 2019-02-20 09:43:53 +0100 | [diff] [blame^] | 208 | .attr("class", function(d, i) { |
| 209 | var c="somtext"; |
| 210 | if(data.target.indexOf(" "+d+" ") >= 0) { |
| 211 | c += " target"; |
| 212 | } |
| 213 | if(data.ranks[i] >= data.mergedEnd) { |
| 214 | c += " merged"; |
| 215 | } |
| 216 | return c; |
| 217 | }) |
Marc Kupietz | adaa163 | 2017-07-04 14:10:29 +0200 | [diff] [blame] | 218 | .text(function(d) { return d; }); |
| 219 | |
Marc Kupietz | b347c9a | 2018-03-19 16:47:10 +0100 | [diff] [blame] | 220 | $('g.u a, g.tsnet a, td.paradigmator a').on('mousedown', function(e) { |
Marc Kupietz | b7fdd70 | 2018-03-12 12:25:32 +0100 | [diff] [blame] | 221 | if (e.which === 2) { |
| 222 | e.preventDefault(); |
| 223 | e.stopPropagation(); |
| 224 | console.log("middle button clicked " + this.childNodes["0"].textContent); |
| 225 | queryKorAPCII(this.childNodes["0"].textContent); |
| 226 | return false; |
| 227 | } |
| 228 | }); |
| 229 | |
Marc Kupietz | adaa163 | 2017-07-04 14:10:29 +0200 | [diff] [blame] | 230 | var som_interval = setInterval(somStep, 0); |
| 231 | var it=0; |
| 232 | |
| 233 | function updateSOM() { |
| 234 | var oc = []; |
| 235 | for(var x = 0; x < dimW; x++) { |
| 236 | for(var y = 0; y < dimH; y++) { |
| 237 | oc[y*dimW+x]=1; |
| 238 | } |
| 239 | } |
| 240 | svg.selectAll('.u') |
| 241 | .data(data.coords) |
| 242 | .transition() |
| 243 | .attr("transform", function(d, i) { |
| 244 | return "translate(" + |
| 245 | (d[0]*(mapWidth/dimW)+4) + "," + |
| 246 | (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")"; }); |
| 247 | |
| 248 | var colorFun = function(d, i) { |
| 249 | var sim1=cosine_sim(d, inputs.slice(0, dimI)); |
| 250 | var sim2=cosine_sim(d, inputs.slice(dimI, 2*dimI)); |
| 251 | var col; |
| 252 | // col = (sim1-avgsim1)/(1-avgsim1)-(sim2-avgsim2)/(1-avgsim2); |
| 253 | col = (sim2-sim1)/(1-minsim); |
| 254 | // console.log(Math.floor(i/dimW)+","+i%dimW+":"+(sim1-minsim)/(1-minsim)+ " " + (sim2-minsim)/(1-minsim) + "--> "+ col); |
| 255 | if(col > 1) col=1; |
| 256 | if(col < -1) col=-1; |
| 257 | return colorScale(col); |
| 258 | }; |
| 259 | |
| 260 | if(it>training_iterations*.6) { |
| 261 | svg.selectAll(".r") |
| 262 | .data(weights) |
| 263 | .transition() |
| 264 | .attr("fill", colorFun); |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | function somStep() { |
| 269 | if(it++ >= training_iterations) { |
| 270 | updateSOM(); |
| 271 | clearInterval(som_interval); |
| 272 | return; |
| 273 | } |
| 274 | itdom.innerHTML = it; |
| 275 | radius_decaying = radius * Math.exp(-it/time_constant); |
| 276 | learning_rate_decaying = learning_rate * Math.exp(-it/time_constant); |
| 277 | //learning_rate_decaying = learning_rate * Math.exp(-it/training_iterations); |
| 278 | |
| 279 | //pick a random input to train |
| 280 | var current=Math.floor(Math.random()*dimI) |
| 281 | var iv = inputs.slice(current*dimI, (current+1)*dimI); |
| 282 | // Determine the BMU |
| 283 | BMUIdx = getBMUIndex(weights, iv); |
| 284 | var coord1 = convertIndexToXY(BMUIdx, dimW); |
| 285 | data.coords[current] = coord1; |
| 286 | var widthSq = radius_decaying * radius_decaying; |
| 287 | for (var v in weights) { |
| 288 | var coord2 = convertIndexToXY(v, dimW); |
| 289 | var dist = getEucledianDistance(coord1, coord2); |
| 290 | // Determine if the weight is within the training radius |
| 291 | if (dist < widthSq) { |
| 292 | // console.log(dist, learning_rate_decaying, radius_decaying, it); |
| 293 | influence = Math.exp(-dist/(2*widthSq)); |
| 294 | for (vidx = 0;vidx<weights[v].length;vidx++) { |
| 295 | weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]); |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | // } |
| 300 | if(it % 10 == 0) { |
| 301 | updateSOM(); |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | } |