blob: e496d367fbaecc13480ea6a04a8a114d569e9bde [file] [log] [blame]
Marc Kupietzadaa1632017-07-04 14:10:29 +02001// Javascript implementation pf Kohonen's Self Organizing Map
2// Based on http://www.ai-junkie.com/ann/som/som1.html
3
4var mapWidth = 800,
5 mapHeight = 800;
6
7function getDistance(weight, inputVector) {
8 var distance = 0;
9 for (var i = 0; i <weight.length; i++) {
10 distance += (inputVector[i] - weight[i]) * (inputVector[i] - weight[i]);
11 }
12 return Math.sqrt(distance);
13}
14
15function makeRandomWeights(vSize, eSize) {
16 var weights = [];
17 if(typeof ArrayBuffer === 'undefined') {
18 // lacking browser support
19 while (weights.length < vSize) {
20 var arr = new Array(eSize);
21 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
22 weights.push(arr);
23 }
24 } else {
25 while (weights.length < vSize) {
26 var arr = new Float64Array(eSize);
27 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
28 weights.push(arr);
29 }
30 }
31 return weights;
32}
33
34function getBMUIndex(weights, target) {
35 var BMUIndex = 0;
36 var bestScore = 99999;
37
38 for (i=0; i < weights.length; i++) {
39 distance = getDistance(weights[i], target);
40 if (distance < bestScore) {
41 bestScore = distance;
42 BMUIndex = i;
43 }
44 }
45 return BMUIndex;
46}
47
48function convertIndexToXY(idx, dimW) {
49 var x = parseInt(idx % dimW,10);
50 var y = parseInt((idx / dimW),10);
51 return [x,y];
52}
53
54
55function getEucledianDistance(coord1, coord2) {
56 return (coord1[0] - coord2[0]) * (coord1[0] - coord2[0]) + (coord1[1] - coord2[1]) * (coord1[1] - coord2[1]);
57}
58
59// utilitity that creates contiguous vector of zeros of size n
60 var zeros = function(n) {
61 if(typeof(n)==='undefined' || isNaN(n)) { return []; }
62 if(typeof ArrayBuffer === 'undefined') {
63 // lacking browser support
64 var arr = new Array(n);
65 for(var i=0;i<n;i++) { arr[i]= 0; }
66 return arr;
67 } else {
68 return new Float64Array(n); // typed arrays are faster
69 }
70 }
71
72// compute L2 distance between two vectors
73var L2 = function(x1, x2) {
74 var D = x1.length;
75 var d = 0;
76 for(var i=0;i<D;i++) {
77 var x1i = x1[i];
78 var x2i = x2[i];
79 d += (x1i-x2i)*(x1i-x2i);
80 }
81 return d;
82}
83
84// compute pairwise distance in all vectors in X
85var xtod = function(X) {
86 var N = X.length;
87 var dist = zeros(N * N); // allocate contiguous array
88 for(var i=0;i<N;i++) {
89 for(var j=i+1;j<N;j++) {
90 var d = L2(X[i], X[j]);
91 dist[i*N+j] = d;
92 dist[j*N+i] = d;
93 }
94 }
95 return dist;
96}
97
98function dotproduct(a,b) {
99 var n = 0, lim = Math.min(a.length,b.length);
100 for (var i = 0; i < lim; i++) n += a[i] * b[i];
101 return n;
102 }
103
104function vecsum(a,b) {
105 var lim = a.length;
106 var sum = new Array(lim);
107 for (var i = 0; i < lim; i++) sum[i] = a[i] + b[i];
108 return sum;
109 }
110
111function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);}
112
113function cosine_sim(x, y) {
114 xnorm = norm2(x);
115 if(!xnorm) return 0;
116 ynorm = norm2(y);
117 if(!ynorm) return 0;
118 return dotproduct(x, y) / (xnorm * ynorm);
119}
120
121function makeSOM(data, training_iterations) {
122 var dimW = 6;
123 var dimH = 6;
124
125 var radius = (dimW * dimH) / 2;
126 var learning_rate = 1;
127 var time_constant = training_iterations / Math.log(radius);
128 var inputs = xtod(data.vecs);
129 var dimI = data.vecs.length;
130 var weights = makeRandomWeights(dimW * dimH, dimI);
131 var radius_decaying = 0;
132 var learning_rate_decaying = 0;
133 var svg;
134 var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
135// var avg, avgsim1, avgsim2, minsim;
136 var refIndex;
137 var colorScale;
Marc Kupietz8f9c86a2017-12-04 17:17:13 +0100138 var urlprefix = new URLSearchParams(window.location.search);
139 urlprefix.delete("word");
140 urlprefix.append("word","");
141
Marc Kupietzadaa1632017-07-04 14:10:29 +0200142 if(no_targets > 1) {
143 refIndex=1;
144 colorScale = d3.scale.linear()
145 .range(['green', 'yellow', 'red']) // or use hex values
146 .domain([-1, 0, 1]);
147
148 // avg = vecsum(inputs.slice(0, dimI), inputs.slice(dimI, 2*dimI));
149 // avgsim1 = cosine_sim(inputs.slice(0, dimI), avg);
150 // avgsim2 = cosine_sim(inputs.slice(dimI, 2*dimI), avg);
151
152 $("#somcolor2").css("background-color", colorScale(0));
153 $("#somcolor1").css("background-color", colorScale(-1));
154 $("#somcolor3").css("background-color", colorScale(1));
155 } else {
156 refIndex = data.words.length-1;
157 colorScale = d3.scale.linear()
158 .range(['white', 'red'])
159 .domain([-1, 1]);
160 $("#somcolor1").css("background-color", colorScale(1));
161 $("#somcolor3").css("background-color", colorScale(-1));
162 }
163
164 $("#somword1").html(data.words[0]);
165 $("#somword2").html(data.words[refIndex]);
166 minsim = cosine_sim(inputs.slice(0, dimI), inputs.slice(refIndex*dimI, (refIndex+1)*dimI));
167
168 var itdom = document.getElementById("iterations");
169
170 var div = d3.select("#som2");
171
172 data.coords = [];
173 for(var i=0; i< data.words.length; i++) {
174 data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)];
175 }
176
177 svg = div.append("svg")
178 .attr("width", mapWidth)
179 .attr("height", mapHeight);
180
181 var rects = svg.selectAll(".r")
182 .data(weights)
183 .enter().append("rect")
184 .attr("class", "r")
185 .attr("width", mapWidth/dimW)
186 .attr("height", mapHeight/dimH)
187 .attr("fill", "white")
188 .attr("z-index", "-1")
189 .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW);})
190 .attr("y", function(d, i) { return (Math.floor(i / dimW) * (mapWidth/dimW)); })
191
192
193 var g = svg.selectAll(".b")
194 .data(data.words)
195 .enter().append("g")
196 .attr("class", "u");
197 g.append("a")
Marc Kupietz8f9c86a2017-12-04 17:17:13 +0100198 .attr("xlink:href", function(word) {return "?"+urlprefix+word;})
Marc Kupietzadaa1632017-07-04 14:10:29 +0200199 .attr("title", function(d, i) {
200 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
201 })
202 .append("text")
203 .attr("text-anchor", "bottom")
204 .attr("font-size", 12)
205 .attr("fill", function(d) {
206 if(data.target.indexOf(" "+d+" ") >= 0) {
207 return "blue";
208 } else {
209 return "#333"
210 }
211 })
212 .text(function(d) { return d; });
213
214 var som_interval = setInterval(somStep, 0);
215 var it=0;
216
217 function updateSOM() {
218 var oc = [];
219 for(var x = 0; x < dimW; x++) {
220 for(var y = 0; y < dimH; y++) {
221 oc[y*dimW+x]=1;
222 }
223 }
224 svg.selectAll('.u')
225 .data(data.coords)
226 .transition()
227 .attr("transform", function(d, i) {
228 return "translate(" +
229 (d[0]*(mapWidth/dimW)+4) + "," +
230 (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")"; });
231
232 var colorFun = function(d, i) {
233 var sim1=cosine_sim(d, inputs.slice(0, dimI));
234 var sim2=cosine_sim(d, inputs.slice(dimI, 2*dimI));
235 var col;
236// col = (sim1-avgsim1)/(1-avgsim1)-(sim2-avgsim2)/(1-avgsim2);
237 col = (sim2-sim1)/(1-minsim);
238// console.log(Math.floor(i/dimW)+","+i%dimW+":"+(sim1-minsim)/(1-minsim)+ " " + (sim2-minsim)/(1-minsim) + "--> "+ col);
239 if(col > 1) col=1;
240 if(col < -1) col=-1;
241 return colorScale(col);
242 };
243
244 if(it>training_iterations*.6) {
245 svg.selectAll(".r")
246 .data(weights)
247 .transition()
248 .attr("fill", colorFun);
249 }
250 }
251
252 function somStep() {
253 if(it++ >= training_iterations) {
254 updateSOM();
255 clearInterval(som_interval);
256 return;
257 }
258 itdom.innerHTML = it;
259 radius_decaying = radius * Math.exp(-it/time_constant);
260 learning_rate_decaying = learning_rate * Math.exp(-it/time_constant);
261 //learning_rate_decaying = learning_rate * Math.exp(-it/training_iterations);
262
263 //pick a random input to train
264 var current=Math.floor(Math.random()*dimI)
265 var iv = inputs.slice(current*dimI, (current+1)*dimI);
266 // Determine the BMU
267 BMUIdx = getBMUIndex(weights, iv);
268 var coord1 = convertIndexToXY(BMUIdx, dimW);
269 data.coords[current] = coord1;
270 var widthSq = radius_decaying * radius_decaying;
271 for (var v in weights) {
272 var coord2 = convertIndexToXY(v, dimW);
273 var dist = getEucledianDistance(coord1, coord2);
274 // Determine if the weight is within the training radius
275 if (dist < widthSq) {
276 // console.log(dist, learning_rate_decaying, radius_decaying, it);
277 influence = Math.exp(-dist/(2*widthSq));
278 for (vidx = 0;vidx<weights[v].length;vidx++) {
279 weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]);
280 }
281 }
282 }
283// }
284 if(it % 10 == 0) {
285 updateSOM();
286 }
287 }
288
289}