blob: ca16d280ec2c88e7f3429cb4c2368810e7142333 [file] [log] [blame]
Marc Kupietzadaa1632017-07-04 14:10:29 +02001// Javascript implementation pf Kohonen's Self Organizing Map
2// Based on http://www.ai-junkie.com/ann/som/som1.html
3
4var mapWidth = 800,
5 mapHeight = 800;
6
7function getDistance(weight, inputVector) {
8 var distance = 0;
9 for (var i = 0; i <weight.length; i++) {
10 distance += (inputVector[i] - weight[i]) * (inputVector[i] - weight[i]);
11 }
12 return Math.sqrt(distance);
13}
14
15function makeRandomWeights(vSize, eSize) {
16 var weights = [];
17 if(typeof ArrayBuffer === 'undefined') {
18 // lacking browser support
19 while (weights.length < vSize) {
20 var arr = new Array(eSize);
21 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
22 weights.push(arr);
23 }
24 } else {
25 while (weights.length < vSize) {
26 var arr = new Float64Array(eSize);
27 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
28 weights.push(arr);
29 }
30 }
31 return weights;
32}
33
34function getBMUIndex(weights, target) {
35 var BMUIndex = 0;
36 var bestScore = 99999;
37
38 for (i=0; i < weights.length; i++) {
39 distance = getDistance(weights[i], target);
40 if (distance < bestScore) {
41 bestScore = distance;
42 BMUIndex = i;
43 }
44 }
45 return BMUIndex;
46}
47
48function convertIndexToXY(idx, dimW) {
49 var x = parseInt(idx % dimW,10);
50 var y = parseInt((idx / dimW),10);
51 return [x,y];
52}
53
54
55function getEucledianDistance(coord1, coord2) {
56 return (coord1[0] - coord2[0]) * (coord1[0] - coord2[0]) + (coord1[1] - coord2[1]) * (coord1[1] - coord2[1]);
57}
58
59// utilitity that creates contiguous vector of zeros of size n
60 var zeros = function(n) {
61 if(typeof(n)==='undefined' || isNaN(n)) { return []; }
62 if(typeof ArrayBuffer === 'undefined') {
63 // lacking browser support
64 var arr = new Array(n);
65 for(var i=0;i<n;i++) { arr[i]= 0; }
66 return arr;
67 } else {
68 return new Float64Array(n); // typed arrays are faster
69 }
70 }
71
72// compute L2 distance between two vectors
73var L2 = function(x1, x2) {
74 var D = x1.length;
75 var d = 0;
76 for(var i=0;i<D;i++) {
77 var x1i = x1[i];
78 var x2i = x2[i];
79 d += (x1i-x2i)*(x1i-x2i);
80 }
81 return d;
82}
83
84// compute pairwise distance in all vectors in X
85var xtod = function(X) {
86 var N = X.length;
87 var dist = zeros(N * N); // allocate contiguous array
88 for(var i=0;i<N;i++) {
89 for(var j=i+1;j<N;j++) {
90 var d = L2(X[i], X[j]);
91 dist[i*N+j] = d;
92 dist[j*N+i] = d;
93 }
94 }
95 return dist;
96}
97
98function dotproduct(a,b) {
99 var n = 0, lim = Math.min(a.length,b.length);
100 for (var i = 0; i < lim; i++) n += a[i] * b[i];
101 return n;
102 }
103
104function vecsum(a,b) {
105 var lim = a.length;
106 var sum = new Array(lim);
107 for (var i = 0; i < lim; i++) sum[i] = a[i] + b[i];
108 return sum;
109 }
110
111function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);}
112
113function cosine_sim(x, y) {
114 xnorm = norm2(x);
115 if(!xnorm) return 0;
116 ynorm = norm2(y);
117 if(!ynorm) return 0;
118 return dotproduct(x, y) / (xnorm * ynorm);
119}
120
121function makeSOM(data, training_iterations) {
122 var dimW = 6;
123 var dimH = 6;
124
125 var radius = (dimW * dimH) / 2;
126 var learning_rate = 1;
127 var time_constant = training_iterations / Math.log(radius);
128 var inputs = xtod(data.vecs);
129 var dimI = data.vecs.length;
130 var weights = makeRandomWeights(dimW * dimH, dimI);
131 var radius_decaying = 0;
132 var learning_rate_decaying = 0;
133 var svg;
134 var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
135// var avg, avgsim1, avgsim2, minsim;
136 var refIndex;
137 var colorScale;
138
139 if(no_targets > 1) {
140 refIndex=1;
141 colorScale = d3.scale.linear()
142 .range(['green', 'yellow', 'red']) // or use hex values
143 .domain([-1, 0, 1]);
144
145 // avg = vecsum(inputs.slice(0, dimI), inputs.slice(dimI, 2*dimI));
146 // avgsim1 = cosine_sim(inputs.slice(0, dimI), avg);
147 // avgsim2 = cosine_sim(inputs.slice(dimI, 2*dimI), avg);
148
149 $("#somcolor2").css("background-color", colorScale(0));
150 $("#somcolor1").css("background-color", colorScale(-1));
151 $("#somcolor3").css("background-color", colorScale(1));
152 } else {
153 refIndex = data.words.length-1;
154 colorScale = d3.scale.linear()
155 .range(['white', 'red'])
156 .domain([-1, 1]);
157 $("#somcolor1").css("background-color", colorScale(1));
158 $("#somcolor3").css("background-color", colorScale(-1));
159 }
160
161 $("#somword1").html(data.words[0]);
162 $("#somword2").html(data.words[refIndex]);
163 minsim = cosine_sim(inputs.slice(0, dimI), inputs.slice(refIndex*dimI, (refIndex+1)*dimI));
164
165 var itdom = document.getElementById("iterations");
166
167 var div = d3.select("#som2");
168
169 data.coords = [];
170 for(var i=0; i< data.words.length; i++) {
171 data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)];
172 }
173
174 svg = div.append("svg")
175 .attr("width", mapWidth)
176 .attr("height", mapHeight);
177
178 var rects = svg.selectAll(".r")
179 .data(weights)
180 .enter().append("rect")
181 .attr("class", "r")
182 .attr("width", mapWidth/dimW)
183 .attr("height", mapHeight/dimH)
184 .attr("fill", "white")
185 .attr("z-index", "-1")
186 .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW);})
187 .attr("y", function(d, i) { return (Math.floor(i / dimW) * (mapWidth/dimW)); })
188
189
190 var g = svg.selectAll(".b")
191 .data(data.words)
192 .enter().append("g")
193 .attr("class", "u");
194 g.append("a")
195 .attr("xlink:href", function(word) {return data.urlprefix+word;})
196 .attr("title", function(d, i) {
197 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
198 })
199 .append("text")
200 .attr("text-anchor", "bottom")
201 .attr("font-size", 12)
202 .attr("fill", function(d) {
203 if(data.target.indexOf(" "+d+" ") >= 0) {
204 return "blue";
205 } else {
206 return "#333"
207 }
208 })
209 .text(function(d) { return d; });
210
211 var som_interval = setInterval(somStep, 0);
212 var it=0;
213
214 function updateSOM() {
215 var oc = [];
216 for(var x = 0; x < dimW; x++) {
217 for(var y = 0; y < dimH; y++) {
218 oc[y*dimW+x]=1;
219 }
220 }
221 svg.selectAll('.u')
222 .data(data.coords)
223 .transition()
224 .attr("transform", function(d, i) {
225 return "translate(" +
226 (d[0]*(mapWidth/dimW)+4) + "," +
227 (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")"; });
228
229 var colorFun = function(d, i) {
230 var sim1=cosine_sim(d, inputs.slice(0, dimI));
231 var sim2=cosine_sim(d, inputs.slice(dimI, 2*dimI));
232 var col;
233// col = (sim1-avgsim1)/(1-avgsim1)-(sim2-avgsim2)/(1-avgsim2);
234 col = (sim2-sim1)/(1-minsim);
235// console.log(Math.floor(i/dimW)+","+i%dimW+":"+(sim1-minsim)/(1-minsim)+ " " + (sim2-minsim)/(1-minsim) + "--> "+ col);
236 if(col > 1) col=1;
237 if(col < -1) col=-1;
238 return colorScale(col);
239 };
240
241 if(it>training_iterations*.6) {
242 svg.selectAll(".r")
243 .data(weights)
244 .transition()
245 .attr("fill", colorFun);
246 }
247 }
248
249 function somStep() {
250 if(it++ >= training_iterations) {
251 updateSOM();
252 clearInterval(som_interval);
253 return;
254 }
255 itdom.innerHTML = it;
256 radius_decaying = radius * Math.exp(-it/time_constant);
257 learning_rate_decaying = learning_rate * Math.exp(-it/time_constant);
258 //learning_rate_decaying = learning_rate * Math.exp(-it/training_iterations);
259
260 //pick a random input to train
261 var current=Math.floor(Math.random()*dimI)
262 var iv = inputs.slice(current*dimI, (current+1)*dimI);
263 // Determine the BMU
264 BMUIdx = getBMUIndex(weights, iv);
265 var coord1 = convertIndexToXY(BMUIdx, dimW);
266 data.coords[current] = coord1;
267 var widthSq = radius_decaying * radius_decaying;
268 for (var v in weights) {
269 var coord2 = convertIndexToXY(v, dimW);
270 var dist = getEucledianDistance(coord1, coord2);
271 // Determine if the weight is within the training radius
272 if (dist < widthSq) {
273 // console.log(dist, learning_rate_decaying, radius_decaying, it);
274 influence = Math.exp(-dist/(2*widthSq));
275 for (vidx = 0;vidx<weights[v].length;vidx++) {
276 weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]);
277 }
278 }
279 }
280// }
281 if(it % 10 == 0) {
282 updateSOM();
283 }
284 }
285
286}