blob: 7786ae2d488c6400fb0be70482d4ceb8af05a4e7 [file] [log] [blame]
Marc Kupietzadaa1632017-07-04 14:10:29 +02001// Javascript implementation pf Kohonen's Self Organizing Map
2// Based on http://www.ai-junkie.com/ann/som/som1.html
3
4var mapWidth = 800,
5 mapHeight = 800;
6
7function getDistance(weight, inputVector) {
8 var distance = 0;
9 for (var i = 0; i <weight.length; i++) {
10 distance += (inputVector[i] - weight[i]) * (inputVector[i] - weight[i]);
11 }
12 return Math.sqrt(distance);
13}
14
15function makeRandomWeights(vSize, eSize) {
16 var weights = [];
17 if(typeof ArrayBuffer === 'undefined') {
18 // lacking browser support
19 while (weights.length < vSize) {
20 var arr = new Array(eSize);
21 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
22 weights.push(arr);
23 }
24 } else {
25 while (weights.length < vSize) {
26 var arr = new Float64Array(eSize);
27 for(var i = 0; i < eSize; i++) { arr[i]= Math.random(); }
28 weights.push(arr);
29 }
30 }
31 return weights;
32}
33
34function getBMUIndex(weights, target) {
35 var BMUIndex = 0;
36 var bestScore = 99999;
37
38 for (i=0; i < weights.length; i++) {
39 distance = getDistance(weights[i], target);
40 if (distance < bestScore) {
41 bestScore = distance;
42 BMUIndex = i;
43 }
44 }
45 return BMUIndex;
46}
47
48function convertIndexToXY(idx, dimW) {
49 var x = parseInt(idx % dimW,10);
50 var y = parseInt((idx / dimW),10);
51 return [x,y];
52}
53
54
55function getEucledianDistance(coord1, coord2) {
56 return (coord1[0] - coord2[0]) * (coord1[0] - coord2[0]) + (coord1[1] - coord2[1]) * (coord1[1] - coord2[1]);
57}
58
59// utilitity that creates contiguous vector of zeros of size n
60 var zeros = function(n) {
61 if(typeof(n)==='undefined' || isNaN(n)) { return []; }
62 if(typeof ArrayBuffer === 'undefined') {
63 // lacking browser support
64 var arr = new Array(n);
65 for(var i=0;i<n;i++) { arr[i]= 0; }
66 return arr;
67 } else {
68 return new Float64Array(n); // typed arrays are faster
69 }
70 }
71
72// compute L2 distance between two vectors
73var L2 = function(x1, x2) {
74 var D = x1.length;
75 var d = 0;
76 for(var i=0;i<D;i++) {
77 var x1i = x1[i];
78 var x2i = x2[i];
79 d += (x1i-x2i)*(x1i-x2i);
80 }
81 return d;
82}
83
84// compute pairwise distance in all vectors in X
85var xtod = function(X) {
86 var N = X.length;
87 var dist = zeros(N * N); // allocate contiguous array
88 for(var i=0;i<N;i++) {
89 for(var j=i+1;j<N;j++) {
90 var d = L2(X[i], X[j]);
91 dist[i*N+j] = d;
92 dist[j*N+i] = d;
93 }
94 }
95 return dist;
96}
97
98function dotproduct(a,b) {
99 var n = 0, lim = Math.min(a.length,b.length);
100 for (var i = 0; i < lim; i++) n += a[i] * b[i];
101 return n;
102 }
103
104function vecsum(a,b) {
105 var lim = a.length;
106 var sum = new Array(lim);
107 for (var i = 0; i < lim; i++) sum[i] = a[i] + b[i];
108 return sum;
109 }
110
111function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);}
112
113function cosine_sim(x, y) {
114 xnorm = norm2(x);
115 if(!xnorm) return 0;
116 ynorm = norm2(y);
117 if(!ynorm) return 0;
118 return dotproduct(x, y) / (xnorm * ynorm);
119}
120
121function makeSOM(data, training_iterations) {
122 var dimW = 6;
123 var dimH = 6;
124
125 var radius = (dimW * dimH) / 2;
126 var learning_rate = 1;
127 var time_constant = training_iterations / Math.log(radius);
128 var inputs = xtod(data.vecs);
129 var dimI = data.vecs.length;
130 var weights = makeRandomWeights(dimW * dimH, dimI);
131 var radius_decaying = 0;
132 var learning_rate_decaying = 0;
133 var svg;
134 var no_targets = (data.target.match(/.[ |]+./g) || []).length+1;
135// var avg, avgsim1, avgsim2, minsim;
136 var refIndex;
137 var colorScale;
Marc Kupietz8f9c86a2017-12-04 17:17:13 +0100138 var urlprefix = new URLSearchParams(window.location.search);
139 urlprefix.delete("word");
140 urlprefix.append("word","");
141
Marc Kupietzadaa1632017-07-04 14:10:29 +0200142 if(no_targets > 1) {
143 refIndex=1;
144 colorScale = d3.scale.linear()
145 .range(['green', 'yellow', 'red']) // or use hex values
146 .domain([-1, 0, 1]);
147
148 // avg = vecsum(inputs.slice(0, dimI), inputs.slice(dimI, 2*dimI));
149 // avgsim1 = cosine_sim(inputs.slice(0, dimI), avg);
150 // avgsim2 = cosine_sim(inputs.slice(dimI, 2*dimI), avg);
151
152 $("#somcolor2").css("background-color", colorScale(0));
153 $("#somcolor1").css("background-color", colorScale(-1));
154 $("#somcolor3").css("background-color", colorScale(1));
155 } else {
156 refIndex = data.words.length-1;
157 colorScale = d3.scale.linear()
158 .range(['white', 'red'])
159 .domain([-1, 1]);
160 $("#somcolor1").css("background-color", colorScale(1));
161 $("#somcolor3").css("background-color", colorScale(-1));
162 }
163
164 $("#somword1").html(data.words[0]);
165 $("#somword2").html(data.words[refIndex]);
166 minsim = cosine_sim(inputs.slice(0, dimI), inputs.slice(refIndex*dimI, (refIndex+1)*dimI));
167
168 var itdom = document.getElementById("iterations");
169
170 var div = d3.select("#som2");
171
172 data.coords = [];
173 for(var i=0; i< data.words.length; i++) {
174 data.coords[i] = [Math.floor(dimW/2), Math.floor(dimH/2)];
175 }
176
177 svg = div.append("svg")
178 .attr("width", mapWidth)
179 .attr("height", mapHeight);
180
181 var rects = svg.selectAll(".r")
182 .data(weights)
183 .enter().append("rect")
184 .attr("class", "r")
185 .attr("width", mapWidth/dimW)
186 .attr("height", mapHeight/dimH)
187 .attr("fill", "white")
188 .attr("z-index", "-1")
189 .attr("x", function(d, i) { return (i % dimW) * (mapWidth/dimW);})
190 .attr("y", function(d, i) { return (Math.floor(i / dimW) * (mapWidth/dimW)); })
191
192
193 var g = svg.selectAll(".b")
194 .data(data.words)
195 .enter().append("g")
196 .attr("class", "u");
Marc Kupietz6c8b59a2018-03-12 12:26:25 +0100197
198 g.append("svg:title")
199 .text(function(d, i) {
200 return "distance rank: "+ i +" "+"\nfrequency rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
201 });
202
203 g.append("a")
Marc Kupietz8f9c86a2017-12-04 17:17:13 +0100204 .attr("xlink:href", function(word) {return "?"+urlprefix+word;})
Marc Kupietzadaa1632017-07-04 14:10:29 +0200205 .append("text")
206 .attr("text-anchor", "bottom")
207 .attr("font-size", 12)
Marc Kupietz4750d872019-02-20 09:43:53 +0100208 .attr("class", function(d, i) {
209 var c="somtext";
210 if(data.target.indexOf(" "+d+" ") >= 0) {
211 c += " target";
212 }
213 if(data.ranks[i] >= data.mergedEnd) {
214 c += " merged";
215 }
216 return c;
217 })
Marc Kupietzadaa1632017-07-04 14:10:29 +0200218 .text(function(d) { return d; });
219
Marc Kupietzb347c9a2018-03-19 16:47:10 +0100220 $('g.u a, g.tsnet a, td.paradigmator a').on('mousedown', function(e) {
Marc Kupietzb7fdd702018-03-12 12:25:32 +0100221 if (e.which === 2) {
222 e.preventDefault();
223 e.stopPropagation();
224 console.log("middle button clicked " + this.childNodes["0"].textContent);
225 queryKorAPCII(this.childNodes["0"].textContent);
226 return false;
227 }
228 });
229
Marc Kupietzadaa1632017-07-04 14:10:29 +0200230 var som_interval = setInterval(somStep, 0);
231 var it=0;
232
233 function updateSOM() {
234 var oc = [];
235 for(var x = 0; x < dimW; x++) {
236 for(var y = 0; y < dimH; y++) {
237 oc[y*dimW+x]=1;
238 }
239 }
240 svg.selectAll('.u')
241 .data(data.coords)
242 .transition()
243 .attr("transform", function(d, i) {
244 return "translate(" +
245 (d[0]*(mapWidth/dimW)+4) + "," +
246 (d[1]*(mapHeight/dimH)+oc[d[1]*dimW+d[0]]++*14+4) + ")"; });
247
248 var colorFun = function(d, i) {
249 var sim1=cosine_sim(d, inputs.slice(0, dimI));
250 var sim2=cosine_sim(d, inputs.slice(dimI, 2*dimI));
251 var col;
252// col = (sim1-avgsim1)/(1-avgsim1)-(sim2-avgsim2)/(1-avgsim2);
253 col = (sim2-sim1)/(1-minsim);
254// console.log(Math.floor(i/dimW)+","+i%dimW+":"+(sim1-minsim)/(1-minsim)+ " " + (sim2-minsim)/(1-minsim) + "--> "+ col);
255 if(col > 1) col=1;
256 if(col < -1) col=-1;
257 return colorScale(col);
258 };
259
260 if(it>training_iterations*.6) {
261 svg.selectAll(".r")
262 .data(weights)
263 .transition()
264 .attr("fill", colorFun);
265 }
266 }
267
268 function somStep() {
269 if(it++ >= training_iterations) {
270 updateSOM();
271 clearInterval(som_interval);
272 return;
273 }
274 itdom.innerHTML = it;
275 radius_decaying = radius * Math.exp(-it/time_constant);
276 learning_rate_decaying = learning_rate * Math.exp(-it/time_constant);
277 //learning_rate_decaying = learning_rate * Math.exp(-it/training_iterations);
278
279 //pick a random input to train
280 var current=Math.floor(Math.random()*dimI)
281 var iv = inputs.slice(current*dimI, (current+1)*dimI);
282 // Determine the BMU
283 BMUIdx = getBMUIndex(weights, iv);
284 var coord1 = convertIndexToXY(BMUIdx, dimW);
285 data.coords[current] = coord1;
286 var widthSq = radius_decaying * radius_decaying;
287 for (var v in weights) {
288 var coord2 = convertIndexToXY(v, dimW);
289 var dist = getEucledianDistance(coord1, coord2);
290 // Determine if the weight is within the training radius
291 if (dist < widthSq) {
292 // console.log(dist, learning_rate_decaying, radius_decaying, it);
293 influence = Math.exp(-dist/(2*widthSq));
294 for (vidx = 0;vidx<weights[v].length;vidx++) {
295 weights[v][vidx] += influence * learning_rate_decaying * (iv[vidx] - weights[v][vidx]);
296 }
297 }
298 }
299// }
300 if(it % 10 == 0) {
301 updateSOM();
302 }
303 }
304
305}