blob: 9e78ea6271677174675aaa4aa5e060f9177eb9c0 [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
<!-- Third-party styles and scripts. All URLs use https so nothing is blocked
     as mixed content when the page itself is served over https, and so the
     integrity-checked jQuery UI script is not fetched over an insecure
     channel. Load order is unchanged: jQuery, DataTables, then jQuery UI. -->
<link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed">
<script src="https://code.jquery.com/jquery-latest.min.js"></script>
<script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
<script src="https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
<link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
<script
  src="https://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
  integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
  crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020015 <script>
// Page setup that needs the DOM: the Enter-key shortcut for searching and the
// two DataTables (paradigmatic neighbours and collocators).
$(document).ready(function() {

  // Pressing Enter inside any input acts like a click on the SEARCH button.
  $("input").on("keydown", function(event) {
    // jQuery normalizes the pressed key into event.which; 13 is Enter.
    if (event.which === 13) {
      document.getElementById('SEARCH').click();
      return false; // suppress the browser's default handling of Enter
    }
    return true;
  });

  // Neighbour table: fixed-height scroll area, no paging.
  $('#firsttable').DataTable({
    "sScrollY": "780px",
    "bScrollCollapse": true,
    "bPaginate": false,
    "bJQueryUI": true,
    "dom": '<"top">rt<"bottom"flp><"clear">',
    "aoColumnDefs": [
      { "sWidth": "10%", "aTargets": [ -1 ] }
    ]
  });

  // Collocator table: column 0 is a running index that is neither searchable
  // nor sortable; the initial sort is column 4, descending.
  var t = $('#secondtable').DataTable({
    "sScrollY": "800px",
    "bScrollCollapse": true,
    "bPaginate": false,
    "bJQueryUI": true,
    "dom": '<"top">rt<"bottom"flp><"clear">',
    "columnDefs": [
      { "searchable": false,
        "orderable": false,
        "targets": 0
      },
      { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
      { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] }
    ],
    "order": [[ 4, 'desc' ]]
  });

  // Renumber the index column 1..n in display order after every sort/search.
  t.on('order.dt search.dt', function() {
    t.column(0, {order: 'applied'}).nodes().each(function(cell, i) {
      cell.innerHTML = i + 1;
    });
  }).draw();

});
Marc Kupietz0af83e32017-11-27 09:31:37 +010074
Marc Kupietzd64f3f22017-11-30 12:07:42 +010075
76
// Turns #dropdownoptions into a jQuery UI dialog that stays closed until it
// is opened explicitly.
$(function() {
  // "Apply" reloads the current path with the serialized form values as the
  // query string.
  var applyOptions = function() {
    window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
  };

  var closeDialog = function() {
    $(this).dialog("close");
  };

  // Insertion order determines the button order: Cancel first, then Apply.
  var dialogButtons = {};
  dialogButtons["Cancel"] = closeDialog;
  dialogButtons["Apply"] = applyOptions;

  var dialogSettings = {
    title: "Options",
    autoOpen: false,
    modal: false,
    draggable: false,
    height: "auto",
    width: "auto",
    resizable: false,
    buttons: dialogButtons
  };

  $("#dropdownoptions").dialog(dialogSettings);
});
96
// SEARCH button: reload the current path with the serialized form values as
// the query string, in the same window.
$(function() {
  var runSearch = function() {
    var target = $(location).attr('pathname')+'?'+$('form').serialize();
    window.open(target, "_self");
  };
  $("#SEARCH").click(runSearch);
});
102
// Options button: open the options dialog and align the dialog widget's
// bottom-left corner with the button's bottom-left corner.
$(function() {
  $("#showoptions").click(function() {
    var button = $(this);
    var dialog = $("#dropdownoptions");
    dialog.dialog("open");
    dialog.dialog("widget").position({
      my: 'left bottom',
      at: 'left bottom',
      of: button
    });
  });
});
114
// Numeric option fields as jQuery UI spinners whose spin buttons cannot leave
// a fixed range.
$(function() {
  // Returns a "spin" handler: when the upcoming value would leave
  // [lowest, highest] it sets the nearest bound instead and cancels the spin.
  function clampSpin(lowest, highest) {
    return function(event, ui) {
      if (ui.value < lowest) {
        $(this).spinner("value", lowest);
        return false;
      }
      if (ui.value > highest) {
        $(this).spinner("value", highest);
        return false;
      }
    };
  }

  $("#no_iterations").spinner({ spin: clampSpin(1000, 10000) });
  $("#neighbours").spinner({ spin: clampSpin(0, 200) });
  $("#cutoff").spinner({ spin: clampSpin(100000, 2000000) });
});
156
// Remaining jQuery UI widgets: the result tabs, the vertical control group of
// the options form and document-wide tooltips.
$(function() {
  // Result tabs with the minimal look defined by the .tabs-min rules.
  var resultTabs = $("#tabs").tabs();
  resultTabs.addClass('tabs-min');

  $(".controlgroup-vertical").controlgroup({
    "direction": "vertical"
  });

  // Tooltip text is taken from the element's title attribute.
  var titleOf = function() {
    return $(this).attr('title');
  };
  $(document).tooltip({ content: titleOf });
});
Marc Kupietz694610d2017-11-25 18:30:03 +0100174
Marc Kupietz83305222016-04-28 09:57:22 +0200175 </script>
<!-- d3 v3 plus the project's t-SNE, SOM and label-placement scripts.
     d3 is requested over https explicitly instead of protocol-relative. -->
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="/derekovecs/js/tsne.js"></script>
<script src="/derekovecs/js/som.js"></script>
<script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200180 <style>
/* Base font shared by page text and form inputs. */
body, input {
  font-family: Lato, sans-serif;
  font-size: 11pt;
}

/* Headings: condensed, upper-case, orange. */
h1, h2, h3 {
  margin: 5px 10px 0 0;
  color: rgb(246,168,0);
  font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
  font-weight: bold;
  line-height: 1.35;
  letter-spacing: normal;
  text-transform: uppercase;
  text-shadow: none;
  word-wrap: break-word;
}

/* Horizontal spacing for the Options and SEARCH buttons. The first selector
   needs the "#": without it the rule looked for a <showoptions> element and
   never matched the button with id="showoptions". */
#showoptions, #SEARCH {
  margin-left: 10px;
  margin-right: 10px;
}
203
/* Vertical tab strip: the nav bar is rotated by 90 degrees and placed to the
   left of the panels. */
.tabs-left-vertical .ui-tabs-nav {
  position: absolute;
  width: 21em;
  transform-origin: 100% 0%;
  transform: translate(-100%,0%) rotate(-90deg);
}

.tabs-left-vertical .ui-tabs-nav li {
  float: right;
}

/* Panels leave room on the left for the rotated tab strip. */
.tabs-left-vertical .ui-tabs-panel {
  height: 20em;
  padding-left: 3.5em;
}

/* Monospaced text. */
.mono {
  font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
}

/* Tooltip text; slightly smaller when the tooltip content is a direct child
   of an svg element. */
.ui-tooltip-content {
  color: #222222;
  font-size: 9pt;
}

svg > .ui-tooltip-content {
  color: #222222;
  font-size: 8pt;
}

/* Word links. "fill" is set as well because the same classes are used on
   links inside the SVG map. */
a.merged {
  fill: green;
  color: green;
}

#first a {
  text-decoration: none;
}

a.marked, #first a.marked {
  text-decoration: underline;
}

a.target {
  fill: red;
  color: red;
}
Marc Kupietz694610d2017-11-25 18:30:03 +0100254
/* NOTE(review): a former "table.display" rule here declared
   "width: 40% important!" and a unit-less "margin: 1". Both are invalid CSS
   and were dropped by browsers, so the rule never had any effect. It is
   removed instead of "repaired" so that the layout stays exactly as it was
   tested; re-add valid declarations ("!important", a length unit) only if a
   narrower table is really wanted. */

/* Compact cell padding for all DataTables header and body cells. */
table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
  padding: 2px 2px;
  /* optional separator line: border-bottom: 1px solid #111; */
}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100264
/* Page layout containers. The disabled debugging borders are kept as real
   CSS comments; "//" is not comment syntax in CSS. */

#collocators {
  margin-bottom: 15px;
}

#header {
  width: 100%;
  /* debug: border: 1px solid red; */
  overflow: hidden; /* will contain if #first is longer than #second */
}

#topwrapper {
  width: 100%;
  /* debug: border: 1px solid red; */
  overflow: hidden; /* will contain if #first is longer than #second */
}

#wrapper {
  /* debug: border: 1px solid red; */
  overflow: hidden; /* will contain if #first is longer than #second */
}

#pagetitle {
  max-width: 460px;
  margin-right: 20px;
  float: left;
  overflow: hidden; /* if you don't want #second to wrap below #first */
  /* debug: border: 1px solid green; */
}

#options {
  float: left;
  width: 800px;
  margin: 10px;
  overflow: hidden; /* if you don't want #second to wrap below #first */
}

#word {
  width: 50%;
}

#first {
  margin-right: 20px;
  float: left;
  overflow: hidden; /* if you don't want #second to wrap below #first */
  /* debug: border: 1px solid green; */
}

#tabs {
  margin-right: 20px;
  overflow: hidden; /* if you don't want #second to wrap below #first */
}
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100315
/* Minimal look for the jQuery UI tabs: no boxes or backgrounds, only a thin
   line under the tab strip and colored tab labels. */
.tabs-min {
  border: none;
  background: transparent;
}

.tabs-min .ui-widget-header {
  background: transparent;
  border: none;
  border-bottom: 1px solid #c0c0c0; /* must follow "border: none" */
  -moz-border-radius: 0px;
  -webkit-border-radius: 0px;
  border-radius: 0px;
}

/* Inactive tabs; the active-tab rule below has to stay after this one
   because an active tab carries both state classes. */
.tabs-min .ui-tabs-nav .ui-state-default {
  border: none;
  background: transparent;
}

.tabs-min .ui-tabs-nav .ui-state-active {
  border: none;
  background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
}

.tabs-min .ui-tabs-nav .ui-state-default a {
  color: #c0c0c0;
}

.tabs-min .ui-tabs-nav .ui-state-active a {
  color: rgb(246,168,0);
}
347
/* Containers of the t-SNE map and the SOM view. */
#embed {
  max-width: 802px;
  border: 1px solid #333;
}

#second {
  min-width: 800px;
  /* debug: border: 1px solid #333; ("//" is not comment syntax in CSS) */
  overflow: hidden; /* if you don't want #second to wrap below #first */
}

#som2 svg {
  border: 1px solid #333;
}

/* Status line below the map. */
#cost {
  font-size: 8pt;
  color: #222222;
  margin-top: 4px;
  margin-bottom: 12px;
}

/* Status and legend lines below the SOM. */
#sominfo1, #sominfo {
  font-size: 8pt;
  color: #222222;
  margin-top: 0px;
}

/* Small square color swatches used in the SOM legend line. */
#somcolor1, #somcolor2, #somcolor3 {
  display: inline-block;
  height: 10px;
  width: 10px;
}

#third {
  border: 1px solid #333;
}
Marc Kupietz83305222016-04-28 09:57:22 +0200384
385 </style>
386 <script>
387
388 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
389 mapWidth = 800, // width map
390 mapHeight = 800,
391 jitterRadius = 7;
392
393 var T = new tsnejs.tSNE(opt); // create a tSNE instance
394
395 var Y;
396
397 var data;
398 var labeler;
399
// Moves every word group to the x/y stored in its entry of `labels` (short
// 50 ms transition) and writes that position back into T.Y, using the inverse
// of the screen mapping applied in updateEmbedding().
function applyJitter() {
  var placeLabel = function(label, idx) {
    var point = T.Y[idx];
    point[0] = (label.x - mapWidth/2 - tx)/ss/20;
    point[1] = (label.y - mapHeight/2 - ty)/ss/20;
    return "translate(" + label.x + "," + label.y + ")";
  };

  var groups = svg.selectAll('.tsnet').data(labels);
  groups.transition()
    .duration(50)
    .attr("transform", placeLabel);
}
Marc Kupietz34c08172017-11-29 17:08:47 +0100413
// Repositions all word groups from the current t-SNE solution: coordinates
// are scaled by 20 and the zoom factor, shifted by the pan offset and
// centered on the map.
function updateEmbedding() {
  var Y = T.getSolution();

  var toScreen = function(d, i) {
    var sx = (Y[i][0]*20*ss + tx) + mapWidth/2;
    var sy = (Y[i][1]*20*ss + ty) + mapHeight/2;
    return "translate(" + sx + "," + sy + ")";
  };

  svg.selectAll('.tsnet')
    .data(data.words)
    .attr("transform", toScreen);
}
423
// Shared drawing state: the map's root <svg> selection, the label and anchor
// lists handed to the label placer, and the selection of all <text> nodes.
var svg;
var labels = [];
var anchor_array = [];
var text;

// Rebuilds the map inside #embed: one <g class="tsnet"> per word, holding a
// link to the query for that word with the word as its text, and installs
// the zoom/pan behaviour. Positions are set later by updateEmbedding().
function drawEmbedding() {
  // Formats a number with "," as thousands separator, e.g. 1234567 -> "1,234,567".
  function groupThousands(n) {
    return n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  }

  $("#embed").empty();

  svg = d3.select("#embed").append("svg") // svg is global
    .attr("width", mapWidth)
    .attr("height", mapHeight);

  var g = svg.selectAll(".b")
    .data(data.words)
    .enter().append("g")
    .attr("class", "tsnet");

  g.append("a")
    .attr("xlink:href", function(word) {
      // The word becomes a query-string value, so it has to be URL-encoded;
      // otherwise characters such as "&", "#" or "+" corrupt the link.
      return data.urlprefix + encodeURIComponent(word);
    })
    .attr("class", function(d, i) {
      var res = "";
      if (data.marked[i]) {
        res = "marked ";
      }
      if (data.target.indexOf(" "+d+" ") >= 0) {
        return res+"target";
      } else if (data.ranks[i] < data.mergedEnd) {
        return res+"merged";
      } else {
        return res;
      }
    })
    .attr("title", function(d, i) {
      var title = "rank: "+i +" "+"freq. rank: "+groupThousands(data.ranks[i]);
      // Only words below mergedEnd get the suffix, and only when a merged
      // vocabulary exists at all (mergedEnd > 0).
      if (data.mergedEnd > 0 && data.ranks[i] < data.mergedEnd) {
        title += " (merged vocab)";
      }
      return title;
    })
    .append("text")
    // "start" is the default the browser fell back to for the former,
    // invalid value "top".
    .attr("text-anchor", "start")
    .attr("font-size", 12)
    .text(function(d) { return d; });

  var zoomListener = d3.behavior.zoom()
    .scaleExtent([0.1, 10])
    .center([0,0])
    .on("zoom", zoomHandler);
  zoomListener(svg);
}
483
// Current pan offset (tx, ty) and zoom factor (ss) of the map, and the id of
// the interval timer currently driving the animation (-1 before the first).
var tx = 0,
    ty = 0,
    ss = 1,
    iter_id = -1;

// d3 zoom callback: remember the new pan/zoom state and redraw the words.
function zoomHandler() {
  var pan = d3.event.translate;
  tx = pan[0];
  ty = pan[1];
  ss = d3.event.scale;
  updateEmbedding();
}
494
var stepnum = 0;

// Ends the t-SNE animation and starts the label placement ("jitter") pass:
// builds label and anchor records from the current solution, measures the
// rendered labels, sets up the label placer and runs jitterStep() on a timer.
function stopStep() {
  clearInterval(iter_id);
  text = svg.selectAll("text");

  // Start from clean state so that a repeated run (e.g. showMap() called
  // again) neither keeps anchors of an earlier run nor finds the jitter
  // counter already past its limit.
  anchor_array = [];
  jitter_i = 0;

  // jitter function needs different data and co-ordinate representation
  labels = d3.range(data.words.length).map(function(i) {
    var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
    var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
    anchor_array.push({x: x, y: y, r: jitterRadius});
    return {
      x: x,
      y: y,
      name: data.words[i]
    };
  });

  // get the actual label bounding boxes for the jitter function
  text.each(function(d, i) {
    var box = this.getBBox(); // measured once per label
    labels[i].width = box.width;
    labels[i].height = box.height;
  });

  labeler = d3.labeler()
    .label(labels)
    .anchor(anchor_array)
    .width(mapWidth)
    .height(mapHeight)
    .update(applyJitter);

  iter_id = setInterval(jitterStep, 1);
}
534
// Number of jitter passes performed so far.
var jitter_i = 0;

// One timer tick of the label placement: run the placer and redraw, and stop
// the timer once the counter has passed 100.
function jitterStep() {
  var passesSoFar = jitter_i;
  jitter_i += 1;

  if (passesSoFar > 100) {
    clearInterval(iter_id);
    return;
  }

  labeler.start2(10);
  applyJitter();
}
545
546 var last_cost=1000;
547
548 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100549 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100550
Marc Kupietz4abcd682017-11-28 20:51:08 +0100551 if(i > <%= $no_iterations %>) {
552 stopStep();
553 } else {
554 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
555 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
556 if(i % 250 == 0 && cost >= last_cost) {
557 stopStep();
558 } else {
559 last_cost = cost;
560 updateEmbedding();
561 }
562 }
Marc Kupietz83305222016-04-28 09:57:22 +0200563 }
564
565 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100566 data=j;
567 T.iter=0;
568 T.initDataRaw(data.vecs); // init embedding
569 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100570
Marc Kupietz4abcd682017-11-28 20:51:08 +0100571 if(iter_id >= 0) {
572 clearInterval(iter_id);
573 }
574 //T.debugGrad();
575 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100576 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100577 makeSOM(j, <%= $no_iterations %>);
578 }
Marc Kupietz83305222016-04-28 09:57:22 +0200579 }
// The search field element; assigned by onload().
var queryword;

// Body onload handler: remember the search field.
function onload() {
  queryword = document.getElementById('word');
}

// Opens KorAP (window name 'KorAP') with the content of the search field as
// query. The value is URL-encoded so that spaces and characters such as "&",
// "#" or "|" arrive as part of the query instead of altering the URL.
function queryKorAP() {
  // Fall back to a direct lookup in case the button is used before onload().
  var field = queryword || document.getElementById('word');
  window.open('http://korap.ids-mannheim.de/kalamar/?q='+encodeURIComponent(field.value), 'KorAP');
}
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100589
// Opens KorAP (window name 'KorAP') with `query` as a COSMAS II query
// (ql=cosmas2). The query is URL-encoded because it contains spaces and
// operators such as "/w5" and may contain "&" or "#".
function queryKorAPCII(query) {
  window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q='+encodeURIComponent(query), 'KorAP');
}
Marc Kupietz83305222016-04-28 09:57:22 +0200593 </script>
594 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200595 <body onload="onload()">
<div id="header">
  <div id="pagetitle">
    <h1>DeReKo-Vectors</h1>
  </div>
  <div id="options" class="widget">
    <form id="queryform">
      <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
             title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word.">
      <input id="SEARCH" type="button" value="SEARCH">
      <input type="button" id="showoptions" name="showoptions" value="Options">
    </form>
    <%# Content of the options dialog. "display: none" keeps it invisible until the dialog script has run; the former value "hidden" is not valid for "display". %>
    <div id="dropdownoptions" style="display: none">
      <form id="optionsform">
        <div class="controlgroup-vertical">
          <label for="cutoff">cut-off</label>
          <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
          <label for="dedupe">dedupe</label>
          <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
% if($mergedEnd > 0) {
          <label for="sbf">backw.</label>
          <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked base vocabulary will be searched first. Otherwise merged vocabulary will be searched first.">
% }
          <label for="neighbours">max. neighbours:</label>
          <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
          <label for="no_iterations">max. iterations</label>
          <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
          <!-- <label for="dosom">SOM</label>
          <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
% if($collocators) {
          <label for="sortby">window/sort</label>
          <select id="sortby" name="sort">
            <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
            <!-- <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
            <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
          </select>
% }
          <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP">
        </div>
      </form>
    </div>
  </div>
</div>
638 <div id="topwrapper">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100639 <div id="tabs">
640 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100641 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
642 <li><a href="#tabs-2">Semantics (SOM)</a></li>
643 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100644 </ul>
<div id="tabs-1">
% if($lists && (@$lists) > 0 && (@$lists)[0]) {
  <div id="wrapper">
    <div id="first" style="width:220px">
      <table class="display compact nowrap" id="firsttable">
        <thead>
          <tr>
            <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
          </tr>
        </thead>
        <tbody>
%# Words with their vectors, ranks and "marked" flags, collected without
%# duplicates while the rows are written; handed to showMap() below.
% my @words; my @vecs; my @ranks; my @marked;
% for my $list (@$lists) {
%   my $i=0; while($list) {
%     my $item = (@$list)[$i];
%     my $c = ($collocators? (@$collocators)[$i] : 0);
%     last if(!$c && !$item);
          <tr>
            <td align="right">
              <%= ++$i %>.
            </td>
%     if($item) {
%       if(!grep{$_ eq $item->{word}} @words) {
%         push @vecs, $item->{vector};
%         push @words, $item->{word};
%         push @ranks, $item->{rank};
%         push @marked, ($marked->{$item->{word}}? 1 : 0);
%       }
            <td align="right">
              <%= sprintf("%.3f", $item->{dist}) %>
            </td>
            <td>
%       my $class = ($marked->{$item->{word}}? "marked " : "");
%       my $r = $item->{rank};
%       if($r < $mergedEnd) {
%         $class .= "merged";
%         $r .= " (merged vocab)";
%       } elsif($mergedEnd!=0 && $r > $mergedEnd) {
%         $r -= $mergedEnd;
%       }
              <a class="<%= $class =%>"
                 title="freq. rank: <%= $r =%>"
                 href="<%= url_with->query([word => $item->{word}]) =%>">
                <%= $item->{word} =%>
              </a>
            </td>
%     } else {
            <td colspan="2"></td>
%     }
          </tr>
%     last if($i >= 100);
%   }
% }
        </tbody>
      </table>
    </div>
    <script>
% use Mojo::ByteStream 'b';
% my $urlprefix = url_with->query([word=>'']);
      // .on('load', ...) replaces the .load() shorthand, which no longer
      // exists in jQuery 3.
      $(window).on('load', function() {
        showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
      });
    </script>
% } else {
%# #wrapper is deliberately left open here, exactly as in the branch above:
%# both branches share the closing tags after #second. Closing it here made
%# those tags close #tabs-1 and #tabs too early.
  <div id="wrapper">
    <p>
      ERROR: "<%= $word %>" not found in vocabulary.
    </p>
% }
    <div id="second">
      <div id="embed">
      </div>
      <div id="cost">
      </div>
    </div>
  </div> <!-- #wrapper -->
</div> <!-- #tabs-1 -->
<div id="tabs-2">
  <%# The width needs a unit: the former "width: 800" is invalid CSS and was ignored. %>
  <div id="som2" style="width: 800px; height: 800px">
  </div>
  <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
  <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
</div>
<div id="tabs-3">
%# NOTE(review): the id "second" is also used by the map container in tabs-1.
%# Ids should be unique, but the "#second" CSS rule currently applies to this
%# element too, so renaming it would change the layout; confirm and rename.
  <div id="second" style="width:500px">
    <table class="display compact nowrap" id="secondtable">
      <thead>
        <tr>
% if($collocators) {
          <th>#</th>
          <th align="right" title="The columns (c) around the target are considered for summation are marked with *.">w'</th>
          <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
          <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
          <th title="Sum of activations over the selected colunns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
          <th title="Sum of the column normalized activations over the selected colunns." align="right">Σ(a/c)</th>
          <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
          <th align="left">collocator</th>
% }
        </tr>
      </thead>
      <tbody>
%# Without collocators the list is empty and no rows are written; the loop no
%# longer dereferences a false $collocators.
% my @collocator_rows = ($collocators ? @$collocators : ());
% for(my $i=0; $i < @collocator_rows; $i++) {
%   my $c = $collocator_rows[$i];
        <tr>
          <td align="right">
            <%= $i %>
          </td>
%   if($c) {
          <td align="right">
            <span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
          </td>
          <td align="right">
            <%= sprintf("%.3f", $c->{max}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{conorm}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{prob}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{cprob}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{overall}) %>
          </td>
          <td align="left">
%#    The query text is passed as data and read back by the handler. Pasting
%#    it into the JavaScript source let a quote in a word end the string
%#    literal, because HTML-escaping is undone before the script is parsed.
            <a data-query="<%= sprintf('%s /w5 %s', $c->{word}, $word) =%>"
               onclick="queryKorAPCII(this.getAttribute('data-query'))"
               title="freq. rank: <%= $c->{rank} =%>">
              <%= $c->{word} %>
            </a>
          </td>
%   } else {
%#    Seven columns follow the index column.
          <td colspan="7"></td>
%   }
        </tr>
% }
      </tbody>
    </table>
  </div> <!-- #second (collocator table) -->
</div> <!-- #tabs-3 -->
</div> <!-- #tabs -->
</div> <!-- #topwrapper -->
<div style="clear: both;"></div>
<%# A further closing div that used to follow here had no matching opening tag and was removed. %>
% if($training_args) {
<%# The pre block follows the paragraph: inside a p element it ends the paragraph implicitly and leaves a stray closing tag behind. %>
<p>
  Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters:
</p>
<pre><%= $training_args %></pre>
% }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100796 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200797</html>