blob: 9656c56b3fe733f085074c296156c30befc4f4bb [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
Marc Kupietz80bd7b92017-07-04 16:25:54 +02005 <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
Marc Kupietz6dbadd12017-11-29 16:43:33 +01006 <link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
Marc Kupietz80bd7b92017-07-04 16:25:54 +02007 <script src="http://code.jquery.com/jquery-latest.min.js"></script>
Marc Kupietz4abcd682017-11-28 20:51:08 +01008 <script src = "https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
9 <script src = "https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
Marc Kupietz22796142017-12-01 13:19:15 +010010 <script src = "https://cdn.datatables.net/plug-ins/1.10.16/sorting/scientific.js"></script>
Marc Kupietz4abcd682017-11-28 20:51:08 +010011 <link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
Marc Kupietz80bd7b92017-07-04 16:25:54 +020012 <script
Marc Kupietz6dbadd12017-11-29 16:43:33 +010013 src="http://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
Marc Kupietzb6c615d2017-12-02 10:38:20 +010014 integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
15 crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020016 <script>
Marc Kupietza6e08f02017-12-01 22:06:21 +010017 var urlParams = new URLSearchParams(window.location.search);
18
Marc Kupietz4abcd682017-11-28 20:51:08 +010019 $('#firstable').hide();
20 $(document).ready(function() {
Marc Kupietz694610d2017-11-25 18:30:03 +010021
Marc Kupietzb6c615d2017-12-02 10:38:20 +010022 $("input").bind("keydown", function(event) {
23 // track enter key
24 var keycode = (event.keyCode ? event.keyCode : (event.which ? event.which : event.charCode));
25 if (keycode == 13) { // keycode for enter key
26 // force the 'Enter Key' to implicitly click the Update button
27 document.getElementById('SEARCH').click();
28 return false;
29 } else {
30 return true;
31 }});
Marc Kupietzdab9f222017-11-29 14:22:59 +010032
Marc Kupietzb6c615d2017-12-02 10:38:20 +010033 $(".selector").tabs({ active: 1 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010034
Marc Kupietz896c9092017-12-02 14:40:43 +010035 $(function(){
36 $("#SEARCH").click(function() {
37 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
38 });
39 });
Marc Kupietz2f6b74a2017-12-01 13:20:21 +010040
Marc Kupietzb6c615d2017-12-02 10:38:20 +010041 $('#firsttable').DataTable({
42 "sScrollY": "780px",
43 "bScrollCollapse": true,
44 "bPaginate": false,
45 "bJQueryUI": true,
46 "dom": '<"top">rt<"bottom"flp><"clear">',
47 "aoColumnDefs": [
48 { "sWidth": "10%", "aTargets": [ -1 ] }
49 ]
50 } );
51
52 $( "#first" ).clone().prependTo( "#tabs-2" );
53
54 function changeCharColor(txt, heat) {
55 var newText = "";
56 for (var i=0, l=txt.length; i<l; i++) {
Marc Kupietz728b8ed2017-12-02 11:13:49 +010057 newText += (i == 5 ? txt.charAt(i) : '<span style="background-color:'+getHeatColor(heat[i]/maxHeat)+'">'+txt.charAt(i)+'</span>');
Marc Kupietzb6c615d2017-12-02 10:38:20 +010058 }
59 return newText;
60 }
61
62 function getHeatColor(value) {
63 var hue=((1-value)*120).toString(10);
Marc Kupietz728b8ed2017-12-02 11:13:49 +010064 return ["hsl(",hue,",90%,70%)"].join("");
Marc Kupietzb6c615d2017-12-02 10:38:20 +010065 }
66
67 function bitvec2window(n, heat) {
68 var str = n.toString(2).padStart(10, "0")
69 .replace(/^([0-9]{5})/, '$1x')
70 .replace(/0/g, '·')
71 .replace(/1/g, '+');
72 return changeCharColor(str, heat);
73 }
74
75 var collocatorData = <%= b(Mojo::JSON::to_json($collocators)) %>;
76 var maxHeat; // = Math.max.apply(Math,collocatorData.map(function(o){return o.cprob;}))
Marc Kupietzb6c615d2017-12-02 10:38:20 +010077
78 if (collocatorData != null) {
Marc Kupietz896c9092017-12-02 14:40:43 +010079 maxHeat = Math.max.apply(Math,collocatorData.map(function(o){return Math.max.apply(Math,o.heat);}))
Marc Kupietzb6c615d2017-12-02 10:38:20 +010080 var t = $('#secondtable').DataTable({
81 data: collocatorData,
82 "sScrollY": "800px",
83 "bScrollCollapse": true,
84 "bPaginate": false,
85 "bJQueryUI": true,
86 "dom": '<"top">rt<"bottom"flp><"clear">',
87 "columns": [
88 { "data": "rank", type: "allnumeric" },
89 { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: function ( data, type, row ) {return bitvec2window(data, row.heat) }},
90 { "data": "max", render: function ( data, type, row ) {return data.toFixed(3) }},
91 { "data": "conorm", render: function ( data, type, row ) {return data.toFixed(3) }},
92 { "data": "prob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
93 { "data": "cprob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
94 { "data": "overall", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
95 { "data": "word", sClass: "collocator" }
96 ],
97 "columnDefs": [
98 { className: "dt-right", "targets": [0,2,3,4,5,6] },
99 { className: "dt-center", "targets": [ 1] },
100 { "searchable": false,
101 "orderable": false,
102 "targets": 0
103 },
104 { "type": "scientific", targets: [2,3,4,5,6] },
105 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
106 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] },
107 ],
108 "order": [[ 4, 'desc' ]],
Marc Kupietz4abcd682017-11-28 20:51:08 +0100109 } );
Marc Kupietz0af83e32017-11-27 09:31:37 +0100110
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100111 t.on( 'order.dt search.dt', function () {
112 t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
113 cell.innerHTML = i+1;
114 } );
115 } ).draw();
116 }
117 $("#tabs").css("visibility", "visible"); // now we can show the tabs
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100118
Marc Kupietz4abcd682017-11-28 20:51:08 +0100119 });
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100120
Marc Kupietzdab9f222017-11-29 14:22:59 +0100121 $(function(){
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100122 $("#dropdownoptions").dialog({
123 title: "Options",
124 autoOpen: false,
125 modal: false,
126 draggable: false,
127 height: "auto",
128 width: "auto",
129 resizable: false,
130 buttons: {
131 "Cancel": function() {
132 $( this ).dialog( "close" );
133 },
134 "Apply": function() {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100135 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100136 }
137 }
138 });
Marc Kupietzdab9f222017-11-29 14:22:59 +0100139 });
140
141 $(function(){
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100142 $("td.collocator").click(function(){
143 queryKorAPCII(this.textContent + " /w5 " + urlParams.get('word'));
144 });
145 });
146
147 $(function(){
148 $("#showoptions").click(function(){
149 $("#dropdownoptions").dialog("open");
150 var target = $(this);
151 $("#dropdownoptions").dialog("widget").position({
152 my: 'left bottom',
153 at: 'left bottom',
154 of: target
Marc Kupietzdab9f222017-11-29 14:22:59 +0100155 });
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100156 });
Marc Kupietzdab9f222017-11-29 14:22:59 +0100157 });
158
Marc Kupietz4abcd682017-11-28 20:51:08 +0100159 $( function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100160 $( "#no_iterations" ).spinner({
161 spin: function( event, ui ) {
162 if ( ui.value < 1000 ) {
163 $( this ).spinner( "value", 1000 );
164 return false;
165 } else if ( ui.value > 10000 ) {
166 $( this ).spinner( "value", 10000 );
167 return false;
168 }
169 }
170 });
Marc Kupietz4abcd682017-11-28 20:51:08 +0100171 } );
Marc Kupietz3305b0a2017-11-27 10:46:20 +0100172
Marc Kupietz4abcd682017-11-28 20:51:08 +0100173 $( function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100174 $( "#neighbours" ).spinner({
175 spin: function( event, ui ) {
176 if ( ui.value < 0 ) {
177 $( this ).spinner( "value", 0 );
178 return false;
179 } else if ( ui.value > 200 ) {
180 $( this ).spinner( "value", 200 );
181 return false;
182 }
183 }
184 });
Marc Kupietz4abcd682017-11-28 20:51:08 +0100185 } );
186
187 $( function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100188 $( "#cutoff" ).spinner({
189 spin: function( event, ui ) {
190 if ( ui.value < 100000 ) {
191 $( this ).spinner( "value", 100000 );
192 return false;
193 } else if ( ui.value > 2000000 ) {
194 $( this ).spinner( "value", 2000000 );
195 return false;
196 }
197 }
198 });
Marc Kupietz4abcd682017-11-28 20:51:08 +0100199 } );
200
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100201 var tabactivated = {}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100202 $( function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100203 $( "#tabs" ).tabs().addClass('tabs-min');
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100204 } );
Marc Kupietz4abcd682017-11-28 20:51:08 +0100205
206 $( function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100207 $( ".controlgroup-vertical" ).controlgroup({
208 "direction": "vertical"
209 });
Marc Kupietz4abcd682017-11-28 20:51:08 +0100210 } );
211
212 $(function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100213 $( document ).tooltip({
214 content: function() {
215 return $(this).attr('title');
216 }}
217 )
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100218 })
Marc Kupietz694610d2017-11-25 18:30:03 +0100219
Marc Kupietz83305222016-04-28 09:57:22 +0200220 </script>
221 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100222 <script src="/derekovecs/js/tsne.js"></script>
223 <script src="/derekovecs/js/som.js"></script>
224 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200225 <style>
226 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100227 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200228 font-size: 11pt;
229 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100230
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100231 h1, h2, h3 {
232 margin: 5px 10px 0 0;
233 color: rgb(246,168,0);
234 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
235 font-weight: bold;
236 line-height: 1.35;
237 letter-spacing: normal;
238 text-transform: uppercase;
239 text-shadow: none;
Marc Kupietz34c08172017-11-29 17:08:47 +0100240 word-wrap: break-word;
241 }
242
243
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100244 showoptions, #SEARCH {
245 margin-left: 10px;
246 margin-right: 10px;
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100247 }
248
Marc Kupietzdab9f222017-11-29 14:22:59 +0100249 .tabs-left-vertical .ui-tabs-nav {
250 position: absolute;
251 width: 21em;
252 transform: translate(-100%,0%) rotate(-90deg);
253 transform-origin: 100% 0%;
254 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100255
Marc Kupietzdab9f222017-11-29 14:22:59 +0100256 .tabs-left-vertical .ui-tabs-nav li {
257 float: right;
258 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100259
Marc Kupietzdab9f222017-11-29 14:22:59 +0100260 .tabs-left-vertical .ui-tabs-panel {
261 padding-left: 3.5em;
262 }
263
264 .tabs-left-vertical .ui-tabs-panel {
265 height: 20em;
Marc Kupietz34c08172017-11-29 17:08:47 +0100266 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100267
Marc Kupietz34c08172017-11-29 17:08:47 +0100268 .mono {
269 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
270 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100271
Marc Kupietz34c08172017-11-29 17:08:47 +0100272 .ui-tooltip-content {
273 font-size: 9pt;
274 color: #222222;
275 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200276
Marc Kupietz34c08172017-11-29 17:08:47 +0100277 svg > .ui-tooltip-content {
278 font-size: 8pt;
279 color: #222222;
280 }
281
282 a.merged {
283 color: green;
284 fill: green;
285 }
286
287 #first a {
288 text-decoration: none;
289 }
290
291 a.marked, #first a.marked {
292 text-decoration: underline;
293 }
294
295 a.target {
296 color: red;
297 fill: red;
298 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100299
Marc Kupietz4abcd682017-11-28 20:51:08 +0100300 table.display {
301 width: 40% important!;
302 margin: 1; /* <- works for me this way ****/
303 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100304
Marc Kupietz4abcd682017-11-28 20:51:08 +0100305 table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
306 padding: 2px 2px;
307 // border-bottom: 1px solid #111;
308 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100309
Marc Kupietz34c08172017-11-29 17:08:47 +0100310 #collocators {
311 margin-bottom: 15px;
312 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100313
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100314 #header {
315 width: 100%;
316 // border: 1px solid red;
317 overflow: hidden; /* will contain if #first is longer than #second */
318 }
319
Marc Kupietz34c08172017-11-29 17:08:47 +0100320 #topwrapper {
321 width: 100%;
322 // border: 1px solid red;
323 overflow: hidden; /* will contain if #first is longer than #second */
324 }
325
326 #wrapper {
327 // border: 1px solid red;
328 overflow: hidden; /* will contain if #first is longer than #second */
329 }
330
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100331 #pagetitle {
332 max-width: 460px;
333 margin-right: 20px;
334 float: left;
335 overflow: hidden; /* if you don't want #second to wrap below #first */
336 // border: 1px solid green;
337 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100338
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100339 #options {
340 float: left;
341 width: 800px;
342 margin: 10px;
343 overflow: hidden; /* if you don't want #second to wrap below #first */
344 }
345
346 #word {
347 width: 50%;
348 }
349
Marc Kupietz34c08172017-11-29 17:08:47 +0100350 #first {
351 margin-right: 20px;
352 float: left;
353 overflow: hidden; /* if you don't want #second to wrap below #first */
354 // border: 1px solid green;
355 }
356 #tabs {
357 margin-right: 20px;
358 overflow: hidden; /* if you don't want #second to wrap below #first */
359 }
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100360
361 .tabs-min {
362 background: transparent;
363 border: none;
364 }
365
366 .tabs-min .ui-widget-header {
367 background: transparent;
368 border: none;
369 border-bottom: 1px solid #c0c0c0;
370 -moz-border-radius: 0px;
371 -webkit-border-radius: 0px;
372 border-radius: 0px;
373 }
374
375 .tabs-min .ui-tabs-nav .ui-state-default {
376 background: transparent;
377 border: none;
378 }
379
380 .tabs-min .ui-tabs-nav .ui-state-active {
381 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
382 border: none;
383 }
384
385 .tabs-min .ui-tabs-nav .ui-state-default a {
386 color: #c0c0c0;
387 }
388
389 .tabs-min .ui-tabs-nav .ui-state-active a {
390 color: rgb(246,168,0);
391 }
392
Marc Kupietz4abcd682017-11-28 20:51:08 +0100393 #embed {
394 max-width: 802px;
395 border: 1px solid #333;
396 }
397
Marc Kupietz34c08172017-11-29 17:08:47 +0100398 #second {
399 min-width: 800px;
Marc Kupietzdab9f222017-11-29 14:22:59 +0100400 // border: 1px solid #333;
Marc Kupietz34c08172017-11-29 17:08:47 +0100401 overflow: hidden; /* if you don't want #second to wrap below #first */
402 }
403 #som2 svg {
404 border: 1px solid #333;
405 }
Marc Kupietz83305222016-04-28 09:57:22 +0200406
Marc Kupietz34c08172017-11-29 17:08:47 +0100407 #cost {
408 font-size: 8pt;
409 color: #222222;
410 margin-top: 4px;
411 margin-bottom: 12px;
412 }
Marc Kupietz83305222016-04-28 09:57:22 +0200413
Marc Kupietz34c08172017-11-29 17:08:47 +0100414 #sominfo1, #sominfo {
415 font-size: 8pt;
416 color: #222222;
417 margin-top: 0px;
418 }
Marc Kupietz83305222016-04-28 09:57:22 +0200419
Marc Kupietz34c08172017-11-29 17:08:47 +0100420 #somcolor1, #somcolor2, #somcolor3 {
421 display: inline-block;
422 height: 10px;
423 width: 10px;
424 }
Marc Kupietz83305222016-04-28 09:57:22 +0200425
Marc Kupietz34c08172017-11-29 17:08:47 +0100426 #third {
427 border: 1px solid #333;
428 }
Marc Kupietz83305222016-04-28 09:57:22 +0200429
430 </style>
431 <script>
432
433 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
434 mapWidth = 800, // width map
435 mapHeight = 800,
436 jitterRadius = 7;
437
438 var T = new tsnejs.tSNE(opt); // create a tSNE instance
439
440 var Y;
441
442 var data;
443 var labeler;
444
445 function applyJitter() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100446 svg.selectAll('.tsnet')
447 .data(labels)
448 .transition()
449 .duration(50)
450 .attr("transform", function(d, i) {
451 T.Y[i][0] = (d.x - mapWidth/2 - tx)/ss/20;
452 T.Y[i][1] = (d.y - mapHeight/2 - ty)/ss/20;
453 return "translate(" +
454 (d.x) + "," +
455 (d.y) + ")";
456 });
Marc Kupietz83305222016-04-28 09:57:22 +0200457 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100458
Marc Kupietz83305222016-04-28 09:57:22 +0200459 function updateEmbedding() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100460 var Y = T.getSolution();
461 svg.selectAll('.tsnet')
462 .data(data.words)
463 .attr("transform", function(d, i) {
464 return "translate(" +
465 ((Y[i][0]*20*ss + tx) + mapWidth/2) + "," +
466 ((Y[i][1]*20*ss + ty) + mapHeight/2) + ")"; });
Marc Kupietz83305222016-04-28 09:57:22 +0200467 }
468
469 var svg;
470 var labels = [];
471 var anchor_array = [];
472 var text;
473
474 function drawEmbedding() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100475 $("#embed").empty();
476 var div = d3.select("#embed");
Marc Kupietz34c08172017-11-29 17:08:47 +0100477
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100478 // get min and max in each column of Y
479 var Y = T.Y;
Marc Kupietz34c08172017-11-29 17:08:47 +0100480
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100481 svg = div.append("svg") // svg is global
482 .attr("width", mapWidth)
483 .attr("height", mapHeight);
Marc Kupietz34c08172017-11-29 17:08:47 +0100484
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100485 var g = svg.selectAll(".b")
486 .data(data.words)
487 .enter().append("g")
488 .attr("class", "tsnet");
Marc Kupietz34c08172017-11-29 17:08:47 +0100489
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100490 g.append("a")
491 .attr("xlink:href", function(word) {
492 return (data.urlprefix+word);})
493 .attr("class", function(d, i) {
494 var res="";
495 if(data.marked[i]) {
496 res="marked ";
497 }
498 if(data.target.indexOf(" "+d+" ") >= 0) {
499 return res+"target";
500 } else if(data.ranks[i] < data.mergedEnd) {
501 return res+"merged";
502 } else {
503 return res;
504 }
505 })
506 .attr("title", function(d, i) {
507 if(data.mergedEnd > 0) {
508 if(data.ranks[i] >= data.mergedEnd) {
509 return "rank: "+i +" "+"freq. rank: "+(data.ranks[i]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
510 } else {
511 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",") + " (merged vocab)";
512 }
513 } else {
514 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
515 }
516 })
517 .append("text")
518 .attr("text-anchor", "top")
519 .attr("font-size", 12)
520 .text(function(d) { return d; });
Marc Kupietz34c08172017-11-29 17:08:47 +0100521
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100522 var zoomListener = d3.behavior.zoom()
523 .scaleExtent([0.1, 10])
524 .center([0,0])
525 .on("zoom", zoomHandler);
526 zoomListener(svg);
Marc Kupietz83305222016-04-28 09:57:22 +0200527 }
528
529 var tx=0, ty=0;
530 var ss=1;
531 var iter_id=-1;
532
533 function zoomHandler() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100534 tx = d3.event.translate[0];
535 ty = d3.event.translate[1];
536 ss = d3.event.scale;
537 updateEmbedding();
Marc Kupietz83305222016-04-28 09:57:22 +0200538 }
539
540 var stepnum = 0;
541
542 function stopStep() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100543 clearInterval(iter_id);
544 text = svg.selectAll("text");
Marc Kupietz34c08172017-11-29 17:08:47 +0100545
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100546 // jitter function needs different data and co-ordinate representation
547 labels = d3.range(data.words.length).map(function(i) {
548 var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
549 var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
550 anchor_array.push({x: x, y: y, r: jitterRadius});
551 return {
552 x: x,
553 y: y,
554 name: data.words[i]
555 };
556 });
Marc Kupietz34c08172017-11-29 17:08:47 +0100557
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100558 // get the actual label bounding boxes for the jitter function
559 var index = 0;
560 text.each(function() {
561 labels[index].width = this.getBBox().width;
562 labels[index].height = this.getBBox().height;
563 index += 1;
564 });
Marc Kupietz83305222016-04-28 09:57:22 +0200565
Marc Kupietz34c08172017-11-29 17:08:47 +0100566
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100567 // setTimeout(updateEmbedding, 1);
568 // setTimeout(
569 labeler = d3.labeler()
570 .label(labels)
571 .anchor(anchor_array)
572 .width(mapWidth)
573 .height(mapHeight)
574 .update(applyJitter);
575 // .start(1000);
Marc Kupietz83305222016-04-28 09:57:22 +0200576
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100577 iter_id = setInterval(jitterStep, 1);
Marc Kupietz83305222016-04-28 09:57:22 +0200578 }
579
580 var jitter_i=0;
581
582 function jitterStep() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100583 if(jitter_i++ > 100) {
584 clearInterval(iter_id);
585 } else {
586 labeler.start2(10);
587 applyJitter();
588 }
Marc Kupietz83305222016-04-28 09:57:22 +0200589 }
590
591 var last_cost=1000;
592
593 function step() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100594 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100595
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100596 if(i > <%= $no_iterations %>) {
597 stopStep();
598 } else {
599 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
600 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
601 if(i % 250 == 0 && cost >= last_cost) {
602 stopStep();
Marc Kupietz4abcd682017-11-28 20:51:08 +0100603 } else {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100604 last_cost = cost;
605 updateEmbedding();
Marc Kupietz4abcd682017-11-28 20:51:08 +0100606 }
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100607 }
Marc Kupietz83305222016-04-28 09:57:22 +0200608 }
609
610 function showMap(j) {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100611 data=j;
612 T.iter=0;
613 T.initDataRaw(data.vecs); // init embedding
614 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100615
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100616 if(iter_id >= 0) {
617 clearInterval(iter_id);
618 }
619 //T.debugGrad();
620 iter_id = setInterval(step, 1);
621 if(true) { // (<%= $show_som %>) {
622 makeSOM(j, <%= $no_iterations %>);
623 }
Marc Kupietz83305222016-04-28 09:57:22 +0200624 }
Marc Kupietz39179ab2017-07-04 16:28:06 +0200625 var queryword;
626
627 function onload() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100628 queryword = document.getElementById('word');
Marc Kupietz39179ab2017-07-04 16:28:06 +0200629 }
630
631 function queryKorAP() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100632 window.open('http://korap.ids-mannheim.de/kalamar/?q='+queryword.value, 'KorAP');
Marc Kupietz39179ab2017-07-04 16:28:06 +0200633 }
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100634
635 function queryKorAPCII(query) {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100636 window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q='+query, 'KorAP');
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100637 }
Marc Kupietz83305222016-04-28 09:57:22 +0200638 </script>
639 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200640 <body onload="onload()">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100641 <div id="header">
642 <div id="pagetitle">
643 <h1>DeReKo-Vectors</h1>
644 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100645 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100646 <form id="queryform">
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100647 <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100648 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100649 <input id="SEARCH" type="button" value="SEARCH">
650 <input type="button" id="showoptions" name="showoptions" value="Options" />
651 </form>
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100652 <div id="dropdownoptions" style="display: none">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100653 <form id="optionsform">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100654 <div class="controlgroup-vertical">
655 <label for="cutoff">cut-off</label>
656 <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
657 <label for="dedupe">dedupe</label>
658 <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
659 % if($mergedEnd > 0) {
Marc Kupietz78114532017-11-29 17:00:16 +0100660 <label for="sbf">backw.</label>
              <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
662 % }
663 <label for="neighbours">max. neighbours:</label>
664 <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100665 <label for="no_iterations">max. iterations</label>
666 <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
Marc Kupietz78114532017-11-29 17:00:16 +0100667 <!-- <label for="dosom">SOM</label>
668 <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100669 % if($collocators) {
670 <label for="sortby">window/sort</label>
671 <select id="sortby" name="sort">
672 <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
Marc Kupietza77acce2017-11-30 16:59:07 +0100673 <!-- <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
674 <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100675 </select>
676 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100677 <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100678 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100679 </form>
680 </div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100681 </div>
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100682 </div>
683 <div id="topwrapper">
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100684 <div style="visibility: hidden;" id="tabs">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100685 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100686 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
687 <li><a href="#tabs-2">Semantics (SOM)</a></li>
688 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100689 </ul>
690 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100691 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
692 <div id="wrapper">
Marc Kupietzaaae0152017-12-01 23:31:56 +0100693 <div id="first" style="width: 320px">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100694 <table class="display compact nowrap" id="firsttable">
695 <thead>
696 <tr>
Marc Kupietzaaae0152017-12-01 23:31:56 +0100697 <th align="right">#</th><th align="right">cos</th><th align="left">similars</th>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100698 </tr>
699 </thead>
700 <tbody>
701 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
702 % for my $list (@$lists) {
703 % my $i=0; while($list) {
704 % my $item = (@$list)[$i];
705 % my $c = ($collocators? (@$collocators)[$i] : 0);
706 % last if(!$c && !$item);
707 <tr>
708 <td align="right">
709 <%= ++$i %>.
710 </td>
711 % if($item) {
712 % if(!grep{$_ eq $item->{word}} @words) {
713 % push @vecs, $item->{vector};
714 % push @words, $item->{word};
715 % push @ranks, $item->{rank};
716 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100717 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100718 <td align="right">
719 <%= sprintf("%.3f", $item->{dist}) %>
720 </td>
721 <td>
Marc Kupietz34c08172017-11-29 17:08:47 +0100722 % my $class = ($marked->{$item->{word}}? "marked " : "");
723 % my $r = $item->{rank};
Marc Kupietzdab9f222017-11-29 14:22:59 +0100724 % if($r < $mergedEnd) {
725 % $class .= "merged";
726 % $r .= " (merged vocab)";
727 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
728 % $r -= $mergedEnd;
729 % }
730 <a class="<%= $class =%>"
731 title="freq. rank: <%= $r =%>"
732 href="<%= url_with->query([word => $item->{word}]) =%>">
733 <%= $item->{word} =%>
734 </a>
735 </td>
736 % } else {
737 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100738 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100739 </tr>
Marc Kupietzaaae0152017-12-01 23:31:56 +0100740 % last if($i >= $no_nbs);
Marc Kupietzdab9f222017-11-29 14:22:59 +0100741 % }
742 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100743 </tbody>
744 </table>
745 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100746 <script>
747 % use Mojo::ByteStream 'b';
748 % my $urlprefix = url_with->query([word=>'']);
749 $(window).load(function() {
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100750 var vecs = <%= b(Mojo::JSON::to_json($lists->[0])) %>;
751 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
Marc Kupietzdab9f222017-11-29 14:22:59 +0100752 });
753 </script>
Marc Kupietz896c9092017-12-02 14:40:43 +0100754 <div id="second">
755 <div id="embed">
756 </div>
757 <div id="cost">
758 </div>
759 </div>
760 </div>
761 % } elsif($word !~ /^\s*$/) {
762 <div id="wrapper">
763 <div id="not-found-dialog" title="Not found">
              <p>ERROR: "<%= $word %>" not found in vocabulary.</p>
765 <p>If you are sure you have spelled the word as intended, you can try to increase the cutoff parameter in the options menu.</p>
766 </div>
767 <script>
768 $( function() {
769 $( "#not-found-dialog" ).dialog({
770 autoOpen: true,
771 modal: true,
772 draggable: false,
773 height: "auto",
774 width: "auto",
775 resizable: false,
776 buttons: {
777 "OK": function() {
778 $( this ).dialog( "close" );
779 },
780 "Apply": function() {
781 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100782 }
Marc Kupietz896c9092017-12-02 14:40:43 +0100783 }
Marc Kupietz2f6b74a2017-12-01 13:20:21 +0100784 });
Marc Kupietz896c9092017-12-02 14:40:43 +0100785 });
786 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100787 </div>
788 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100789 </div>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100790 <div id="tabs-2">
791 <div id="som2" style="width: 800; height: 800px">
792 </div>
793 <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
794 <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
795 </div>
796 <div id="tabs-3">
Marc Kupietz6e2fc102017-12-01 22:07:23 +0100797 <div style="width: 800px" id="secondt">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100798 <table class="display compact nowrap" id="secondtable">
799 <thead>
800 <tr>
801 % if($collocators) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100802 <th>#</th>
                  <th align="center" title="The columns (c) around the target that are considered for summation are marked with *.">w'</th>
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100804 <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
Marc Kupietza77acce2017-11-30 16:59:07 +0100805 <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
                  <th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
                  <th title="Sum of the column normalized activations over the selected columns." align="right">Σ(a/c)</th>
808 <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
809 <th align="left">collocator</th>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100810 % }
811 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100812 </thead>
813 <tbody>
Marc Kupietz22796142017-12-01 13:19:15 +0100814 <tr>
815 <td align="right">
816 </td>
Marc Kupietzb6c615d2017-12-02 10:38:20 +0100817 <td align="right">
818 </td>
819 <td align="right">
820 </td>
821 <td align="right">
822 </td>
823 <td align="right">
824 </td>
825 <td align="right">
826 </td>
827 <td align="right">
828 </td>
829 <td align="left">
830 </td>
Marc Kupietz22796142017-12-01 13:19:15 +0100831 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100832 </tbody>
833 </table>
834 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100835 </div> <!-- tabs -->
836 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100837 </div> <!-- topwrapper -->
838 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100839 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100840 % if($training_args) {
841 <p>
842 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
843 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200844 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100845 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200846</html>