blob: c84126234239f6d95bf50bae3daf2992203ac46b [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
<!-- Third-party assets are requested over HTTPS so they are not blocked as
     mixed content when this page itself is served over HTTPS. -->
<link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
<link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
<!-- NOTE(review): "jquery-latest" is an unversioned alias; consider pinning an explicit release. -->
<script src="https://code.jquery.com/jquery-latest.min.js"></script>
<script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
<script src="https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
<script src="https://cdn.datatables.net/plug-ins/1.10.16/sorting/scientific.js"></script>
<link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
<script
    src="https://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
    integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
    crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020016 <script>
Marc Kupietza6e08f02017-12-01 22:06:21 +010017 var urlParams = new URLSearchParams(window.location.search);
18
Marc Kupietz4abcd682017-11-28 20:51:08 +010019 $('#firstable').hide();
20 $(document).ready(function() {
Marc Kupietz694610d2017-11-25 18:30:03 +010021
// Pressing Enter in any <input> behaves like clicking the SEARCH button.
$("input").on("keydown", function(evt) {
    // Pick whichever key-code property the browser populated.
    var pressed = evt.keyCode || evt.which || evt.charCode;
    if (pressed != 13) { // anything but Enter: let the event through
        return true;
    }
    // Enter: trigger the search and suppress the default key handling.
    document.getElementById('SEARCH').click();
    return false;
});
32
Marc Kupietz4abcd682017-11-28 20:51:08 +010033 $(".selector").tabs({ active: 1 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010034
Marc Kupietz2f6b74a2017-12-01 13:20:21 +010035
// Turn the paradigmatic neighbour table into a scrollable, unpaginated DataTable.
// The options use DataTables' camelCase names; the last column is given 10% width.
var firstTableOptions = {
    scrollY: "780px",
    scrollCollapse: true,
    paging: false,
    jQueryUI: true,
    dom: '<"top">rt<"bottom"flp><"clear">',
    columnDefs: [
        { width: "10%", targets: [ -1 ] }
    ]
};
$('#firsttable').DataTable(firstTableOptions);
Marc Kupietz0af83e32017-11-27 09:31:37 +010046
// Renders an integer bit mask as a window string: the binary digits are
// left-padded to at least 10 characters, an "x" is inserted after the first
// five digits (presumably the target word's slot -- confirm), and then every
// 0 becomes "·" and every 1 becomes "+".
function bitvec2window(n) {
    var bits = n.toString(2).padStart(10, "0");
    var withCentre = bits.replace(/^([0-9]{5})/, '$1x');
    return withCentre.replace(/0/g, '·').replace(/1/g, '+');
}
54
55 var collocatorData = <%= b(Mojo::JSON::to_json($collocators)) %>;
56
// Build the collocator table only when the server supplied collocator data.
if (collocatorData != null) {
    // Cell formatters shared by the numeric columns.
    var asFixed3 = function (value) { return value.toFixed(3); };
    var asExp3 = function (value) { return value.toExponential(3); };
    var asWindow = function (bits) { return bitvec2window(bits); };

    var collocatorTable = $('#secondtable').DataTable({
        data: collocatorData,
        "sScrollY": "800px",
        "bScrollCollapse": true,
        "bPaginate": false,
        "bJQueryUI": true,
        "dom": '<"top">rt<"bottom"flp><"clear">',
        "columns": [
            { "data": "rank", type: "allnumeric" },
            { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: asWindow },
            { "data": "max", render: asFixed3 },
            { "data": "conorm", render: asFixed3 },
            { "data": "prob", type: "scientific", render: asExp3 },
            { "data": "cprob", type: "scientific", render: asExp3 },
            { "data": "overall", type: "scientific", render: asExp3 },
            { "data": "word", sClass: "collocator" }
        ],
        "columnDefs": [
            { className: "dt-right", "targets": [0,2,3,4,5,6] },
            { className: "dt-center", "targets": [ 1] },
            // The first column is a running index: neither searchable nor sortable.
            { "searchable": false, "orderable": false, "targets": 0 },
            { "type": "scientific", targets: [2,3,4,5,6] },
            { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
            { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] }
        ],
        "order": [[ 4, 'desc' ]]
    });

    // After every sort or search, renumber the first column 1..n in display order.
    collocatorTable.on('order.dt search.dt', function () {
        var indexCells = collocatorTable.column(0, {order: 'applied'}).nodes();
        indexCells.each(function (cell, position) {
            cell.innerHTML = position + 1;
        });
    }).draw();
}
Marc Kupietz4abcd682017-11-28 20:51:08 +010094 });
// Set up the (initially closed) options dialog with Cancel and Apply buttons.
$(document).ready(function () {
    // Close the dialog the button belongs to.
    var closeDialog = function () {
        $(this).dialog("close");
    };
    // Reload the current path with all form fields serialized into the query string.
    var applyOptions = function () {
        var destination = $(location).attr('pathname') + '?' + $('form').serialize();
        window.open(destination, "_self");
    };

    $("#dropdownoptions").dialog({
        title: "Options",
        autoOpen: false,
        modal: false,
        draggable: false,
        height: "auto",
        width: "auto",
        resizable: false,
        buttons: {
            "Cancel": closeDialog,
            "Apply": applyOptions
        }
    });
});
114
// SEARCH button: reload the current path with all form fields as query parameters.
$(document).ready(function () {
    $("#SEARCH").on("click", function () {
        var destination = $(location).attr('pathname') + '?' + $('form').serialize();
        window.open(destination, "_self");
    });
});
120
// Clicking a collocator cell opens a KorAP query of the form
// "<collocator> /w5 <word URL parameter>".
$(document).ready(function () {
    $("td.collocator").on("click", function () {
        var collocate = this.textContent;
        var nearQuery = collocate + " /w5 " + urlParams.get('word');
        queryKorAPCII(nearQuery);
    });
});
126
// Options button: open the options dialog and align its bottom-left corner
// with the bottom-left corner of the button that was clicked.
$(document).ready(function () {
    $("#showoptions").on("click", function () {
        var optionsDialog = $("#dropdownoptions");
        optionsDialog.dialog("open");
        optionsDialog.dialog("widget").position({
            my: 'left bottom',
            at: 'left bottom',
            of: $(this)
        });
    });
});
138
// Spinner for the iteration limit; spinning is clamped to 1000..10000.
$(document).ready(function () {
    $("#no_iterations").spinner({
        spin: function (event, ui) {
            var clamped = null;
            if (ui.value < 1000) {
                clamped = 1000;
            } else if (ui.value > 10000) {
                clamped = 10000;
            }
            if (clamped === null) {
                return; // in range: let the spinner proceed
            }
            // Out of range: pin to the boundary and cancel this spin step.
            $(this).spinner("value", clamped);
            return false;
        }
    });
});
Marc Kupietz3305b0a2017-11-27 10:46:20 +0100152
// Spinner for the neighbour count; spinning is clamped to 0..200.
$(document).ready(function () {
    $("#neighbours").spinner({
        spin: function (event, ui) {
            var clamped = null;
            if (ui.value < 0) {
                clamped = 0;
            } else if (ui.value > 200) {
                clamped = 200;
            }
            if (clamped === null) {
                return; // in range: let the spinner proceed
            }
            // Out of range: pin to the boundary and cancel this spin step.
            $(this).spinner("value", clamped);
            return false;
        }
    });
});
166
// Spinner for the frequency cut-off; spinning is clamped to 100000..2000000.
$(document).ready(function () {
    $("#cutoff").spinner({
        spin: function (event, ui) {
            var clamped = null;
            if (ui.value < 100000) {
                clamped = 100000;
            } else if (ui.value > 2000000) {
                clamped = 2000000;
            }
            if (clamped === null) {
                return; // in range: let the spinner proceed
            }
            // Out of range: pin to the boundary and cancel this spin step.
            $(this).spinner("value", clamped);
            return false;
        }
    });
});
180
// Activate the tab widget and tag it with the minimal-look CSS class.
$(document).ready(function () {
    var tabWidget = $("#tabs").tabs();
    tabWidget.addClass('tabs-min');
});

// Stack the option controls vertically.
$(document).ready(function () {
    $(".controlgroup-vertical").controlgroup({ "direction": "vertical" });
});

// Page-wide tooltips whose content is the element's title attribute.
$(document).ready(function () {
    var titleOf = function () {
        return $(this).attr('title');
    };
    $(document).tooltip({ content: titleOf });
});
Marc Kupietz694610d2017-11-25 18:30:03 +0100198
Marc Kupietz83305222016-04-28 09:57:22 +0200199 </script>
200 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100201 <script src="/derekovecs/js/tsne.js"></script>
202 <script src="/derekovecs/js/som.js"></script>
203 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200204 <style>
205 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100206 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200207 font-size: 11pt;
208 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100209
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100210 h1, h2, h3 {
211 margin: 5px 10px 0 0;
212 color: rgb(246,168,0);
213 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
214 font-weight: bold;
215 line-height: 1.35;
216 letter-spacing: normal;
217 text-transform: uppercase;
218 text-shadow: none;
Marc Kupietz34c08172017-11-29 17:08:47 +0100219 word-wrap: break-word;
220 }
221
222
/* Horizontal spacing around the Options and SEARCH buttons.
   The first selector needs the leading "#": without it the rule targets a
   non-existent <showoptions> element instead of the button with that id. */
#showoptions, #SEARCH {
    margin-left: 10px;
    margin-right: 10px;
}
227
Marc Kupietzdab9f222017-11-29 14:22:59 +0100228 .tabs-left-vertical .ui-tabs-nav {
229 position: absolute;
230 width: 21em;
231 transform: translate(-100%,0%) rotate(-90deg);
232 transform-origin: 100% 0%;
233 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100234
Marc Kupietzdab9f222017-11-29 14:22:59 +0100235 .tabs-left-vertical .ui-tabs-nav li {
236 float: right;
237 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100238
Marc Kupietzdab9f222017-11-29 14:22:59 +0100239 .tabs-left-vertical .ui-tabs-panel {
240 padding-left: 3.5em;
241 }
242
243 .tabs-left-vertical .ui-tabs-panel {
244 height: 20em;
Marc Kupietz34c08172017-11-29 17:08:47 +0100245 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100246
Marc Kupietz34c08172017-11-29 17:08:47 +0100247 .mono {
248 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
249 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100250
Marc Kupietz34c08172017-11-29 17:08:47 +0100251 .ui-tooltip-content {
252 font-size: 9pt;
253 color: #222222;
254 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200255
Marc Kupietz34c08172017-11-29 17:08:47 +0100256 svg > .ui-tooltip-content {
257 font-size: 8pt;
258 color: #222222;
259 }
260
261 a.merged {
262 color: green;
263 fill: green;
264 }
265
266 #first a {
267 text-decoration: none;
268 }
269
270 a.marked, #first a.marked {
271 text-decoration: underline;
272 }
273
274 a.target {
275 color: red;
276 fill: red;
277 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100278
Marc Kupietz4abcd682017-11-28 20:51:08 +0100279 table.display {
280 width: 40% important!;
281 margin: 1; /* <- works for me this way ****/
282 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100283
Marc Kupietz4abcd682017-11-28 20:51:08 +0100284 table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
285 padding: 2px 2px;
286 // border-bottom: 1px solid #111;
287 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100288
Marc Kupietz34c08172017-11-29 17:08:47 +0100289 #collocators {
290 margin-bottom: 15px;
291 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100292
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100293 #header {
294 width: 100%;
295 // border: 1px solid red;
296 overflow: hidden; /* will contain if #first is longer than #second */
297 }
298
Marc Kupietz34c08172017-11-29 17:08:47 +0100299 #topwrapper {
300 width: 100%;
301 // border: 1px solid red;
302 overflow: hidden; /* will contain if #first is longer than #second */
303 }
304
305 #wrapper {
306 // border: 1px solid red;
307 overflow: hidden; /* will contain if #first is longer than #second */
308 }
309
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100310 #pagetitle {
311 max-width: 460px;
312 margin-right: 20px;
313 float: left;
314 overflow: hidden; /* if you don't want #second to wrap below #first */
315 // border: 1px solid green;
316 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100317
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100318 #options {
319 float: left;
320 width: 800px;
321 margin: 10px;
322 overflow: hidden; /* if you don't want #second to wrap below #first */
323 }
324
325 #word {
326 width: 50%;
327 }
328
Marc Kupietz34c08172017-11-29 17:08:47 +0100329 #first {
330 margin-right: 20px;
331 float: left;
332 overflow: hidden; /* if you don't want #second to wrap below #first */
333 // border: 1px solid green;
334 }
335 #tabs {
336 margin-right: 20px;
337 overflow: hidden; /* if you don't want #second to wrap below #first */
338 }
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100339
340 .tabs-min {
341 background: transparent;
342 border: none;
343 }
344
345 .tabs-min .ui-widget-header {
346 background: transparent;
347 border: none;
348 border-bottom: 1px solid #c0c0c0;
349 -moz-border-radius: 0px;
350 -webkit-border-radius: 0px;
351 border-radius: 0px;
352 }
353
354 .tabs-min .ui-tabs-nav .ui-state-default {
355 background: transparent;
356 border: none;
357 }
358
359 .tabs-min .ui-tabs-nav .ui-state-active {
360 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
361 border: none;
362 }
363
364 .tabs-min .ui-tabs-nav .ui-state-default a {
365 color: #c0c0c0;
366 }
367
368 .tabs-min .ui-tabs-nav .ui-state-active a {
369 color: rgb(246,168,0);
370 }
371
Marc Kupietz4abcd682017-11-28 20:51:08 +0100372 #embed {
373 max-width: 802px;
374 border: 1px solid #333;
375 }
376
Marc Kupietz34c08172017-11-29 17:08:47 +0100377 #second {
378 min-width: 800px;
Marc Kupietzdab9f222017-11-29 14:22:59 +0100379 // border: 1px solid #333;
Marc Kupietz34c08172017-11-29 17:08:47 +0100380 overflow: hidden; /* if you don't want #second to wrap below #first */
381 }
382 #som2 svg {
383 border: 1px solid #333;
384 }
Marc Kupietz83305222016-04-28 09:57:22 +0200385
Marc Kupietz34c08172017-11-29 17:08:47 +0100386 #cost {
387 font-size: 8pt;
388 color: #222222;
389 margin-top: 4px;
390 margin-bottom: 12px;
391 }
Marc Kupietz83305222016-04-28 09:57:22 +0200392
Marc Kupietz34c08172017-11-29 17:08:47 +0100393 #sominfo1, #sominfo {
394 font-size: 8pt;
395 color: #222222;
396 margin-top: 0px;
397 }
Marc Kupietz83305222016-04-28 09:57:22 +0200398
Marc Kupietz34c08172017-11-29 17:08:47 +0100399 #somcolor1, #somcolor2, #somcolor3 {
400 display: inline-block;
401 height: 10px;
402 width: 10px;
403 }
Marc Kupietz83305222016-04-28 09:57:22 +0200404
Marc Kupietz34c08172017-11-29 17:08:47 +0100405 #third {
406 border: 1px solid #333;
407 }
Marc Kupietz83305222016-04-28 09:57:22 +0200408
409 </style>
410 <script>
411
412 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
413 mapWidth = 800, // width map
414 mapHeight = 800,
415 jitterRadius = 7;
416
417 var T = new tsnejs.tSNE(opt); // create a tSNE instance
418
419 var Y;
420
421 var data;
422 var labeler;
423
// Moves every .tsnet group to the position stored in the matching entry of
// `labels` (50 ms transition) and writes that position back into T.Y,
// undoing the current pan (tx, ty), zoom (ss) and the fixed factor of 20.
function applyJitter() {
    var placeLabel = function(label, idx) {
        var point = T.Y[idx];
        point[0] = (label.x - mapWidth/2 - tx)/ss/20;
        point[1] = (label.y - mapHeight/2 - ty)/ss/20;
        return "translate(" + label.x + "," + label.y + ")";
    };

    svg.selectAll('.tsnet')
        .data(labels)
        .transition()
        .duration(50)
        .attr("transform", placeLabel);
}
Marc Kupietz34c08172017-11-29 17:08:47 +0100437
// Repositions every .tsnet group from the current t-SNE solution, scaled by
// 20 and the zoom factor, shifted by the pan offset and centred on the map.
function updateEmbedding() {
    var solution = T.getSolution();
    var toTransform = function(d, idx) {
        var px = (solution[idx][0]*20*ss + tx) + mapWidth/2;
        var py = (solution[idx][1]*20*ss + ty) + mapHeight/2;
        return "translate(" + px + "," + py + ")";
    };

    svg.selectAll('.tsnet')
        .data(data.words)
        .attr("transform", toTransform);
}
447
448 var svg;
449 var labels = [];
450 var anchor_array = [];
451 var text;
452
// Rebuilds the word map inside #embed: creates a fresh SVG (stored in the
// global `svg`), adds one linked text label per entry of data.words, and
// attaches the pan/zoom behaviour.
function drawEmbedding() {
    $("#embed").empty();

    svg = d3.select("#embed").append("svg") // svg is global
        .attr("width", mapWidth)
        .attr("height", mapHeight);

    var groups = svg.selectAll(".b")
        .data(data.words)
        .enter().append("g")
        .attr("class", "tsnet");

    groups.append("a")
        .attr("xlink:href", function(word) {
            // Percent-encode the word so characters such as & # + % cannot
            // break or truncate the query string of the generated link.
            // NOTE(review): assumes data.urlprefix ends with "word=" -- confirm.
            return data.urlprefix + encodeURIComponent(word);
        })
        .attr("class", function(d, i) {
            // "marked " prefix, then "target" or "merged" where applicable.
            var prefix = data.marked[i] ? "marked " : "";
            if (data.target.indexOf(" " + d + " ") >= 0) {
                return prefix + "target";
            }
            if (data.ranks[i] < data.mergedEnd) {
                return prefix + "merged";
            }
            return prefix;
        })
        .attr("title", function(d, i) {
            // Frequency rank with thousands separators.
            var freqRank = data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
            var title = "rank: " + i + " " + "freq. rank: " + freqRank;
            // Only ranks below mergedEnd carry the merged-vocabulary note.
            if (data.mergedEnd > 0 && !(data.ranks[i] >= data.mergedEnd)) {
                title += " (merged vocab)";
            }
            return title;
        })
        .append("text")
        .attr("text-anchor", "top")
        .attr("font-size", 12)
        .text(function(d) { return d; });

    var zoomListener = d3.behavior.zoom()
        .scaleExtent([0.1, 10])
        .center([0,0])
        .on("zoom", zoomHandler);
    zoomListener(svg);
}
507
508 var tx=0, ty=0;
509 var ss=1;
510 var iter_id=-1;
511
// d3 zoom callback: store the current pan offset and scale in the globals
// tx, ty and ss, then redraw the labels.
function zoomHandler() {
    var zoomEvent = d3.event;
    tx = zoomEvent.translate[0];
    ty = zoomEvent.translate[1];
    ss = zoomEvent.scale;
    updateEmbedding();
}
518
519 var stepnum = 0;
520
// Stops the running interval, converts the current t-SNE solution into the
// label/anchor representation expected by d3.labeler, and starts the
// label-jitter interval (jitterStep).
function stopStep() {
    clearInterval(iter_id);
    text = svg.selectAll("text");

    // The labeler works in screen coordinates: one label and one anchor per word.
    var placed = [];
    for (var w = 0; w < data.words.length; w++) {
        var sx = (T.Y[w][0]*20*ss + tx) + mapWidth/2;
        var sy = (T.Y[w][1]*20*ss + ty) + mapHeight/2;
        anchor_array.push({x: sx, y: sy, r: jitterRadius});
        placed.push({x: sx, y: sy, name: data.words[w]});
    }
    labels = placed;

    // Record each rendered label's bounding-box size for the labeler.
    var cursor = 0;
    text.each(function() {
        var box = this.getBBox();
        labels[cursor].width = box.width;
        labels[cursor].height = box.height;
        cursor += 1;
    });

    labeler = d3.labeler()
        .label(labels)
        .anchor(anchor_array)
        .width(mapWidth)
        .height(mapHeight)
        .update(applyJitter);

    iter_id = setInterval(jitterStep, 1);
}
558
559 var jitter_i=0;
560
// One tick of the label-jitter interval: runs the labeler and applies the
// result, and clears the interval once the tick counter has passed 100.
function jitterStep() {
    var finished = jitter_i++ > 100;
    if (finished) {
        clearInterval(iter_id);
        return;
    }
    labeler.start2(10);
    applyJitter();
}
569
570 var last_cost=1000;
571
572 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100573 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100574
Marc Kupietz4abcd682017-11-28 20:51:08 +0100575 if(i > <%= $no_iterations %>) {
576 stopStep();
577 } else {
578 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
579 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
580 if(i % 250 == 0 && cost >= last_cost) {
581 stopStep();
582 } else {
583 last_cost = cost;
584 updateEmbedding();
585 }
586 }
Marc Kupietz83305222016-04-28 09:57:22 +0200587 }
588
589 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100590 data=j;
591 T.iter=0;
592 T.initDataRaw(data.vecs); // init embedding
593 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100594
Marc Kupietz4abcd682017-11-28 20:51:08 +0100595 if(iter_id >= 0) {
596 clearInterval(iter_id);
597 }
598 //T.debugGrad();
599 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100600 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100601 makeSOM(j, <%= $no_iterations %>);
602 }
Marc Kupietz83305222016-04-28 09:57:22 +0200603 }
Marc Kupietz39179ab2017-07-04 16:28:06 +0200604 var queryword;
605
// Body onload hook: remember the search input element in the global `queryword`.
function onload() {
    var searchField = document.getElementById('word');
    queryword = searchField;
}
609
// Opens KorAP (window name "KorAP") with the current content of the search field.
// The value is percent-encoded so characters such as & # + % or spaces cannot
// truncate or alter the query string.
function queryKorAP() {
    window.open('http://korap.ids-mannheim.de/kalamar/?q=' + encodeURIComponent(queryword.value), 'KorAP');
}
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100613
// Opens KorAP (window name "KorAP") with `query` sent as a COSMAS II query
// (ql=cosmas2). The query is percent-encoded so characters such as & # + %
// or spaces cannot truncate or alter the query string.
function queryKorAPCII(query) {
    window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q=' + encodeURIComponent(query), 'KorAP');
}
Marc Kupietz83305222016-04-28 09:57:22 +0200617 </script>
618 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200619 <body onload="onload()">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100620 <div id="header">
621 <div id="pagetitle">
622 <h1>DeReKo-Vectors</h1>
623 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100624 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100625 <form id="queryform">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100626 <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
627 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100628 <input id="SEARCH" type="button" value="SEARCH">
629 <input type="button" id="showoptions" name="showoptions" value="Options" />
630 </form>
<div id="dropdownoptions" style="display: none">
632 <form id="optionsform">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100633 <div class="controlgroup-vertical">
634 <label for="cutoff">cut-off</label>
635 <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
636 <label for="dedupe">dedupe</label>
637 <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
638 % if($mergedEnd > 0) {
Marc Kupietz78114532017-11-29 17:00:16 +0100639 <label for="sbf">backw.</label>
<input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
641 % }
642 <label for="neighbours">max. neighbours:</label>
643 <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100644 <label for="no_iterations">max. iterations</label>
645 <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
Marc Kupietz78114532017-11-29 17:00:16 +0100646 <!-- <label for="dosom">SOM</label>
647 <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100648 % if($collocators) {
649 <label for="sortby">window/sort</label>
650 <select id="sortby" name="sort">
651 <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
Marc Kupietza77acce2017-11-30 16:59:07 +0100652 <!-- <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
653 <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100654 </select>
655 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100656 <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100657 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100658 </form>
659 </div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100660 </div>
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100661 </div>
662 <div id="topwrapper">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100663 <div id="tabs">
664 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100665 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
666 <li><a href="#tabs-2">Semantics (SOM)</a></li>
667 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100668 </ul>
669 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100670 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
671 <div id="wrapper">
672 <div id="first" style="width:220px">
673 <table class="display compact nowrap" id="firsttable">
674 <thead>
675 <tr>
676 <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
677 </tr>
678 </thead>
679 <tbody>
680 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
681 % for my $list (@$lists) {
682 % my $i=0; while($list) {
683 % my $item = (@$list)[$i];
684 % my $c = ($collocators? (@$collocators)[$i] : 0);
685 % last if(!$c && !$item);
686 <tr>
687 <td align="right">
688 <%= ++$i %>.
689 </td>
690 % if($item) {
691 % if(!grep{$_ eq $item->{word}} @words) {
692 % push @vecs, $item->{vector};
693 % push @words, $item->{word};
694 % push @ranks, $item->{rank};
695 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100696 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100697 <td align="right">
698 <%= sprintf("%.3f", $item->{dist}) %>
699 </td>
700 <td>
Marc Kupietz34c08172017-11-29 17:08:47 +0100701 % my $class = ($marked->{$item->{word}}? "marked " : "");
702 % my $r = $item->{rank};
Marc Kupietzdab9f222017-11-29 14:22:59 +0100703 % if($r < $mergedEnd) {
704 % $class .= "merged";
705 % $r .= " (merged vocab)";
706 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
707 % $r -= $mergedEnd;
708 % }
709 <a class="<%= $class =%>"
710 title="freq. rank: <%= $r =%>"
711 href="<%= url_with->query([word => $item->{word}]) =%>">
712 <%= $item->{word} =%>
713 </a>
714 </td>
715 % } else {
716 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100717 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100718 </tr>
719 % last if($i >= 100);
720 % }
721 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100722 </tbody>
723 </table>
724 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100725 <script>
726 % use Mojo::ByteStream 'b';
727 % my $urlprefix = url_with->query([word=>'']);
728 $(window).load(function() {
729 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
730 });
731 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100732 % } else {
733 <div id="wrapper">
Marc Kupietz2f6b74a2017-12-01 13:20:21 +0100734 <div id="not-found-dialog" title="Not found">
<p>ERROR: "<%= $word %>" not found in vocabulary.</p>
736 <p>If you are sure you have spelled the word as intended, you can try to increase the cutoff parameter in the options menu.</p>
737 </div>
738 <script>
739 $( function() {
740 $( "#not-found-dialog" ).dialog({
741 autoOpen: true,
742 modal: true,
743 draggable: false,
744 height: "auto",
745 width: "auto",
746 resizable: false,
747 buttons: {
748 "OK": function() {
749 $( this ).dialog( "close" );
750 },
751 "Apply": function() {
752 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
753 }
754 }
755 });
756 });
757 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100758 </div>
759 % }
760 <div id="second">
761 <div id="embed">
762 </div>
763 <div id="cost">
764 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100765 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100766 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100767 </div>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100768 <div id="tabs-2">
<div id="som2" style="width: 800px; height: 800px">
770 </div>
771 <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
772 <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
773 </div>
774 <div id="tabs-3">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100775 <div id="second" style="width:500px">
776 <table class="display compact nowrap" id="secondtable">
777 <thead>
778 <tr>
779 % if($collocators) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100780 <th>#</th>
<th align="center" title="The columns (c) around the target that are considered for summation are marked with +.">w'</th>
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100782 <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
Marc Kupietza77acce2017-11-30 16:59:07 +0100783 <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
<th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
<th title="Sum of the column normalized activations over the selected columns." align="right">Σ(a/c)</th>
786 <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
787 <th align="left">collocator</th>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100788 % }
789 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100790 </thead>
791 <tbody>
Marc Kupietz22796142017-12-01 13:19:15 +0100792 <tr>
793 <td align="right">
794 </td>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100795 <td align="right">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100796 </td>
Marc Kupietz22796142017-12-01 13:19:15 +0100797 <td align="right">
798 </td>
799 <td align="right">
800 </td>
801 <td align="right">
802 </td>
803 <td align="right">
804 </td>
805 <td align="right">
806 </td>
807 <td align="left">
808 </td>
809 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100810 </tbody>
811 </table>
812 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100813 </div> <!-- tabs -->
814 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100815 </div> <!-- topwrapper -->
816 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100817 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100818 % if($training_args) {
819 <p>
820 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
821 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200822 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100823 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200824</html>