blob: 72066ed4def9349e2d88a7827ecc082aaa336756 [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
Marc Kupietz80bd7b92017-07-04 16:25:54 +02005 <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
Marc Kupietz6dbadd12017-11-29 16:43:33 +01006 <link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
Marc Kupietz80bd7b92017-07-04 16:25:54 +02007 <script src="http://code.jquery.com/jquery-latest.min.js"></script>
Marc Kupietz4abcd682017-11-28 20:51:08 +01008 <script src = "https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
9 <script src = "https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
Marc Kupietz22796142017-12-01 13:19:15 +010010 <script src = "https://cdn.datatables.net/plug-ins/1.10.16/sorting/scientific.js"></script>
Marc Kupietz4abcd682017-11-28 20:51:08 +010011 <link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
Marc Kupietz80bd7b92017-07-04 16:25:54 +020012 <script
Marc Kupietz6dbadd12017-11-29 16:43:33 +010013 src="http://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
Marc Kupietz34c08172017-11-29 17:08:47 +010014 integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
15 crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020016 <script>
Marc Kupietz4abcd682017-11-28 20:51:08 +010017 $('#firstable').hide();
18 $(document).ready(function() {
19 $("#xxxtabs").tabs( {
20 "show": function(event, ui) {
21 var oTable = $('div.dataTables_scrollBody>table.display', ui.panel).dataTable();
22 if ( oTable.length > 0 ) {
23 oTable.fnAdjustColumnSizing();
24 }
25 }
26 } );
Marc Kupietz694610d2017-11-25 18:30:03 +010027
Marc Kupietzdab9f222017-11-29 14:22:59 +010028 $("input").bind("keydown", function(event) {
29 // track enter key
30 var keycode = (event.keyCode ? event.keyCode : (event.which ? event.which : event.charCode));
31 if (keycode == 13) { // keycode for enter key
32 // force the 'Enter Key' to implicitly click the Update button
33 document.getElementById('SEARCH').click();
34 return false;
35 } else {
36 return true;
37 }});
38
Marc Kupietz4abcd682017-11-28 20:51:08 +010039 $(".selector").tabs({ active: 1 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010040
Marc Kupietz2f6b74a2017-12-01 13:20:21 +010041
Marc Kupietz4abcd682017-11-28 20:51:08 +010042 $('#firsttable').DataTable({
43 "sScrollY": "780px",
44 "bScrollCollapse": true,
45 "bPaginate": false,
46 "bJQueryUI": true,
47 "dom": '<"top">rt<"bottom"flp><"clear">',
48 "aoColumnDefs": [
49 { "sWidth": "10%", "aTargets": [ -1 ] }
50 ]
51 } );
Marc Kupietz0af83e32017-11-27 09:31:37 +010052
Marc Kupietz22796142017-12-01 13:19:15 +010053 function bitvec2window(n) {
54 var str = n.toString(2).padStart(10, "0")
55 .replace(/^([0-9]{5})/, '$1x')
56 .replace(/0/g, '·')
57 .replace(/1/g, '+');
58 return str;
59 }
60
61 var collocatorData = <%= b(Mojo::JSON::to_json($collocators)) %>;
62
63 if (collocatorData != null) {
64 var t = $('#secondtable').DataTable({
65 data: collocatorData,
66 "sScrollY": "800px",
67 "bScrollCollapse": true,
68 "bPaginate": false,
69 "bJQueryUI": true,
70 "dom": '<"top">rt<"bottom"flp><"clear">',
71 "columns": [
72 { "data": "rank", type: "allnumeric" },
73 { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: function ( data, type, row ) {return bitvec2window(data) }},
74 { "data": "max", render: function ( data, type, row ) {return data.toFixed(3) }},
75 { "data": "conorm", render: function ( data, type, row ) {return data.toFixed(3) }},
76 { "data": "prob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
77 { "data": "cprob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
78 { "data": "overall", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
79 { "data": "word", sClass: "collocator" }
80 ],
81 "columnDefs": [
82 { className: "dt-right", "targets": [0,2,3,4,5,6] },
83 { className: "dt-center", "targets": [ 1] },
84 { "searchable": false,
85 "orderable": false,
86 "targets": 0
87 },
88 { "type": "scientific", targets: [2,3,4,5,6] },
89 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
90 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] },
91 ],
92 "order": [[ 4, 'desc' ]],
Marc Kupietzd64f3f22017-11-30 12:07:42 +010093 } );
Marc Kupietz22796142017-12-01 13:19:15 +010094 t.on( 'order.dt search.dt', function () {
95 t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
96 cell.innerHTML = i+1;
97 } );
98 } ).draw();
99 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100100 });
Marc Kupietzdab9f222017-11-29 14:22:59 +0100101 $(function(){
102 $("#dropdownoptions").dialog({
103 title: "Options",
104 autoOpen: false,
105 modal: false,
106 draggable: false,
107 height: "auto",
108 width: "auto",
109 resizable: false,
110 buttons: {
111 "Cancel": function() {
112 $( this ).dialog( "close" );
113 },
114 "Apply": function() {
115 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
116 }
117 }
118 });
119 });
120
121 $(function(){
122 $("#SEARCH").click(function() {
123 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
124 });
125 });
126
127 $(function(){
Marc Kupietz22796142017-12-01 13:19:15 +0100128 $("td.collocator").click(function(){
129 queryKorAPCII(this.textContent + " /w5 <%= $word %>");
130 });
131 });
132
133 $(function(){
Marc Kupietzdab9f222017-11-29 14:22:59 +0100134 $("#showoptions").click(function(){
135 $("#dropdownoptions").dialog("open");
136 var target = $(this);
137 $("#dropdownoptions").dialog("widget").position({
138 my: 'left bottom',
139 at: 'left bottom',
140 of: target
141 });
142 });
143 });
144
Marc Kupietz4abcd682017-11-28 20:51:08 +0100145 $( function() {
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100146 $( "#no_iterations" ).spinner({
Marc Kupietz4abcd682017-11-28 20:51:08 +0100147 spin: function( event, ui ) {
148 if ( ui.value < 1000 ) {
149 $( this ).spinner( "value", 1000 );
150 return false;
151 } else if ( ui.value > 10000 ) {
152 $( this ).spinner( "value", 10000 );
153 return false;
154 }
155 }
156 });
157 } );
Marc Kupietz3305b0a2017-11-27 10:46:20 +0100158
Marc Kupietz4abcd682017-11-28 20:51:08 +0100159 $( function() {
160 $( "#neighbours" ).spinner({
161 spin: function( event, ui ) {
162 if ( ui.value < 0 ) {
163 $( this ).spinner( "value", 0 );
164 return false;
165 } else if ( ui.value > 200 ) {
166 $( this ).spinner( "value", 200 );
167 return false;
168 }
169 }
170 });
171 } );
172
173 $( function() {
174 $( "#cutoff" ).spinner({
175 spin: function( event, ui ) {
176 if ( ui.value < 100000 ) {
177 $( this ).spinner( "value", 100000 );
178 return false;
179 } else if ( ui.value > 2000000 ) {
180 $( this ).spinner( "value", 2000000 );
181 return false;
182 }
183 }
184 });
185 } );
186
187 $( function() {
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100188 $( "#tabs" ).tabs().addClass('tabs-min');
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100189 } );
Marc Kupietz4abcd682017-11-28 20:51:08 +0100190
191 $( function() {
192 $( ".controlgroup-vertical" ).controlgroup({
193 "direction": "vertical"
194 });
195 } );
196
197 $(function() {
198 $( document ).tooltip({
199 content: function() {
200 return $(this).attr('title');
201 }}
202 )
Marc Kupietz83305222016-04-28 09:57:22 +0200203 })
Marc Kupietz694610d2017-11-25 18:30:03 +0100204
Marc Kupietz83305222016-04-28 09:57:22 +0200205 </script>
206 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100207 <script src="/derekovecs/js/tsne.js"></script>
208 <script src="/derekovecs/js/som.js"></script>
209 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200210 <style>
211 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100212 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200213 font-size: 11pt;
214 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100215
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100216 h1, h2, h3 {
217 margin: 5px 10px 0 0;
218 color: rgb(246,168,0);
219 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
220 font-weight: bold;
221 line-height: 1.35;
222 letter-spacing: normal;
223 text-transform: uppercase;
224 text-shadow: none;
Marc Kupietz34c08172017-11-29 17:08:47 +0100225 word-wrap: break-word;
226 }
227
228
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100229 showoptions, #SEARCH {
230 margin-left: 10px;
231 margin-right: 10px;
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100232 }
233
Marc Kupietzdab9f222017-11-29 14:22:59 +0100234 .tabs-left-vertical .ui-tabs-nav {
235 position: absolute;
236 width: 21em;
237 transform: translate(-100%,0%) rotate(-90deg);
238 transform-origin: 100% 0%;
239 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100240
Marc Kupietzdab9f222017-11-29 14:22:59 +0100241 .tabs-left-vertical .ui-tabs-nav li {
242 float: right;
243 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100244
Marc Kupietzdab9f222017-11-29 14:22:59 +0100245 .tabs-left-vertical .ui-tabs-panel {
246 padding-left: 3.5em;
247 }
248
249 .tabs-left-vertical .ui-tabs-panel {
250 height: 20em;
Marc Kupietz34c08172017-11-29 17:08:47 +0100251 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100252
Marc Kupietz34c08172017-11-29 17:08:47 +0100253 .mono {
254 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
255 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100256
Marc Kupietz34c08172017-11-29 17:08:47 +0100257 .ui-tooltip-content {
258 font-size: 9pt;
259 color: #222222;
260 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200261
Marc Kupietz34c08172017-11-29 17:08:47 +0100262 svg > .ui-tooltip-content {
263 font-size: 8pt;
264 color: #222222;
265 }
266
267 a.merged {
268 color: green;
269 fill: green;
270 }
271
272 #first a {
273 text-decoration: none;
274 }
275
276 a.marked, #first a.marked {
277 text-decoration: underline;
278 }
279
280 a.target {
281 color: red;
282 fill: red;
283 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100284
Marc Kupietz4abcd682017-11-28 20:51:08 +0100285 table.display {
286 width: 40% important!;
287 margin: 1; /* <- works for me this way ****/
288 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100289
Marc Kupietz4abcd682017-11-28 20:51:08 +0100290 table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
291 padding: 2px 2px;
292 // border-bottom: 1px solid #111;
293 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100294
Marc Kupietz34c08172017-11-29 17:08:47 +0100295 #collocators {
296 margin-bottom: 15px;
297 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100298
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100299 #header {
300 width: 100%;
301 // border: 1px solid red;
302 overflow: hidden; /* will contain if #first is longer than #second */
303 }
304
Marc Kupietz34c08172017-11-29 17:08:47 +0100305 #topwrapper {
306 width: 100%;
307 // border: 1px solid red;
308 overflow: hidden; /* will contain if #first is longer than #second */
309 }
310
311 #wrapper {
312 // border: 1px solid red;
313 overflow: hidden; /* will contain if #first is longer than #second */
314 }
315
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100316 #pagetitle {
317 max-width: 460px;
318 margin-right: 20px;
319 float: left;
320 overflow: hidden; /* if you don't want #second to wrap below #first */
321 // border: 1px solid green;
322 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100323
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100324 #options {
325 float: left;
326 width: 800px;
327 margin: 10px;
328 overflow: hidden; /* if you don't want #second to wrap below #first */
329 }
330
331 #word {
332 width: 50%;
333 }
334
Marc Kupietz34c08172017-11-29 17:08:47 +0100335 #first {
336 margin-right: 20px;
337 float: left;
338 overflow: hidden; /* if you don't want #second to wrap below #first */
339 // border: 1px solid green;
340 }
341 #tabs {
342 margin-right: 20px;
343 overflow: hidden; /* if you don't want #second to wrap below #first */
344 }
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100345
346 .tabs-min {
347 background: transparent;
348 border: none;
349 }
350
351 .tabs-min .ui-widget-header {
352 background: transparent;
353 border: none;
354 border-bottom: 1px solid #c0c0c0;
355 -moz-border-radius: 0px;
356 -webkit-border-radius: 0px;
357 border-radius: 0px;
358 }
359
360 .tabs-min .ui-tabs-nav .ui-state-default {
361 background: transparent;
362 border: none;
363 }
364
365 .tabs-min .ui-tabs-nav .ui-state-active {
366 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
367 border: none;
368 }
369
370 .tabs-min .ui-tabs-nav .ui-state-default a {
371 color: #c0c0c0;
372 }
373
374 .tabs-min .ui-tabs-nav .ui-state-active a {
375 color: rgb(246,168,0);
376 }
377
Marc Kupietz4abcd682017-11-28 20:51:08 +0100378 #embed {
379 max-width: 802px;
380 border: 1px solid #333;
381 }
382
Marc Kupietz34c08172017-11-29 17:08:47 +0100383 #second {
384 min-width: 800px;
Marc Kupietzdab9f222017-11-29 14:22:59 +0100385 // border: 1px solid #333;
Marc Kupietz34c08172017-11-29 17:08:47 +0100386 overflow: hidden; /* if you don't want #second to wrap below #first */
387 }
388 #som2 svg {
389 border: 1px solid #333;
390 }
Marc Kupietz83305222016-04-28 09:57:22 +0200391
Marc Kupietz34c08172017-11-29 17:08:47 +0100392 #cost {
393 font-size: 8pt;
394 color: #222222;
395 margin-top: 4px;
396 margin-bottom: 12px;
397 }
Marc Kupietz83305222016-04-28 09:57:22 +0200398
Marc Kupietz34c08172017-11-29 17:08:47 +0100399 #sominfo1, #sominfo {
400 font-size: 8pt;
401 color: #222222;
402 margin-top: 0px;
403 }
Marc Kupietz83305222016-04-28 09:57:22 +0200404
Marc Kupietz34c08172017-11-29 17:08:47 +0100405 #somcolor1, #somcolor2, #somcolor3 {
406 display: inline-block;
407 height: 10px;
408 width: 10px;
409 }
Marc Kupietz83305222016-04-28 09:57:22 +0200410
Marc Kupietz34c08172017-11-29 17:08:47 +0100411 #third {
412 border: 1px solid #333;
413 }
Marc Kupietz83305222016-04-28 09:57:22 +0200414
415 </style>
416 <script>
417
418 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
419 mapWidth = 800, // width map
420 mapHeight = 800,
421 jitterRadius = 7;
422
423 var T = new tsnejs.tSNE(opt); // create a tSNE instance
424
425 var Y;
426
427 var data;
428 var labeler;
429
430 function applyJitter() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100431 svg.selectAll('.tsnet')
432 .data(labels)
433 .transition()
434 .duration(50)
435 .attr("transform", function(d, i) {
436 T.Y[i][0] = (d.x - mapWidth/2 - tx)/ss/20;
437 T.Y[i][1] = (d.y - mapHeight/2 - ty)/ss/20;
438 return "translate(" +
Marc Kupietz34c08172017-11-29 17:08:47 +0100439 (d.x) + "," +
440 (d.y) + ")";
Marc Kupietz4abcd682017-11-28 20:51:08 +0100441 });
Marc Kupietz83305222016-04-28 09:57:22 +0200442 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100443
Marc Kupietz83305222016-04-28 09:57:22 +0200444 function updateEmbedding() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100445 var Y = T.getSolution();
446 svg.selectAll('.tsnet')
447 .data(data.words)
Marc Kupietz34c08172017-11-29 17:08:47 +0100448 .attr("transform", function(d, i) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100449 return "translate(" +
Marc Kupietz34c08172017-11-29 17:08:47 +0100450 ((Y[i][0]*20*ss + tx) + mapWidth/2) + "," +
451 ((Y[i][1]*20*ss + ty) + mapHeight/2) + ")"; });
Marc Kupietz83305222016-04-28 09:57:22 +0200452 }
453
454 var svg;
455 var labels = [];
456 var anchor_array = [];
457 var text;
458
459 function drawEmbedding() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100460 $("#embed").empty();
461 var div = d3.select("#embed");
Marc Kupietz34c08172017-11-29 17:08:47 +0100462
Marc Kupietz4abcd682017-11-28 20:51:08 +0100463 // get min and max in each column of Y
464 var Y = T.Y;
Marc Kupietz34c08172017-11-29 17:08:47 +0100465
Marc Kupietz4abcd682017-11-28 20:51:08 +0100466 svg = div.append("svg") // svg is global
467 .attr("width", mapWidth)
468 .attr("height", mapHeight);
Marc Kupietz34c08172017-11-29 17:08:47 +0100469
Marc Kupietz4abcd682017-11-28 20:51:08 +0100470 var g = svg.selectAll(".b")
471 .data(data.words)
472 .enter().append("g")
473 .attr("class", "tsnet");
Marc Kupietz34c08172017-11-29 17:08:47 +0100474
Marc Kupietz4abcd682017-11-28 20:51:08 +0100475 g.append("a")
476 .attr("xlink:href", function(word) {
477 return (data.urlprefix+word);})
478 .attr("class", function(d, i) {
479 var res="";
480 if(data.marked[i]) {
481 res="marked ";
482 }
483 if(data.target.indexOf(" "+d+" ") >= 0) {
484 return res+"target";
485 } else if(data.ranks[i] < data.mergedEnd) {
486 return res+"merged";
487 } else {
488 return res;
489 }
490 })
491 .attr("title", function(d, i) {
492 if(data.mergedEnd > 0) {
493 if(data.ranks[i] >= data.mergedEnd) {
494 return "rank: "+i +" "+"freq. rank: "+(data.ranks[i]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
495 } else {
496 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",") + " (merged vocab)";
Marc Kupietz34c08172017-11-29 17:08:47 +0100497 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100498 } else {
499 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
500 }
501 })
502 .append("text")
503 .attr("text-anchor", "top")
504 .attr("font-size", 12)
505 .text(function(d) { return d; });
Marc Kupietz34c08172017-11-29 17:08:47 +0100506
Marc Kupietz4abcd682017-11-28 20:51:08 +0100507 var zoomListener = d3.behavior.zoom()
508 .scaleExtent([0.1, 10])
509 .center([0,0])
510 .on("zoom", zoomHandler);
511 zoomListener(svg);
Marc Kupietz83305222016-04-28 09:57:22 +0200512 }
513
514 var tx=0, ty=0;
515 var ss=1;
516 var iter_id=-1;
517
518 function zoomHandler() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100519 tx = d3.event.translate[0];
520 ty = d3.event.translate[1];
521 ss = d3.event.scale;
522 updateEmbedding();
Marc Kupietz83305222016-04-28 09:57:22 +0200523 }
524
525 var stepnum = 0;
526
527 function stopStep() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100528 clearInterval(iter_id);
529 text = svg.selectAll("text");
Marc Kupietz34c08172017-11-29 17:08:47 +0100530
Marc Kupietz4abcd682017-11-28 20:51:08 +0100531 // jitter function needs different data and co-ordinate representation
532 labels = d3.range(data.words.length).map(function(i) {
533 var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
534 var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
535 anchor_array.push({x: x, y: y, r: jitterRadius});
536 return {
537 x: x,
538 y: y,
539 name: data.words[i]
540 };
541 });
Marc Kupietz34c08172017-11-29 17:08:47 +0100542
Marc Kupietz4abcd682017-11-28 20:51:08 +0100543 // get the actual label bounding boxes for the jitter function
544 var index = 0;
545 text.each(function() {
546 labels[index].width = this.getBBox().width;
547 labels[index].height = this.getBBox().height;
548 index += 1;
549 });
Marc Kupietz83305222016-04-28 09:57:22 +0200550
Marc Kupietz34c08172017-11-29 17:08:47 +0100551
Marc Kupietz4abcd682017-11-28 20:51:08 +0100552 // setTimeout(updateEmbedding, 1);
553 // setTimeout(
554 labeler = d3.labeler()
555 .label(labels)
556 .anchor(anchor_array)
557 .width(mapWidth)
558 .height(mapHeight)
559 .update(applyJitter);
560 // .start(1000);
Marc Kupietz83305222016-04-28 09:57:22 +0200561
Marc Kupietz4abcd682017-11-28 20:51:08 +0100562 iter_id = setInterval(jitterStep, 1);
Marc Kupietz83305222016-04-28 09:57:22 +0200563 }
564
565 var jitter_i=0;
566
567 function jitterStep() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100568 if(jitter_i++ > 100) {
569 clearInterval(iter_id);
570 } else {
571 labeler.start2(10);
572 applyJitter();
573 }
Marc Kupietz83305222016-04-28 09:57:22 +0200574 }
575
576 var last_cost=1000;
577
578 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100579 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100580
Marc Kupietz4abcd682017-11-28 20:51:08 +0100581 if(i > <%= $no_iterations %>) {
582 stopStep();
583 } else {
584 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
585 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
586 if(i % 250 == 0 && cost >= last_cost) {
587 stopStep();
588 } else {
589 last_cost = cost;
590 updateEmbedding();
591 }
592 }
Marc Kupietz83305222016-04-28 09:57:22 +0200593 }
594
595 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100596 data=j;
597 T.iter=0;
598 T.initDataRaw(data.vecs); // init embedding
599 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100600
Marc Kupietz4abcd682017-11-28 20:51:08 +0100601 if(iter_id >= 0) {
602 clearInterval(iter_id);
603 }
604 //T.debugGrad();
605 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100606 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100607 makeSOM(j, <%= $no_iterations %>);
608 }
Marc Kupietz83305222016-04-28 09:57:22 +0200609 }
Marc Kupietz39179ab2017-07-04 16:28:06 +0200610 var queryword;
611
612 function onload() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100613 queryword = document.getElementById('word');
Marc Kupietz39179ab2017-07-04 16:28:06 +0200614 }
615
616 function queryKorAP() {
617 window.open('http://korap.ids-mannheim.de/kalamar/?q='+queryword.value, 'KorAP');
618 }
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100619
620 function queryKorAPCII(query) {
621 window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q='+query, 'KorAP');
622 }
Marc Kupietz83305222016-04-28 09:57:22 +0200623 </script>
624 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200625 <body onload="onload()">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100626 <div id="header">
627 <div id="pagetitle">
628 <h1>DeReKo-Vectors</h1>
629 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100630 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100631 <form id="queryform">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100632 <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
633 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100634 <input id="SEARCH" type="button" value="SEARCH">
635 <input type="button" id="showoptions" name="showoptions" value="Options" />
636 </form>
        <div id="dropdownoptions" style="display: none">
638 <form id="optionsform">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100639 <div class="controlgroup-vertical">
640 <label for="cutoff">cut-off</label>
641 <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
642 <label for="dedupe">dedupe</label>
643 <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
644 % if($mergedEnd > 0) {
Marc Kupietz78114532017-11-29 17:00:16 +0100645 <label for="sbf">backw.</label>
              <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
647 % }
648 <label for="neighbours">max. neighbours:</label>
649 <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100650 <label for="no_iterations">max. iterations</label>
651 <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
Marc Kupietz78114532017-11-29 17:00:16 +0100652 <!-- <label for="dosom">SOM</label>
653 <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100654 % if($collocators) {
655 <label for="sortby">window/sort</label>
656 <select id="sortby" name="sort">
657 <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
Marc Kupietza77acce2017-11-30 16:59:07 +0100658 <!-- <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
659 <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100660 </select>
661 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100662 <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100663 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100664 </form>
665 </div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100666 </div>
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100667 </div>
668 <div id="topwrapper">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100669 <div id="tabs">
670 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100671 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
672 <li><a href="#tabs-2">Semantics (SOM)</a></li>
673 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100674 </ul>
675 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100676 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
677 <div id="wrapper">
678 <div id="first" style="width:220px">
679 <table class="display compact nowrap" id="firsttable">
680 <thead>
681 <tr>
682 <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
683 </tr>
684 </thead>
685 <tbody>
686 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
687 % for my $list (@$lists) {
688 % my $i=0; while($list) {
689 % my $item = (@$list)[$i];
690 % my $c = ($collocators? (@$collocators)[$i] : 0);
691 % last if(!$c && !$item);
692 <tr>
693 <td align="right">
694 <%= ++$i %>.
695 </td>
696 % if($item) {
697 % if(!grep{$_ eq $item->{word}} @words) {
698 % push @vecs, $item->{vector};
699 % push @words, $item->{word};
700 % push @ranks, $item->{rank};
701 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100702 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100703 <td align="right">
704 <%= sprintf("%.3f", $item->{dist}) %>
705 </td>
706 <td>
Marc Kupietz34c08172017-11-29 17:08:47 +0100707 % my $class = ($marked->{$item->{word}}? "marked " : "");
708 % my $r = $item->{rank};
Marc Kupietzdab9f222017-11-29 14:22:59 +0100709 % if($r < $mergedEnd) {
710 % $class .= "merged";
711 % $r .= " (merged vocab)";
712 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
713 % $r -= $mergedEnd;
714 % }
715 <a class="<%= $class =%>"
716 title="freq. rank: <%= $r =%>"
717 href="<%= url_with->query([word => $item->{word}]) =%>">
718 <%= $item->{word} =%>
719 </a>
720 </td>
721 % } else {
722 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100723 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100724 </tr>
725 % last if($i >= 100);
726 % }
727 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100728 </tbody>
729 </table>
730 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100731 <script>
732 % use Mojo::ByteStream 'b';
733 % my $urlprefix = url_with->query([word=>'']);
734 $(window).load(function() {
735 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
736 });
737 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100738 % } else {
739 <div id="wrapper">
Marc Kupietz2f6b74a2017-12-01 13:20:21 +0100740 <div id="not-found-dialog" title="Not found">
              <p>ERROR: "<%= $word %>" not found in vocabulary.</p>
742 <p>If you are sure you have spelled the word as intended, you can try to increase the cutoff parameter in the options menu.</p>
743 </div>
744 <script>
745 $( function() {
746 $( "#not-found-dialog" ).dialog({
747 autoOpen: true,
748 modal: true,
749 draggable: false,
750 height: "auto",
751 width: "auto",
752 resizable: false,
753 buttons: {
754 "OK": function() {
755 $( this ).dialog( "close" );
756 },
757 "Apply": function() {
758 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
759 }
760 }
761 });
762 });
763 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100764 </div>
765 % }
766 <div id="second">
767 <div id="embed">
768 </div>
769 <div id="cost">
770 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100771 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100772 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100773 </div>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100774 <div id="tabs-2">
          <div id="som2" style="width: 800px; height: 800px">
776 </div>
777 <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
778 <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
779 </div>
780 <div id="tabs-3">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100781 <div id="second" style="width:500px">
782 <table class="display compact nowrap" id="secondtable">
783 <thead>
784 <tr>
785 % if($collocators) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100786 <th>#</th>
                  <th align="center" title="The columns (c) around the target that are considered for summation are marked with +.">w'</th>
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100788 <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
Marc Kupietza77acce2017-11-30 16:59:07 +0100789 <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
                  <th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
                  <th title="Sum of the column normalized activations over the selected columns." align="right">Σ(a/c)</th>
792 <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
793 <th align="left">collocator</th>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100794 % }
795 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100796 </thead>
797 <tbody>
Marc Kupietz22796142017-12-01 13:19:15 +0100798 <tr>
799 <td align="right">
800 </td>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100801 <td align="right">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100802 </td>
Marc Kupietz22796142017-12-01 13:19:15 +0100803 <td align="right">
804 </td>
805 <td align="right">
806 </td>
807 <td align="right">
808 </td>
809 <td align="right">
810 </td>
811 <td align="right">
812 </td>
813 <td align="left">
814 </td>
815 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100816 </tbody>
817 </table>
818 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100819 </div> <!-- tabs -->
820 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100821 </div> <!-- topwrapper -->
822 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100823 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100824 % if($training_args) {
825 <p>
826 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
827 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200828 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100829 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200830</html>