blob: cdca76b46d631b1f4734bdafe557022c312d53eb [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
<meta charset="utf-8">
<title>DeReKo-Word-Vector-Distances: <%= $word %></title>
<!-- All third-party assets use explicit https URLs so that none of them is
     blocked as mixed content when this page is served over https. -->
<link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed">
<!-- NOTE(review): "jquery-latest" is an unversioned alias; consider pinning an
     exact jQuery release together with an integrity hash. -->
<script src="https://code.jquery.com/jquery-latest.min.js"></script>
<script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
<script src="https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
<script src="https://cdn.datatables.net/plug-ins/1.10.16/sorting/scientific.js"></script>
<link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
<script
  src="https://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
  integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
  crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020016 <script>
Marc Kupietz4abcd682017-11-28 20:51:08 +010017 $('#firstable').hide();
18 $(document).ready(function() {
19 $("#xxxtabs").tabs( {
20 "show": function(event, ui) {
21 var oTable = $('div.dataTables_scrollBody>table.display', ui.panel).dataTable();
22 if ( oTable.length > 0 ) {
23 oTable.fnAdjustColumnSizing();
24 }
25 }
26 } );
Marc Kupietz694610d2017-11-25 18:30:03 +010027
// Pressing Enter inside any <input> behaves like clicking the SEARCH button;
// every other key is passed through untouched.
$("input").on("keydown", function(event) {
  var keycode = event.keyCode ? event.keyCode : (event.which ? event.which : event.charCode);
  if (keycode != 13) { // 13 is the key code of the Enter key
    return true;
  }
  document.getElementById('SEARCH').click();
  return false; // suppress the default handling of the Enter key
});
38
Marc Kupietz4abcd682017-11-28 20:51:08 +010039 $(".selector").tabs({ active: 1 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010040
// Turns the neighbour list (#firsttable) into a scrollable, non-paginated
// DataTable; the last column is given a width of 10%.
var firstTableOptions = {
  "sScrollY": "780px",
  "bScrollCollapse": true,
  "bPaginate": false,
  "bJQueryUI": true,
  "dom": '<"top">rt<"bottom"flp><"clear">',
  "aoColumnDefs": [
    { "sWidth": "10%", "aTargets": [ -1 ] }
  ]
};
$('#firsttable').DataTable(firstTableOptions);
Marc Kupietz0af83e32017-11-27 09:31:37 +010051
// Formats the number n as a window mask string: its binary representation is
// left-padded with zeros to ten digits, an "x" is inserted after the first
// five digits, and every 0 is shown as "·" and every 1 as "+".
function bitvec2window(n) {
  var bits = n.toString(2).padStart(10, "0");
  var withCenter = bits.replace(/^([0-9]{5})/, '$1x');
  return withCenter.split('0').join('·').split('1').join('+');
}
59
60 var collocatorData = <%= b(Mojo::JSON::to_json($collocators)) %>;
61
// Builds the collocator table (#secondtable) from the JSON data embedded in
// the page; nothing is set up when no collocator data was delivered.
if (collocatorData != null) {
  // Cell formatters shared by several columns (DataTables render signature).
  var renderWindow = function (data, type, row) { return bitvec2window(data); };
  var renderFixed3 = function (data, type, row) { return data.toFixed(3); };
  var renderExp3 = function (data, type, row) { return data.toExponential(3); };

  var collocatorTable = $('#secondtable').DataTable({
    data: collocatorData,
    "sScrollY": "800px",
    "bScrollCollapse": true,
    "bPaginate": false,
    "bJQueryUI": true,
    "dom": '<"top">rt<"bottom"flp><"clear">',
    "columns": [
      { "data": "rank", type: "allnumeric" },
      { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: renderWindow },
      { "data": "max", render: renderFixed3 },
      { "data": "conorm", render: renderFixed3 },
      { "data": "prob", type: "scientific", render: renderExp3 },
      { "data": "cprob", type: "scientific", render: renderExp3 },
      { "data": "overall", type: "scientific", render: renderExp3 },
      { "data": "word", sClass: "collocator" }
    ],
    "columnDefs": [
      { className: "dt-right", "targets": [0,2,3,4,5,6] },
      { className: "dt-center", "targets": [ 1] },
      { "searchable": false,
        "orderable": false,
        "targets": 0
      },
      { "type": "scientific", targets: [2,3,4,5,6] },
      { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
      { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] }
    ],
    "order": [[ 4, 'desc' ]]
  });

  // Column 0 is a running row number: it is rewritten after every sort or
  // search so that it always counts 1..n in the currently applied order.
  collocatorTable.on('order.dt search.dt', function () {
    var rankCells = collocatorTable.column(0, {order:'applied'}).nodes();
    rankCells.each(function (cell, i) {
      cell.innerHTML = i+1;
    });
  }).draw();
}
Marc Kupietz4abcd682017-11-28 20:51:08 +010099 });
$(function(){
  // Reloads the current path in this window with the serialized values of
  // all <form> elements on the page as the query string.
  function reloadWithFormValues() {
    window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
  }

  // The options panel is a non-modal jQuery UI dialog that stays closed
  // until it is opened explicitly.
  $("#dropdownoptions").dialog({
    title: "Options",
    autoOpen: false,
    modal: false,
    draggable: false,
    height: "auto",
    width: "auto",
    resizable: false,
    buttons: {
      "Cancel": function() {
        $( this ).dialog( "close" );
      },
      "Apply": function() {
        reloadWithFormValues();
      }
    }
  });

  // The SEARCH button performs the same reload as the dialog's Apply button.
  $("#SEARCH").click(function() {
    reloadWithFormValues();
  });
});
125
126 $(function(){
Marc Kupietz22796142017-12-01 13:19:15 +0100127 $("td.collocator").click(function(){
128 queryKorAPCII(this.textContent + " /w5 <%= $word %>");
129 });
130 });
131
$(function(){
  // The Options button opens the options dialog and aligns the dialog's
  // bottom-left corner with the bottom-left corner of the button.
  $("#showoptions").click(function(){
    var optionsDialog = $("#dropdownoptions");
    optionsDialog.dialog("open");
    optionsDialog.dialog("widget").position({
      my: 'left bottom',
      at: 'left bottom',
      of: $(this)
    });
  });
});
143
$(function() {
  // Turns the input matched by selector into a jQuery UI spinner. A spin that
  // would leave [lowest, highest] is cancelled and the value is set to the
  // bound that was crossed.
  function clampedSpinner(selector, lowest, highest) {
    $(selector).spinner({
      spin: function(event, ui) {
        if (ui.value < lowest) {
          $(this).spinner("value", lowest);
          return false;
        }
        if (ui.value > highest) {
          $(this).spinner("value", highest);
          return false;
        }
      }
    });
  }

  clampedSpinner("#no_iterations", 1000, 10000);
  clampedSpinner("#neighbours", 0, 200);
  clampedSpinner("#cutoff", 100000, 2000000);
});
185
$(function() {
  // Main tab strip, restyled through the "tabs-min" class.
  $("#tabs").tabs().addClass('tabs-min');

  // Stack the controls of the options form vertically.
  $(".controlgroup-vertical").controlgroup({
    "direction": "vertical"
  });

  // jQuery UI tooltips for the whole document; the tooltip content is the
  // element's title attribute.
  $(document).tooltip({
    content: function() {
      return $(this).attr('title');
    }
  });
});
Marc Kupietz694610d2017-11-25 18:30:03 +0100203
Marc Kupietz83305222016-04-28 09:57:22 +0200204 </script>
205 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100206 <script src="/derekovecs/js/tsne.js"></script>
207 <script src="/derekovecs/js/som.js"></script>
208 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200209 <style>
210 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100211 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200212 font-size: 11pt;
213 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100214
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100215 h1, h2, h3 {
216 margin: 5px 10px 0 0;
217 color: rgb(246,168,0);
218 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
219 font-weight: bold;
220 line-height: 1.35;
221 letter-spacing: normal;
222 text-transform: uppercase;
223 text-shadow: none;
Marc Kupietz34c08172017-11-29 17:08:47 +0100224 word-wrap: break-word;
225 }
226
227
/* Horizontal spacing around the Options and SEARCH buttons. The first
   selector needs the "#": without it, it targeted a non-existent
   <showoptions> element instead of the button with id="showoptions". */
#showoptions, #SEARCH {
  margin-left: 10px;
  margin-right: 10px;
}
232
Marc Kupietzdab9f222017-11-29 14:22:59 +0100233 .tabs-left-vertical .ui-tabs-nav {
234 position: absolute;
235 width: 21em;
236 transform: translate(-100%,0%) rotate(-90deg);
237 transform-origin: 100% 0%;
238 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100239
Marc Kupietzdab9f222017-11-29 14:22:59 +0100240 .tabs-left-vertical .ui-tabs-nav li {
241 float: right;
242 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100243
Marc Kupietzdab9f222017-11-29 14:22:59 +0100244 .tabs-left-vertical .ui-tabs-panel {
245 padding-left: 3.5em;
246 }
247
248 .tabs-left-vertical .ui-tabs-panel {
249 height: 20em;
Marc Kupietz34c08172017-11-29 17:08:47 +0100250 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100251
Marc Kupietz34c08172017-11-29 17:08:47 +0100252 .mono {
253 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
254 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100255
Marc Kupietz34c08172017-11-29 17:08:47 +0100256 .ui-tooltip-content {
257 font-size: 9pt;
258 color: #222222;
259 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200260
Marc Kupietz34c08172017-11-29 17:08:47 +0100261 svg > .ui-tooltip-content {
262 font-size: 8pt;
263 color: #222222;
264 }
265
266 a.merged {
267 color: green;
268 fill: green;
269 }
270
271 #first a {
272 text-decoration: none;
273 }
274
275 a.marked, #first a.marked {
276 text-decoration: underline;
277 }
278
279 a.target {
280 color: red;
281 fill: red;
282 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100283
/* NOTE(review): this rule used to hold "width: 40% important!;" and
   "margin: 1;". Both are invalid CSS (misplaced "!important", unitless
   non-zero length), so browsers discarded them and the rule never had any
   effect. They are removed to keep the rendering unchanged; add valid
   declarations here only if a fixed table width is really wanted. */
table.display {
}
Marc Kupietz34c08172017-11-29 17:08:47 +0100288
/* Compact cell padding for all DataTables header and body cells. */
table.dataTable thead th,
table.dataTable thead td,
table.dataTable tbody td {
  padding: 2px 2px;
  /* border-bottom: 1px solid #111; */
}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100293
Marc Kupietz34c08172017-11-29 17:08:47 +0100294 #collocators {
295 margin-bottom: 15px;
296 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100297
/* Container boxes: overflow hidden lets each box contain its floated
   children, e.g. when #first is longer than #second. */
#header,
#topwrapper {
  width: 100%;
  overflow: hidden;
}

#wrapper {
  overflow: hidden;
}
314
/* Page title block, floated to the left of the options area. */
#pagetitle {
  max-width: 460px;
  margin-right: 20px;
  float: left;
  overflow: hidden; /* if you don't want #second to wrap below #first */
}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100322
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100323 #options {
324 float: left;
325 width: 800px;
326 margin: 10px;
327 overflow: hidden; /* if you don't want #second to wrap below #first */
328 }
329
330 #word {
331 width: 50%;
332 }
333
/* Left-floated column that holds the neighbour table. */
#first {
  margin-right: 20px;
  float: left;
  overflow: hidden; /* if you don't want #second to wrap below #first */
}
340 #tabs {
341 margin-right: 20px;
342 overflow: hidden; /* if you don't want #second to wrap below #first */
343 }
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100344
345 .tabs-min {
346 background: transparent;
347 border: none;
348 }
349
350 .tabs-min .ui-widget-header {
351 background: transparent;
352 border: none;
353 border-bottom: 1px solid #c0c0c0;
354 -moz-border-radius: 0px;
355 -webkit-border-radius: 0px;
356 border-radius: 0px;
357 }
358
359 .tabs-min .ui-tabs-nav .ui-state-default {
360 background: transparent;
361 border: none;
362 }
363
364 .tabs-min .ui-tabs-nav .ui-state-active {
365 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
366 border: none;
367 }
368
369 .tabs-min .ui-tabs-nav .ui-state-default a {
370 color: #c0c0c0;
371 }
372
373 .tabs-min .ui-tabs-nav .ui-state-active a {
374 color: rgb(246,168,0);
375 }
376
Marc Kupietz4abcd682017-11-28 20:51:08 +0100377 #embed {
378 max-width: 802px;
379 border: 1px solid #333;
380 }
381
/* Column next to #first. */
#second {
  min-width: 800px;
  overflow: hidden; /* if you don't want #second to wrap below #first */
}
387 #som2 svg {
388 border: 1px solid #333;
389 }
Marc Kupietz83305222016-04-28 09:57:22 +0200390
Marc Kupietz34c08172017-11-29 17:08:47 +0100391 #cost {
392 font-size: 8pt;
393 color: #222222;
394 margin-top: 4px;
395 margin-bottom: 12px;
396 }
Marc Kupietz83305222016-04-28 09:57:22 +0200397
Marc Kupietz34c08172017-11-29 17:08:47 +0100398 #sominfo1, #sominfo {
399 font-size: 8pt;
400 color: #222222;
401 margin-top: 0px;
402 }
Marc Kupietz83305222016-04-28 09:57:22 +0200403
Marc Kupietz34c08172017-11-29 17:08:47 +0100404 #somcolor1, #somcolor2, #somcolor3 {
405 display: inline-block;
406 height: 10px;
407 width: 10px;
408 }
Marc Kupietz83305222016-04-28 09:57:22 +0200409
Marc Kupietz34c08172017-11-29 17:08:47 +0100410 #third {
411 border: 1px solid #333;
412 }
Marc Kupietz83305222016-04-28 09:57:22 +0200413
414 </style>
415 <script>
416
417 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
418 mapWidth = 800, // width map
419 mapHeight = 800,
420 jitterRadius = 7;
421
422 var T = new tsnejs.tSNE(opt); // create a tSNE instance
423
424 var Y;
425
426 var data;
427 var labeler;
428
// Moves every ".tsnet" group to the position stored in the matching entry of
// labels (50 ms transition) and writes that position back into T.Y, undoing
// the current pan (tx, ty), zoom (ss) and the fixed factor of 20.
function applyJitter() {
  function placeLabel(label, i) {
    var point = T.Y[i];
    point[0] = (label.x - mapWidth/2 - tx)/ss/20;
    point[1] = (label.y - mapHeight/2 - ty)/ss/20;
    return "translate(" + (label.x) + "," + (label.y) + ")";
  }

  var groups = svg.selectAll('.tsnet').data(labels);
  groups.transition()
    .duration(50)
    .attr("transform", placeLabel);
}
Marc Kupietz34c08172017-11-29 17:08:47 +0100442
// Repositions every ".tsnet" group according to the current t-SNE solution,
// scaled by 20 and the zoom factor ss, shifted by the pan offset (tx, ty) and
// centred in the map.
function updateEmbedding() {
  var solution = T.getSolution();

  function toTransform(d, i) {
    var px = (solution[i][0]*20*ss + tx) + mapWidth/2;
    var py = (solution[i][1]*20*ss + ty) + mapHeight/2;
    return "translate(" + px + "," + py + ")";
  }

  svg.selectAll('.tsnet')
    .data(data.words)
    .attr("transform", toTransform);
}
452
453 var svg;
454 var labels = [];
455 var anchor_array = [];
456 var text;
457
// Rebuilds the t-SNE map inside #embed: a fresh <svg> with one linked text
// label per entry of data.words, plus zoom/pan handling via zoomHandler().
function drawEmbedding() {
  $("#embed").empty();

  // svg is a global that the other drawing functions of this script reuse.
  svg = d3.select("#embed").append("svg")
    .attr("width", mapWidth)
    .attr("height", mapHeight);

  // ".b" matches nothing in the fresh <svg>, so the enter selection holds one
  // new <g class="tsnet"> per word.
  var groups = svg.selectAll(".b")
    .data(data.words)
    .enter().append("g")
    .attr("class", "tsnet");

  groups.append("a")
    .attr("xlink:href", function(word) {
      // The word is percent-encoded so that characters such as "&", "#" or
      // "+" cannot corrupt the query string it is appended to.
      // NOTE(review): assumes data.urlprefix ends with "word=" - confirm.
      return data.urlprefix + encodeURIComponent(word);
    })
    .attr("class", function(d, i) {
      var res = "";
      if (data.marked[i]) {
        res = "marked ";
      }
      if (data.target.indexOf(" "+d+" ") >= 0) {
        return res+"target";
      } else if (data.ranks[i] < data.mergedEnd) {
        return res+"merged";
      } else {
        return res;
      }
    })
    .attr("title", function(d, i) {
      // Frequency rank with "," as thousands separator.
      var freqRank = data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
      var title = "rank: "+i +" "+"freq. rank: "+freqRank;
      if (data.mergedEnd > 0 && !(data.ranks[i] >= data.mergedEnd)) {
        title += " (merged vocab)";
      }
      return title;
    })
    .append("text")
    // "start" is the SVG default; the former value "top" is not a valid
    // text-anchor and was ignored, so the rendering is unchanged.
    .attr("text-anchor", "start")
    .attr("font-size", 12)
    .text(function(d) { return d; });

  var zoomListener = d3.behavior.zoom()
    .scaleExtent([0.1, 10])
    .center([0,0])
    .on("zoom", zoomHandler);
  zoomListener(svg);
}
512
513 var tx=0, ty=0;
514 var ss=1;
515 var iter_id=-1;
516
// d3 zoom callback: stores the current pan offset and zoom factor in the
// globals tx, ty and ss, then redraws the labels.
function zoomHandler() {
  var zoomEvent = d3.event;
  var offset = zoomEvent.translate;
  tx = offset[0];
  ty = offset[1];
  ss = zoomEvent.scale;
  updateEmbedding();
}
523
524 var stepnum = 0;
525
// Ends the running interval and starts the label jitter phase: the current
// label positions and bounding boxes are handed to d3.labeler(), which is
// then driven by jitterStep() on a new interval.
function stopStep() {
  clearInterval(iter_id);
  text = svg.selectAll("text");

  // Start from an empty anchor list so that a repeated call does not pile up
  // anchors from an earlier run (the array is a global).
  anchor_array.length = 0;

  // The labeler needs screen coordinates plus a name per label, and a
  // matching anchor circle per label.
  labels = d3.range(data.words.length).map(function(i) {
    var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
    var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
    anchor_array.push({x: x, y: y, r: jitterRadius});
    return {
      x: x,
      y: y,
      name: data.words[i]
    };
  });

  // Record the rendered size of every label; getBBox() is queried once per
  // element instead of once per dimension.
  var index = 0;
  text.each(function() {
    var box = this.getBBox();
    labels[index].width = box.width;
    labels[index].height = box.height;
    index += 1;
  });

  labeler = d3.labeler()
    .label(labels)
    .anchor(anchor_array)
    .width(mapWidth)
    .height(mapHeight)
    .update(applyJitter);

  iter_id = setInterval(jitterStep, 1);
}
563
564 var jitter_i=0;
565
// One tick of the jitter phase: runs labeler.start2(10) and redraws the
// labels, until the call counter jitter_i has exceeded 100, at which point
// the interval is cleared instead.
function jitterStep() {
  var callsSoFar = jitter_i;
  jitter_i += 1;
  if (callsSoFar > 100) {
    clearInterval(iter_id);
    return;
  }
  labeler.start2(10);
  applyJitter();
}
574
575 var last_cost=1000;
576
577 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100578 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100579
Marc Kupietz4abcd682017-11-28 20:51:08 +0100580 if(i > <%= $no_iterations %>) {
581 stopStep();
582 } else {
583 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
584 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
585 if(i % 250 == 0 && cost >= last_cost) {
586 stopStep();
587 } else {
588 last_cost = cost;
589 updateEmbedding();
590 }
591 }
Marc Kupietz83305222016-04-28 09:57:22 +0200592 }
593
594 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100595 data=j;
596 T.iter=0;
597 T.initDataRaw(data.vecs); // init embedding
598 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100599
Marc Kupietz4abcd682017-11-28 20:51:08 +0100600 if(iter_id >= 0) {
601 clearInterval(iter_id);
602 }
603 //T.debugGrad();
604 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100605 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100606 makeSOM(j, <%= $no_iterations %>);
607 }
Marc Kupietz83305222016-04-28 09:57:22 +0200608 }
// The search field element; set by onload().
var queryword;

// Called from the onload attribute of <body>: remembers the #word input.
function onload() {
  queryword = document.getElementById('word');
}

// Opens the KorAP search for the current content of the search field in the
// window named "KorAP".
function queryKorAP() {
  // Look the field up directly if onload() has not stored it, so the button
  // does not depend on that handler having run.
  var field = queryword || document.getElementById('word');
  // Percent-encode the query so that "&", "#", "+" etc. reach KorAP intact.
  window.open('http://korap.ids-mannheim.de/kalamar/?q='+encodeURIComponent(field.value), 'KorAP');
}
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100618
// Opens KorAP with the given query in COSMAS II syntax (ql=cosmas2) in the
// window named "KorAP". The query is percent-encoded so that spaces, "/",
// "&", "#" and similar characters cannot break the URL.
function queryKorAPCII(query) {
  window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q='+encodeURIComponent(query), 'KorAP');
}
Marc Kupietz83305222016-04-28 09:57:22 +0200622 </script>
623 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200624 <body onload="onload()">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100625 <div id="header">
626 <div id="pagetitle">
627 <h1>DeReKo-Vectors</h1>
628 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100629 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100630 <form id="queryform">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100631 <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
632 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100633 <input id="SEARCH" type="button" value="SEARCH">
634 <input type="button" id="showoptions" name="showoptions" value="Options" />
635 </form>
<!-- Content of the options dialog. "display: none" keeps it invisible until
     the dialog script takes over ("display: hidden" is not valid CSS and was
     ignored by browsers). -->
<div id="dropdownoptions" style="display: none">
  <form id="optionsform">
    <div class="controlgroup-vertical">
      <label for="cutoff">cut-off</label>
      <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
      <label for="dedupe">dedupe</label>
      <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
      % if($mergedEnd > 0) {
      <label for="sbf">backw.</label>
      <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
      % }
      <label for="neighbours">max. neighbours:</label>
      <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
      <label for="no_iterations">max. iterations</label>
      <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
      <!-- <label for="dosom">SOM</label>
      <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
      % if($collocators) {
      <label for="sortby">window/sort</label>
      <select id="sortby" name="sort">
        <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
        <!-- <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
        <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
      </select>
      % }
      <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP">
    </div>
  </form>
</div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100665 </div>
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100666 </div>
667 <div id="topwrapper">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100668 <div id="tabs">
669 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100670 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
671 <li><a href="#tabs-2">Semantics (SOM)</a></li>
672 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100673 </ul>
674 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100675 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
676 <div id="wrapper">
677 <div id="first" style="width:220px">
678 <table class="display compact nowrap" id="firsttable">
679 <thead>
680 <tr>
681 <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
682 </tr>
683 </thead>
684 <tbody>
685 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
686 % for my $list (@$lists) {
687 % my $i=0; while($list) {
688 % my $item = (@$list)[$i];
689 % my $c = ($collocators? (@$collocators)[$i] : 0);
690 % last if(!$c && !$item);
691 <tr>
692 <td align="right">
693 <%= ++$i %>.
694 </td>
695 % if($item) {
696 % if(!grep{$_ eq $item->{word}} @words) {
697 % push @vecs, $item->{vector};
698 % push @words, $item->{word};
699 % push @ranks, $item->{rank};
700 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100701 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100702 <td align="right">
703 <%= sprintf("%.3f", $item->{dist}) %>
704 </td>
705 <td>
Marc Kupietz34c08172017-11-29 17:08:47 +0100706 % my $class = ($marked->{$item->{word}}? "marked " : "");
707 % my $r = $item->{rank};
Marc Kupietzdab9f222017-11-29 14:22:59 +0100708 % if($r < $mergedEnd) {
709 % $class .= "merged";
710 % $r .= " (merged vocab)";
711 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
712 % $r -= $mergedEnd;
713 % }
714 <a class="<%= $class =%>"
715 title="freq. rank: <%= $r =%>"
716 href="<%= url_with->query([word => $item->{word}]) =%>">
717 <%= $item->{word} =%>
718 </a>
719 </td>
720 % } else {
721 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100722 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100723 </tr>
724 % last if($i >= 100);
725 % }
726 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100727 </tbody>
728 </table>
729 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100730 <script>
731 % use Mojo::ByteStream 'b';
732 % my $urlprefix = url_with->query([word=>'']);
733 $(window).load(function() {
734 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
735 });
736 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100737 % } else {
738 <div id="wrapper">
739 <p>
ERROR: "<%= $word %>" not found in vocabulary.
741 </p>
742 </div>
743 % }
744 <div id="second">
745 <div id="embed">
746 </div>
747 <div id="cost">
748 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100749 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100750 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100751 </div>
<!-- SOM tab. The width of #som2 now carries a unit: "width: 800" is not a
     valid CSS length and was ignored by browsers. -->
<div id="tabs-2">
  <div id="som2" style="width: 800px; height: 800px">
  </div>
  <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
  <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
</div>
758 <div id="tabs-3">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100759 <div id="second" style="width:500px">
760 <table class="display compact nowrap" id="secondtable">
<!-- Column headers of the collocator table; the title attributes are shown
     as tooltips. The w' column is rendered with "+" and "·" marks. -->
<thead>
  <tr>
    % if($collocators) {
    <th>#</th>
    <th align="center" title="The columns (c) around the target that are considered for summation are marked with +.">w'</th>
    <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
    <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
    <th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
    <th title="Sum of the column normalized activations over the selected columns." align="right">Σ(a/c)</th>
    <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
    <th align="left">collocator</th>
    % }
  </tr>
</thead>
775 <tbody>
Marc Kupietz22796142017-12-01 13:19:15 +0100776 <tr>
777 <td align="right">
778 </td>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100779 <td align="right">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100780 </td>
Marc Kupietz22796142017-12-01 13:19:15 +0100781 <td align="right">
782 </td>
783 <td align="right">
784 </td>
785 <td align="right">
786 </td>
787 <td align="right">
788 </td>
789 <td align="right">
790 </td>
791 <td align="left">
792 </td>
793 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100794 </tbody>
795 </table>
796 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100797 </div> <!-- tabs -->
798 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100799 </div> <!-- topwrapper -->
800 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100801 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100802 % if($training_args) {
803 <p>
804 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
805 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200806 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100807 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200808</html>