blob: 78ec92159018ca9e85a4c0e26c8e514fb1edf06e [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
Marc Kupietz80bd7b92017-07-04 16:25:54 +02005 <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
Marc Kupietz6dbadd12017-11-29 16:43:33 +01006 <link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
Marc Kupietz80bd7b92017-07-04 16:25:54 +02007 <script src="http://code.jquery.com/jquery-latest.min.js"></script>
Marc Kupietz4abcd682017-11-28 20:51:08 +01008 <script src = "https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
9 <script src = "https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
10 <link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
Marc Kupietz80bd7b92017-07-04 16:25:54 +020011 <script
Marc Kupietz6dbadd12017-11-29 16:43:33 +010012 src="http://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
Marc Kupietz34c08172017-11-29 17:08:47 +010013 integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
14 crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020015 <script>
Marc Kupietz4abcd682017-11-28 20:51:08 +010016 $('#firstable').hide();
17 $(document).ready(function() {
18 $("#xxxtabs").tabs( {
19 "show": function(event, ui) {
20 var oTable = $('div.dataTables_scrollBody>table.display', ui.panel).dataTable();
21 if ( oTable.length > 0 ) {
22 oTable.fnAdjustColumnSizing();
23 }
24 }
25 } );
Marc Kupietz694610d2017-11-25 18:30:03 +010026
Marc Kupietzdab9f222017-11-29 14:22:59 +010027 $("input").bind("keydown", function(event) {
28 // track enter key
29 var keycode = (event.keyCode ? event.keyCode : (event.which ? event.which : event.charCode));
30 if (keycode == 13) { // keycode for enter key
31 // force the 'Enter Key' to implicitly click the Update button
32 document.getElementById('SEARCH').click();
33 return false;
34 } else {
35 return true;
36 }});
37
Marc Kupietz4abcd682017-11-28 20:51:08 +010038 $(".selector").tabs({ active: 1 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010039
Marc Kupietz4abcd682017-11-28 20:51:08 +010040 $('#firsttable').DataTable({
41 "sScrollY": "780px",
42 "bScrollCollapse": true,
43 "bPaginate": false,
44 "bJQueryUI": true,
45 "dom": '<"top">rt<"bottom"flp><"clear">',
46 "aoColumnDefs": [
47 { "sWidth": "10%", "aTargets": [ -1 ] }
48 ]
49 } );
Marc Kupietz0af83e32017-11-27 09:31:37 +010050
Marc Kupietzd64f3f22017-11-30 12:07:42 +010051 var t = $('#secondtable').DataTable({
Marc Kupietz4abcd682017-11-28 20:51:08 +010052 "sScrollY": "800px",
53 "bScrollCollapse": true,
54 "bPaginate": false,
55 "bJQueryUI": true,
56 "dom": '<"top">rt<"bottom"flp><"clear">',
Marc Kupietzd64f3f22017-11-30 12:07:42 +010057 "columnDefs": [
58 { "searchable": false,
59 "orderable": false,
60 "targets": 0
61 },
62 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4 ] },
63 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 5 ] },
64 { "orderSequence": [ "desc" ], "targets": [ 3 ] }
65 ],
66 "order": [[ 3, 'desc' ]],
Marc Kupietz4abcd682017-11-28 20:51:08 +010067 } );
Marc Kupietzd64f3f22017-11-30 12:07:42 +010068 t.on( 'order.dt search.dt', function () {
69 t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
70 cell.innerHTML = i+1;
71 } );
72 } ).draw();
73
Marc Kupietz4abcd682017-11-28 20:51:08 +010074 });
Marc Kupietz0af83e32017-11-27 09:31:37 +010075
Marc Kupietzd64f3f22017-11-30 12:07:42 +010076
77
Marc Kupietzdab9f222017-11-29 14:22:59 +010078 $(function(){
79 $("#dropdownoptions").dialog({
80 title: "Options",
81 autoOpen: false,
82 modal: false,
83 draggable: false,
84 height: "auto",
85 width: "auto",
86 resizable: false,
87 buttons: {
88 "Cancel": function() {
89 $( this ).dialog( "close" );
90 },
91 "Apply": function() {
92 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
93 }
94 }
95 });
96 });
97
98 $(function(){
99 $("#SEARCH").click(function() {
100 window.open($(location).attr('pathname')+'?'+$('form').serialize(), "_self");
101 });
102 });
103
104 $(function(){
105 $("#showoptions").click(function(){
106 $("#dropdownoptions").dialog("open");
107 var target = $(this);
108 $("#dropdownoptions").dialog("widget").position({
109 my: 'left bottom',
110 at: 'left bottom',
111 of: target
112 });
113 });
114 });
115
Marc Kupietz4abcd682017-11-28 20:51:08 +0100116 $( function() {
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100117 $( "#no_iterations" ).spinner({
Marc Kupietz4abcd682017-11-28 20:51:08 +0100118 spin: function( event, ui ) {
119 if ( ui.value < 1000 ) {
120 $( this ).spinner( "value", 1000 );
121 return false;
122 } else if ( ui.value > 10000 ) {
123 $( this ).spinner( "value", 10000 );
124 return false;
125 }
126 }
127 });
128 } );
Marc Kupietz3305b0a2017-11-27 10:46:20 +0100129
Marc Kupietz4abcd682017-11-28 20:51:08 +0100130 $( function() {
131 $( "#neighbours" ).spinner({
132 spin: function( event, ui ) {
133 if ( ui.value < 0 ) {
134 $( this ).spinner( "value", 0 );
135 return false;
136 } else if ( ui.value > 200 ) {
137 $( this ).spinner( "value", 200 );
138 return false;
139 }
140 }
141 });
142 } );
143
144 $( function() {
145 $( "#cutoff" ).spinner({
146 spin: function( event, ui ) {
147 if ( ui.value < 100000 ) {
148 $( this ).spinner( "value", 100000 );
149 return false;
150 } else if ( ui.value > 2000000 ) {
151 $( this ).spinner( "value", 2000000 );
152 return false;
153 }
154 }
155 });
156 } );
157
158 $( function() {
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100159 $( "#tabs" ).tabs().addClass('tabs-min');
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100160 } );
Marc Kupietz4abcd682017-11-28 20:51:08 +0100161
162 $( function() {
163 $( ".controlgroup-vertical" ).controlgroup({
164 "direction": "vertical"
165 });
166 } );
167
168 $(function() {
169 $( document ).tooltip({
170 content: function() {
171 return $(this).attr('title');
172 }}
173 )
Marc Kupietz83305222016-04-28 09:57:22 +0200174 })
Marc Kupietz694610d2017-11-25 18:30:03 +0100175
Marc Kupietz83305222016-04-28 09:57:22 +0200176 </script>
177 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100178 <script src="/derekovecs/js/tsne.js"></script>
179 <script src="/derekovecs/js/som.js"></script>
180 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200181 <style>
182 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100183 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200184 font-size: 11pt;
185 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100186
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100187 h1, h2, h3 {
188 margin: 5px 10px 0 0;
189 color: rgb(246,168,0);
190 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
191 font-weight: bold;
192 line-height: 1.35;
193 letter-spacing: normal;
194 text-transform: uppercase;
195 text-shadow: none;
Marc Kupietz34c08172017-11-29 17:08:47 +0100196 word-wrap: break-word;
197 }
198
199
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100200 showoptions, #SEARCH {
201 margin-left: 10px;
202 margin-right: 10px;
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100203 }
204
Marc Kupietzdab9f222017-11-29 14:22:59 +0100205 .tabs-left-vertical .ui-tabs-nav {
206 position: absolute;
207 width: 21em;
208 transform: translate(-100%,0%) rotate(-90deg);
209 transform-origin: 100% 0%;
210 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100211
Marc Kupietzdab9f222017-11-29 14:22:59 +0100212 .tabs-left-vertical .ui-tabs-nav li {
213 float: right;
214 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100215
Marc Kupietzdab9f222017-11-29 14:22:59 +0100216 .tabs-left-vertical .ui-tabs-panel {
217 padding-left: 3.5em;
218 }
219
220 .tabs-left-vertical .ui-tabs-panel {
221 height: 20em;
Marc Kupietz34c08172017-11-29 17:08:47 +0100222 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100223
Marc Kupietz34c08172017-11-29 17:08:47 +0100224 .mono {
225 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
226 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100227
Marc Kupietz34c08172017-11-29 17:08:47 +0100228 .ui-tooltip-content {
229 font-size: 9pt;
230 color: #222222;
231 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200232
Marc Kupietz34c08172017-11-29 17:08:47 +0100233 svg > .ui-tooltip-content {
234 font-size: 8pt;
235 color: #222222;
236 }
237
238 a.merged {
239 color: green;
240 fill: green;
241 }
242
243 #first a {
244 text-decoration: none;
245 }
246
247 a.marked, #first a.marked {
248 text-decoration: underline;
249 }
250
251 a.target {
252 color: red;
253 fill: red;
254 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100255
Marc Kupietz4abcd682017-11-28 20:51:08 +0100256 table.display {
257 width: 40% important!;
258 margin: 1; /* <- works for me this way ****/
259 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100260
Marc Kupietz4abcd682017-11-28 20:51:08 +0100261 table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
262 padding: 2px 2px;
263 // border-bottom: 1px solid #111;
264 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100265
Marc Kupietz34c08172017-11-29 17:08:47 +0100266 #collocators {
267 margin-bottom: 15px;
268 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100269
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100270 #header {
271 width: 100%;
272 // border: 1px solid red;
273 overflow: hidden; /* will contain if #first is longer than #second */
274 }
275
Marc Kupietz34c08172017-11-29 17:08:47 +0100276 #topwrapper {
277 width: 100%;
278 // border: 1px solid red;
279 overflow: hidden; /* will contain if #first is longer than #second */
280 }
281
282 #wrapper {
283 // border: 1px solid red;
284 overflow: hidden; /* will contain if #first is longer than #second */
285 }
286
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100287 #pagetitle {
288 max-width: 460px;
289 margin-right: 20px;
290 float: left;
291 overflow: hidden; /* if you don't want #second to wrap below #first */
292 // border: 1px solid green;
293 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100294
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100295 #options {
296 float: left;
297 width: 800px;
298 margin: 10px;
299 overflow: hidden; /* if you don't want #second to wrap below #first */
300 }
301
302 #word {
303 width: 50%;
304 }
305
Marc Kupietz34c08172017-11-29 17:08:47 +0100306 #first {
307 margin-right: 20px;
308 float: left;
309 overflow: hidden; /* if you don't want #second to wrap below #first */
310 // border: 1px solid green;
311 }
312 #tabs {
313 margin-right: 20px;
314 overflow: hidden; /* if you don't want #second to wrap below #first */
315 }
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100316
317 .tabs-min {
318 background: transparent;
319 border: none;
320 }
321
322 .tabs-min .ui-widget-header {
323 background: transparent;
324 border: none;
325 border-bottom: 1px solid #c0c0c0;
326 -moz-border-radius: 0px;
327 -webkit-border-radius: 0px;
328 border-radius: 0px;
329 }
330
331 .tabs-min .ui-tabs-nav .ui-state-default {
332 background: transparent;
333 border: none;
334 }
335
336 .tabs-min .ui-tabs-nav .ui-state-active {
337 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
338 border: none;
339 }
340
341 .tabs-min .ui-tabs-nav .ui-state-default a {
342 color: #c0c0c0;
343 }
344
345 .tabs-min .ui-tabs-nav .ui-state-active a {
346 color: rgb(246,168,0);
347 }
348
Marc Kupietz4abcd682017-11-28 20:51:08 +0100349 #embed {
350 max-width: 802px;
351 border: 1px solid #333;
352 }
353
Marc Kupietz34c08172017-11-29 17:08:47 +0100354 #second {
355 min-width: 800px;
Marc Kupietzdab9f222017-11-29 14:22:59 +0100356 // border: 1px solid #333;
Marc Kupietz34c08172017-11-29 17:08:47 +0100357 overflow: hidden; /* if you don't want #second to wrap below #first */
358 }
359 #som2 svg {
360 border: 1px solid #333;
361 }
Marc Kupietz83305222016-04-28 09:57:22 +0200362
Marc Kupietz34c08172017-11-29 17:08:47 +0100363 #cost {
364 font-size: 8pt;
365 color: #222222;
366 margin-top: 4px;
367 margin-bottom: 12px;
368 }
Marc Kupietz83305222016-04-28 09:57:22 +0200369
Marc Kupietz34c08172017-11-29 17:08:47 +0100370 #sominfo1, #sominfo {
371 font-size: 8pt;
372 color: #222222;
373 margin-top: 0px;
374 }
Marc Kupietz83305222016-04-28 09:57:22 +0200375
Marc Kupietz34c08172017-11-29 17:08:47 +0100376 #somcolor1, #somcolor2, #somcolor3 {
377 display: inline-block;
378 height: 10px;
379 width: 10px;
380 }
Marc Kupietz83305222016-04-28 09:57:22 +0200381
Marc Kupietz34c08172017-11-29 17:08:47 +0100382 #third {
383 border: 1px solid #333;
384 }
Marc Kupietz83305222016-04-28 09:57:22 +0200385
386 </style>
387 <script>
388
389 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
390 mapWidth = 800, // width map
391 mapHeight = 800,
392 jitterRadius = 7;
393
394 var T = new tsnejs.tSNE(opt); // create a tSNE instance
395
396 var Y;
397
398 var data;
399 var labeler;
400
401 function applyJitter() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100402 svg.selectAll('.tsnet')
403 .data(labels)
404 .transition()
405 .duration(50)
406 .attr("transform", function(d, i) {
407 T.Y[i][0] = (d.x - mapWidth/2 - tx)/ss/20;
408 T.Y[i][1] = (d.y - mapHeight/2 - ty)/ss/20;
409 return "translate(" +
Marc Kupietz34c08172017-11-29 17:08:47 +0100410 (d.x) + "," +
411 (d.y) + ")";
Marc Kupietz4abcd682017-11-28 20:51:08 +0100412 });
Marc Kupietz83305222016-04-28 09:57:22 +0200413 }
Marc Kupietz34c08172017-11-29 17:08:47 +0100414
Marc Kupietz83305222016-04-28 09:57:22 +0200415 function updateEmbedding() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100416 var Y = T.getSolution();
417 svg.selectAll('.tsnet')
418 .data(data.words)
Marc Kupietz34c08172017-11-29 17:08:47 +0100419 .attr("transform", function(d, i) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100420 return "translate(" +
Marc Kupietz34c08172017-11-29 17:08:47 +0100421 ((Y[i][0]*20*ss + tx) + mapWidth/2) + "," +
422 ((Y[i][1]*20*ss + ty) + mapHeight/2) + ")"; });
Marc Kupietz83305222016-04-28 09:57:22 +0200423 }
424
425 var svg;
426 var labels = [];
427 var anchor_array = [];
428 var text;
429
430 function drawEmbedding() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100431 $("#embed").empty();
432 var div = d3.select("#embed");
Marc Kupietz34c08172017-11-29 17:08:47 +0100433
Marc Kupietz4abcd682017-11-28 20:51:08 +0100434 // get min and max in each column of Y
435 var Y = T.Y;
Marc Kupietz34c08172017-11-29 17:08:47 +0100436
Marc Kupietz4abcd682017-11-28 20:51:08 +0100437 svg = div.append("svg") // svg is global
438 .attr("width", mapWidth)
439 .attr("height", mapHeight);
Marc Kupietz34c08172017-11-29 17:08:47 +0100440
Marc Kupietz4abcd682017-11-28 20:51:08 +0100441 var g = svg.selectAll(".b")
442 .data(data.words)
443 .enter().append("g")
444 .attr("class", "tsnet");
Marc Kupietz34c08172017-11-29 17:08:47 +0100445
Marc Kupietz4abcd682017-11-28 20:51:08 +0100446 g.append("a")
447 .attr("xlink:href", function(word) {
448 return (data.urlprefix+word);})
449 .attr("class", function(d, i) {
450 var res="";
451 if(data.marked[i]) {
452 res="marked ";
453 }
454 if(data.target.indexOf(" "+d+" ") >= 0) {
455 return res+"target";
456 } else if(data.ranks[i] < data.mergedEnd) {
457 return res+"merged";
458 } else {
459 return res;
460 }
461 })
462 .attr("title", function(d, i) {
463 if(data.mergedEnd > 0) {
464 if(data.ranks[i] >= data.mergedEnd) {
465 return "rank: "+i +" "+"freq. rank: "+(data.ranks[i]).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
466 } else {
467 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",") + " (merged vocab)";
Marc Kupietz34c08172017-11-29 17:08:47 +0100468 }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100469 } else {
470 return "rank: "+i +" "+"freq. rank: "+data.ranks[i].toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
471 }
472 })
473 .append("text")
474 .attr("text-anchor", "top")
475 .attr("font-size", 12)
476 .text(function(d) { return d; });
Marc Kupietz34c08172017-11-29 17:08:47 +0100477
Marc Kupietz4abcd682017-11-28 20:51:08 +0100478 var zoomListener = d3.behavior.zoom()
479 .scaleExtent([0.1, 10])
480 .center([0,0])
481 .on("zoom", zoomHandler);
482 zoomListener(svg);
Marc Kupietz83305222016-04-28 09:57:22 +0200483 }
484
485 var tx=0, ty=0;
486 var ss=1;
487 var iter_id=-1;
488
489 function zoomHandler() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100490 tx = d3.event.translate[0];
491 ty = d3.event.translate[1];
492 ss = d3.event.scale;
493 updateEmbedding();
Marc Kupietz83305222016-04-28 09:57:22 +0200494 }
495
496 var stepnum = 0;
497
498 function stopStep() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100499 clearInterval(iter_id);
500 text = svg.selectAll("text");
Marc Kupietz34c08172017-11-29 17:08:47 +0100501
Marc Kupietz4abcd682017-11-28 20:51:08 +0100502 // jitter function needs different data and co-ordinate representation
503 labels = d3.range(data.words.length).map(function(i) {
504 var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
505 var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
506 anchor_array.push({x: x, y: y, r: jitterRadius});
507 return {
508 x: x,
509 y: y,
510 name: data.words[i]
511 };
512 });
Marc Kupietz34c08172017-11-29 17:08:47 +0100513
Marc Kupietz4abcd682017-11-28 20:51:08 +0100514 // get the actual label bounding boxes for the jitter function
515 var index = 0;
516 text.each(function() {
517 labels[index].width = this.getBBox().width;
518 labels[index].height = this.getBBox().height;
519 index += 1;
520 });
Marc Kupietz83305222016-04-28 09:57:22 +0200521
Marc Kupietz34c08172017-11-29 17:08:47 +0100522
Marc Kupietz4abcd682017-11-28 20:51:08 +0100523 // setTimeout(updateEmbedding, 1);
524 // setTimeout(
525 labeler = d3.labeler()
526 .label(labels)
527 .anchor(anchor_array)
528 .width(mapWidth)
529 .height(mapHeight)
530 .update(applyJitter);
531 // .start(1000);
Marc Kupietz83305222016-04-28 09:57:22 +0200532
Marc Kupietz4abcd682017-11-28 20:51:08 +0100533 iter_id = setInterval(jitterStep, 1);
Marc Kupietz83305222016-04-28 09:57:22 +0200534 }
535
536 var jitter_i=0;
537
538 function jitterStep() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100539 if(jitter_i++ > 100) {
540 clearInterval(iter_id);
541 } else {
542 labeler.start2(10);
543 applyJitter();
544 }
Marc Kupietz83305222016-04-28 09:57:22 +0200545 }
546
547 var last_cost=1000;
548
549 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100550 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100551
Marc Kupietz4abcd682017-11-28 20:51:08 +0100552 if(i > <%= $no_iterations %>) {
553 stopStep();
554 } else {
555 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
556 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
557 if(i % 250 == 0 && cost >= last_cost) {
558 stopStep();
559 } else {
560 last_cost = cost;
561 updateEmbedding();
562 }
563 }
Marc Kupietz83305222016-04-28 09:57:22 +0200564 }
565
566 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100567 data=j;
568 T.iter=0;
569 T.initDataRaw(data.vecs); // init embedding
570 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100571
Marc Kupietz4abcd682017-11-28 20:51:08 +0100572 if(iter_id >= 0) {
573 clearInterval(iter_id);
574 }
575 //T.debugGrad();
576 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100577 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100578 makeSOM(j, <%= $no_iterations %>);
579 }
Marc Kupietz83305222016-04-28 09:57:22 +0200580 }
Marc Kupietz39179ab2017-07-04 16:28:06 +0200581 var queryword;
582
583 function onload() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100584 queryword = document.getElementById('word');
Marc Kupietz39179ab2017-07-04 16:28:06 +0200585 }
586
587 function queryKorAP() {
588 window.open('http://korap.ids-mannheim.de/kalamar/?q='+queryword.value, 'KorAP');
589 }
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100590
591 function queryKorAPCII(query) {
592 window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q='+query, 'KorAP');
593 }
Marc Kupietz83305222016-04-28 09:57:22 +0200594 </script>
595 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200596 <body onload="onload()">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100597 <div id="header">
598 <div id="pagetitle">
599 <h1>DeReKo-Vectors</h1>
600 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100601 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100602 <form id="queryform">
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100603 <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
604 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100605 <input id="SEARCH" type="button" value="SEARCH">
606 <input type="button" id="showoptions" name="showoptions" value="Options" />
607 </form>
608 <div id="dropdownoptions" style="display: hidden">
609 <form id="optionsform">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100610 <div class="controlgroup-vertical">
611 <label for="cutoff">cut-off</label>
612 <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
613 <label for="dedupe">dedupe</label>
614 <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
615 % if($mergedEnd > 0) {
Marc Kupietz78114532017-11-29 17:00:16 +0100616 <label for="sbf">backw.</label>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100617 <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checkecked base vocabulary will be searched first. Otherwise merged vocabulray will be searched first.">
618 % }
619 <label for="neighbours">max. neighbours:</label>
620 <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
Marc Kupietzc1e42c32017-11-29 16:47:56 +0100621 <label for="no_iterations">max. iterations</label>
622 <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
Marc Kupietz78114532017-11-29 17:00:16 +0100623 <!-- <label for="dosom">SOM</label>
624 <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
Marc Kupietz4abcd682017-11-28 20:51:08 +0100625 % if($collocators) {
626 <label for="sortby">window/sort</label>
627 <select id="sortby" name="sort">
628 <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
629 <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
630 <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option>
631 </select>
632 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100633 <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100634 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100635 </form>
636 </div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100637 </div>
Marc Kupietz81bf39c2017-11-29 17:04:35 +0100638 </div>
639 <div id="topwrapper">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100640 <div id="tabs">
641 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100642 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
643 <li><a href="#tabs-2">Semantics (SOM)</a></li>
644 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100645 </ul>
646 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100647 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
648 <div id="wrapper">
649 <div id="first" style="width:220px">
650 <table class="display compact nowrap" id="firsttable">
651 <thead>
652 <tr>
653 <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
654 </tr>
655 </thead>
656 <tbody>
657 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
658 % for my $list (@$lists) {
659 % my $i=0; while($list) {
660 % my $item = (@$list)[$i];
661 % my $c = ($collocators? (@$collocators)[$i] : 0);
662 % last if(!$c && !$item);
663 <tr>
664 <td align="right">
665 <%= ++$i %>.
666 </td>
667 % if($item) {
668 % if(!grep{$_ eq $item->{word}} @words) {
669 % push @vecs, $item->{vector};
670 % push @words, $item->{word};
671 % push @ranks, $item->{rank};
672 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100673 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100674 <td align="right">
675 <%= sprintf("%.3f", $item->{dist}) %>
676 </td>
677 <td>
Marc Kupietz34c08172017-11-29 17:08:47 +0100678 % my $class = ($marked->{$item->{word}}? "marked " : "");
679 % my $r = $item->{rank};
Marc Kupietzdab9f222017-11-29 14:22:59 +0100680 % if($r < $mergedEnd) {
681 % $class .= "merged";
682 % $r .= " (merged vocab)";
683 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
684 % $r -= $mergedEnd;
685 % }
686 <a class="<%= $class =%>"
687 title="freq. rank: <%= $r =%>"
688 href="<%= url_with->query([word => $item->{word}]) =%>">
689 <%= $item->{word} =%>
690 </a>
691 </td>
692 % } else {
693 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100694 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100695 </tr>
696 % last if($i >= 100);
697 % }
698 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100699 </tbody>
700 </table>
701 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100702 <script>
703 % use Mojo::ByteStream 'b';
704 % my $urlprefix = url_with->query([word=>'']);
705 $(window).load(function() {
706 showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
707 });
708 </script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100709 % } else {
710 <div id="wrapper">
711 <p>
712 ERROR: "<%= $word %>" not found in vocabluary.
713 </p>
714 </div>
715 % }
716 <div id="second">
717 <div id="embed">
718 </div>
719 <div id="cost">
720 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100721 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100722 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100723 </div>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100724 <div id="tabs-2">
725 <div id="som2" style="width: 800; height: 800px">
726 </div>
727 <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
728 <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
729 </div>
730 <div id="tabs-3">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100731 <div id="second" style="width:500px">
732 <table class="display compact nowrap" id="secondtable">
733 <thead>
734 <tr>
735 % if($collocators) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100736 <th>#</th>
737 <th align="right" title="The window around the target word that is considered for summation.">w'</th>
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100738 <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
739 <th title="(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">⊥Σa</th>
740 <th align="right">Σa/Σw</th>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100741 <th title="c" align="left">collocator</th>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100742 % }
743 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100744 </thead>
745 <tbody>
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100746 % for(my $i=0; $i < (@$collocators); $i++) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100747 % my $c = ($collocators? (@$collocators)[$i] : 0);
748 <tr>
749 <td align="right">
750 <%= $i %>
751 </td>
752 % if($c) {
753 <td align="right">
754 <span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
755 </td>
756 <td align="right">
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100757 <%= sprintf("%.3f", $c->{max}) %>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100758 </td>
759 <td align="right">
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100760 <%= sprintf("%.3e", $c->{conorm}) %>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100761 </td>
762 <td align="right">
Marc Kupietzd64f3f22017-11-30 12:07:42 +0100763 <%= sprintf("%.3e", $c->{prob}) %>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100764 </td>
765 <td align="left">
766 <a onclick="<%= sprintf("queryKorAPCII('%s /w5 %s')", $c->{word}, $word) =%>"
767 title="freq. rank: <%= $c->{rank} =%>">
768 <%= $c->{word} %>
769 </a>
770 </td>
771 % } else {
772 <td colspan="5"/>
773 % }
774 </tr>
775 % }
776 </tbody>
777 </table>
778 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100779 </div> <!-- tabs -->
780 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100781 </div> <!-- topwrapper -->
782 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100783 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100784 % if($training_args) {
785 <p>
786 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
787 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200788 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100789 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200790</html>