blob: 427fb56873b40f8fdbd81e7a4de1bb19f7ea722d [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
    <%# All third-party assets are loaded over HTTPS so the page also works when served via HTTPS (no mixed content). %>
    <link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
    <link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
    <%# "jquery-latest.min.js" is a deprecated alias frozen at 1.11.1; the version is pinned explicitly. %>
    <%# Stay on the 1.x line: the template still uses $(window).load(), which jQuery 3 removed. %>
    <script src="https://code.jquery.com/jquery-1.11.1.min.js"></script>
    <script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
    <script src="https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
    <link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
    <script
      src="https://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
      integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
      crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020015 <script>
// DOM-ready wiring: Enter-to-search on every input and the two scrolling DataTables.
//
// Dropped because they never had any effect on this page:
//   - $('#firstable').hide(): typo'd id, and executed before <body> was parsed;
//   - $("#xxxtabs").tabs(...) and $(".selector").tabs(...): no element matches
//     either selector in this template (the real tab widget is #tabs).
$(document).ready(function() {

  /**
   * Build the DataTables configuration shared by both result tables.
   *
   * @param {string} scrollHeight  CSS height of the scrollable table body, e.g. "780px".
   * @returns {Object} options object for $(...).DataTable()
   */
  function scrollingTableOptions(scrollHeight) {
    return {
      "sScrollY": scrollHeight,
      "bScrollCollapse": true,
      "bPaginate": false,
      "bJQueryUI": true,
      "dom": '<"top">rt<"bottom"flp><"clear">',
      "aoColumnDefs": [
        { "sWidth": "10%", "aTargets": [ -1 ] }   // last column gets 10% of the width
      ]
    };
  }

  // Pressing Enter in any input behaves like clicking the SEARCH button.
  $("input").on("keydown", function(event) {
    var keycode = event.keyCode || event.which || event.charCode;
    if (keycode == 13) {          // 13 = Enter
      document.getElementById('SEARCH').click();
      return false;               // swallow the key so no native submit happens
    }
    return true;
  });

  $('#firsttable').DataTable(scrollingTableOptions("780px"));
  $('#secondtable').DataTable(scrollingTableOptions("800px"));
});
Marc Kupietz0af83e32017-11-27 09:31:37 +010062
// Turn #dropdownoptions into the (initially closed) jQuery UI "Options" dialog.
$(function() {
  var dialogButtons = {};

  // "Cancel" just closes the dialog again.
  dialogButtons["Cancel"] = function() {
    $(this).dialog("close");
  };

  // "Apply" reloads the current path with the values of all forms as query string.
  dialogButtons["Apply"] = function() {
    var query = $('form').serialize();
    window.open($(location).attr('pathname') + '?' + query, "_self");
  };

  $("#dropdownoptions").dialog({
    title: "Options",
    autoOpen: false,
    modal: false,
    draggable: false,
    resizable: false,
    height: "auto",
    width: "auto",
    buttons: dialogButtons
  });
});
82
// SEARCH button: reload the current path with the values of all forms as query string.
$(function() {
  function reloadWithFormValues() {
    var target = $(location).attr('pathname') + '?' + $('form').serialize();
    window.open(target, "_self");
  }

  $("#SEARCH").click(reloadWithFormValues);
});
88
// "Options" button: open the options dialog and align its lower-left corner
// with the lower-left corner of the button that was clicked.
$(function() {
  $("#showoptions").click(function() {
    var button = $(this);
    var optionsDialog = $("#dropdownoptions");

    optionsDialog.dialog("open");
    optionsDialog.dialog("widget").position({
      my: 'left bottom',
      at: 'left bottom',
      of: button
    });
  });
});
100
// Numeric option fields become jQuery UI spinners whose arrow/keyboard "spin"
// steps are clamped to a fixed range.
$(function() {

  /**
   * Attach a spinner to `selector` that refuses to spin outside [lowest, highest].
   * When a spin step would leave the range, the value is pinned to the violated
   * bound and the step itself is cancelled.
   */
  function clampedSpinner(selector, lowest, highest) {
    $(selector).spinner({
      spin: function(event, ui) {
        var bound;
        if (ui.value < lowest) {
          bound = lowest;
        } else if (ui.value > highest) {
          bound = highest;
        } else {
          return;                 // inside the range: let the spinner proceed
        }
        $(this).spinner("value", bound);
        return false;             // cancel the out-of-range step
      }
    });
  }

  clampedSpinner("#no_iterations", 1000, 10000);
  clampedSpinner("#neighbours", 0, 200);
  clampedSpinner("#cutoff", 100000, 2000000);
});
142
// Main tab widget; the extra class selects the minimal ".tabs-min" look.
$(function() {
  var tabWidget = $("#tabs").tabs();
  tabWidget.addClass('tabs-min');
});

// Stack the option controls vertically.
$(function() {
  $(".controlgroup-vertical").controlgroup({
    "direction": "vertical"
  });
});

// Document-wide jQuery UI tooltips that show the element's title attribute
// (returned as-is, so markup inside a title is rendered by the tooltip).
$(function() {
  function titleAsContent() {
    return $(this).attr('title');
  }

  $(document).tooltip({
    content: titleAsContent
  });
});
Marc Kupietz694610d2017-11-25 18:30:03 +0100160
Marc Kupietz83305222016-04-28 09:57:22 +0200161 </script>
<%# d3 is fetched over HTTPS explicitly instead of via a protocol-relative URL. %>
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="/derekovecs/js/tsne.js"></script>
<script src="/derekovecs/js/som.js"></script>
<script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200166 <style>
/* Base typography for page text and form fields. */
body, input {
  font-family: Lato, sans-serif;
  font-size: 11pt;
}

/* Headings: orange, condensed, upper-case. */
h1, h2, h3 {
  margin: 5px 10px 0 0;
  color: rgb(246,168,0);
  font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
  font-weight: bold;
  line-height: 1.35;
  letter-spacing: normal;
  text-transform: uppercase;
  text-shadow: none;
  word-wrap: break-word;
}

/* Horizontal spacing around the two buttons next to the search field.
   The selector previously read "showoptions" (an element type that does not
   exist), so the Options button never received these margins. */
#showoptions, #SEARCH {
  margin-left: 10px;
  margin-right: 10px;
}
189
/* Rotated tab strip on the left edge of a tab widget.
   NOTE(review): no element in this template carries .tabs-left-vertical - confirm it is still needed. */
.tabs-left-vertical .ui-tabs-nav {
  position: absolute;
  width: 21em;
  transform: translate(-100%, 0%) rotate(-90deg);
  transform-origin: 100% 0%;
}

.tabs-left-vertical .ui-tabs-nav li {
  float: right;
}

.tabs-left-vertical .ui-tabs-panel {
  padding-left: 3.5em;
  height: 20em;
}

/* Fixed-width text, used for the collocator window pattern. */
.mono {
  font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
}

/* jQuery UI tooltip text; slightly smaller when the tooltip is a direct child of an <svg>. */
.ui-tooltip-content {
  font-size: 9pt;
  color: #222;
}

svg > .ui-tooltip-content {
  font-size: 8pt;
  color: #222;
}

/* Word links: "merged" words are green, the "target" word(s) red.
   `fill` is set as well so the colours also apply to SVG text. */
a.merged {
  color: green;
  fill: green;
}

/* Links in the neighbour list are plain; "marked" words are underlined everywhere. */
#first a {
  text-decoration: none;
}

a.marked, #first a.marked {
  text-decoration: underline;
}

a.target {
  color: red;
  fill: red;
}
Marc Kupietz694610d2017-11-25 18:30:03 +0100240
/* table.display used to declare "width: 40% important!" and "margin: 1".
   Both are invalid CSS ("!important" was written backwards, the margin has no
   unit) and were therefore always ignored by browsers. They are removed rather
   than "corrected" so that the rendering stays exactly as it was. */

/* Compact cell padding for the DataTables header and body cells. */
table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
  padding: 2px 2px;
}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100250
/* Layout containers. "//" is not a CSS comment, so the former "// border: ..."
   debugging lines were invalid declarations; they are kept as real comments. */

/* NOTE(review): no element with id "collocators" is visible in this template - confirm it is still used. */
#collocators {
  margin-bottom: 15px;
}

/* overflow: hidden makes these wrappers enclose their floated children. */
#header {
  width: 100%;
  /* debug: border: 1px solid red; */
  overflow: hidden;
}

#topwrapper {
  width: 100%;
  /* debug: border: 1px solid red; */
  overflow: hidden;
}

#wrapper {
  /* debug: border: 1px solid red; */
  overflow: hidden;
}

/* Page title, floated to the left of the search/options area. */
#pagetitle {
  max-width: 460px;
  margin-right: 20px;
  float: left;
  overflow: hidden;
  /* debug: border: 1px solid green; */
}

/* Search field and buttons, floated next to the page title. */
#options {
  float: left;
  width: 800px;
  margin: 10px;
  overflow: hidden;
}

#word {
  width: 50%;
}

/* Neighbour list, floated to the left of the map (#second). */
#first {
  margin-right: 20px;
  float: left;
  overflow: hidden;
  /* debug: border: 1px solid green; */
}

#tabs {
  margin-right: 20px;
  overflow: hidden;
}
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100301
/* Minimal look for the jQuery UI tab widget: no boxes, only a thin line under
   the tab strip, grey inactive and orange active tab labels. */
.tabs-min,
.tabs-min .ui-tabs-nav .ui-state-default {
  background: transparent;
  border: none;
}

.tabs-min .ui-widget-header {
  background: transparent;
  border: none;
  border-bottom: 1px solid #c0c0c0;
  -moz-border-radius: 0;
  -webkit-border-radius: 0;
  border-radius: 0;
}

/* The active tab is marked with a small arrow image below its label. */
.tabs-min .ui-tabs-nav .ui-state-active {
  background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
  border: none;
}

.tabs-min .ui-tabs-nav .ui-state-default a {
  color: #c0c0c0;
}

.tabs-min .ui-tabs-nav .ui-state-active a {
  color: rgb(246,168,0);
}
333
/* Frame around the t-SNE map (the script draws an 800px wide SVG into it). */
#embed {
  max-width: 802px;
  border: 1px solid #333;
}

/* Column holding the map. overflow: hidden keeps it beside the floated #first
   instead of wrapping below it. The former "// border: ..." line was not a
   valid CSS comment and is kept as a real one. */
#second {
  min-width: 800px;
  /* debug: border: 1px solid #333; */
  overflow: hidden;
}

#som2 svg {
  border: 1px solid #333;
}

/* Status line below the map (iteration counter and cost). */
#cost {
  font-size: 8pt;
  color: #222222;
  margin-top: 4px;
  margin-bottom: 12px;
}

/* Status and legend lines below the SOM. */
#sominfo1, #sominfo {
  font-size: 8pt;
  color: #222222;
  margin-top: 0px;
}

/* Small square colour swatches in the SOM legend. */
#somcolor1, #somcolor2, #somcolor3 {
  display: inline-block;
  height: 10px;
  width: 10px;
}

#third {
  border: 1px solid #333;
}
Marc Kupietz83305222016-04-28 09:57:22 +0200370
371 </style>
372 <script>
373
374 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
375 mapWidth = 800, // width map
376 mapHeight = 800,
377 jitterRadius = 7;
378
379 var T = new tsnejs.tSNE(opt); // create a tSNE instance
380
381 var Y;
382
383 var data;
384 var labeler;
385
/**
 * Move every word group (.tsnet) to the position stored in `labels`, using a
 * short transition, and write that position back into the t-SNE solution
 * (T.Y) by inverting the screen mapping (pan tx/ty, zoom ss, factor 20,
 * centring on the map).
 */
function applyJitter() {
  function placeLabel(label, idx) {
    var point = T.Y[idx];
    point[0] = (label.x - mapWidth/2 - tx)/ss/20;
    point[1] = (label.y - mapHeight/2 - ty)/ss/20;
    return "translate(" + (label.x) + "," + (label.y) + ")";
  }

  var groups = svg.selectAll('.tsnet').data(labels);
  groups.transition()
    .duration(50)
    .attr("transform", placeLabel);
}
Marc Kupietz34c08172017-11-29 17:08:47 +0100399
/**
 * Reposition every word group (.tsnet) from the current t-SNE solution:
 * solution coordinates are scaled by 20 and the zoom factor (ss), shifted by
 * the pan offset (tx, ty) and centred on the map.
 */
function updateEmbedding() {
  var solution = T.getSolution();

  function toScreen(word, idx) {
    var point = solution[idx];
    var sx = (point[0]*20*ss + tx) + mapWidth/2;
    var sy = (point[1]*20*ss + ty) + mapHeight/2;
    return "translate(" + sx + "," + sy + ")";
  }

  svg.selectAll('.tsnet')
    .data(data.words)
    .attr("transform", toScreen);
}
409
// Shared drawing state:
//   svg          - root <svg> selection, created by drawEmbedding()
//   labels       - label records handed to the labeler (filled in stopStep())
//   anchor_array - anchor records for the labeler (filled in stopStep())
//   text         - selection of all <text> nodes (set in stopStep())
var svg,
    labels = [],
    anchor_array = [],
    text;
414
/**
 * (Re)create the map SVG inside #embed with one linked <text> per word.
 *
 * Reads the global `data` object (set by showMap): `words`, `ranks`, `marked`,
 * `target` (space-padded target word string), `mergedEnd` and `urlprefix`.
 * Sets the global `svg` and installs the zoom/pan behaviour on it.
 * Positions are not set here; updateEmbedding() does that.
 */
function drawEmbedding() {
  // Format an integer with "," as thousands separator, e.g. 1234567 -> "1,234,567".
  function withThousandsSeparators(n) {
    return n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  }

  $("#embed").empty();
  var div = d3.select("#embed");

  svg = div.append("svg") // svg is global
    .attr("width", mapWidth)
    .attr("height", mapHeight);

  // ".b" matches nothing, so every word ends up in the enter selection.
  var g = svg.selectAll(".b")
    .data(data.words)
    .enter().append("g")
    .attr("class", "tsnet");

  g.append("a")
    .attr("xlink:href", function(word) {
      // The word becomes part of a query string, so it has to be encoded;
      // otherwise words containing "&", "#", "+" or "%" produce broken links.
      return data.urlprefix + encodeURIComponent(word);
    })
    .attr("class", function(d, i) {
      var res = "";
      if (data.marked[i]) {
        res = "marked ";
      }
      if (data.target.indexOf(" " + d + " ") >= 0) {
        return res + "target";
      } else if (data.ranks[i] < data.mergedEnd) {
        return res + "merged";
      } else {
        return res;
      }
    })
    .attr("title", function(d, i) {
      var title = "rank: " + i + " " + "freq. rank: " + withThousandsSeparators(data.ranks[i]);
      // Only ranks below mergedEnd are flagged, and only if mergedEnd is positive.
      if (data.mergedEnd > 0 && !(data.ranks[i] >= data.mergedEnd)) {
        title += " (merged vocab)";
      }
      return title;
    })
    .append("text")
    // Was "top", which is not a valid text-anchor value; "start" is the
    // default browsers fell back to, so the rendering is unchanged.
    .attr("text-anchor", "start")
    .attr("font-size", 12)
    .text(function(d) { return d; });

  var zoomListener = d3.behavior.zoom()
    .scaleExtent([0.1, 10])
    .center([0,0])
    .on("zoom", zoomHandler);
  zoomListener(svg);
}
469
// Current pan offset (tx, ty) and zoom factor (ss) of the map.
var tx = 0;
var ty = 0;
var ss = 1;

// Id of the currently running interval timer; -1 while none has been started.
var iter_id = -1;

/** d3 zoom callback: remember the new pan/zoom state and redraw the map. */
function zoomHandler() {
  var zoomEvent = d3.event;
  tx = zoomEvent.translate[0];
  ty = zoomEvent.translate[1];
  ss = zoomEvent.scale;
  updateEmbedding();
}

var stepnum = 0;
482
/**
 * Stop the t-SNE iteration and start the label de-overlapping ("jitter") phase.
 *
 * Stops the running interval timer, rebuilds the global `labels` and
 * `anchor_array` from the current t-SNE positions, measures the rendered
 * label sizes, creates the global `labeler` and schedules jitterStep().
 */
function stopStep() {
  clearInterval(iter_id);
  text = svg.selectAll("text");

  // Start from a clean slate: anchors used to be appended to the old array on
  // every call, so a second run had twice as many anchors as labels.
  anchor_array = [];

  // The labeler needs screen coordinates: one label and one anchor per word.
  labels = data.words.map(function(word, i) {
    var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
    var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
    anchor_array.push({x: x, y: y, r: jitterRadius});
    return {
      x: x,
      y: y,
      name: word
    };
  });

  // Store the actual bounding box of each rendered label (measured once per node).
  text.each(function(d, i) {
    var box = this.getBBox();
    labels[i].width = box.width;
    labels[i].height = box.height;
  });

  labeler = d3.labeler()
    .label(labels)
    .anchor(anchor_array)
    .width(mapWidth)
    .height(mapHeight)
    .update(applyJitter);

  // Rewind the jitter counter so that every jitter phase runs in full.
  jitter_i = 0;
  iter_id = setInterval(jitterStep, 1);
}
520
// Number of jitter ticks performed so far.
var jitter_i = 0;

/**
 * One tick of the jitter phase: run the labeler (start2(10)) and apply the
 * result, until the tick counter has passed 100; then stop the timer.
 */
function jitterStep() {
  var finished = jitter_i++ > 100;
  if (finished) {
    clearInterval(iter_id);
    return;
  }
  labeler.start2(10);
  applyJitter();
}
531
532 var last_cost=1000;
533
534 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100535 var i = T.iter;
Marc Kupietz34c08172017-11-29 17:08:47 +0100536
Marc Kupietz4abcd682017-11-28 20:51:08 +0100537 if(i > <%= $no_iterations %>) {
538 stopStep();
539 } else {
540 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
541 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
542 if(i % 250 == 0 && cost >= last_cost) {
543 stopStep();
544 } else {
545 last_cost = cost;
546 updateEmbedding();
547 }
548 }
Marc Kupietz83305222016-04-28 09:57:22 +0200549 }
550
551 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100552 data=j;
553 T.iter=0;
554 T.initDataRaw(data.vecs); // init embedding
555 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100556
Marc Kupietz4abcd682017-11-28 20:51:08 +0100557 if(iter_id >= 0) {
558 clearInterval(iter_id);
559 }
560 //T.debugGrad();
561 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100562 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100563 makeSOM(j, <%= $no_iterations %>);
564 }
Marc Kupietz83305222016-04-28 09:57:22 +0200565 }
// The search input element (#word); assigned by onload().
var queryword;

/** Page-load hook (called from the body's onload attribute): remember the search field. */
function onload() {
  var wordInput = document.getElementById('word');
  queryword = wordInput;
}
571
/**
 * Open KorAP (in the window named "KorAP") with the current content of the
 * search field as query.
 */
function queryKorAP() {
  // Do not depend on the load handler having run: look the field up if needed.
  var input = queryword || document.getElementById('word');
  // The query is user input; encode it so that "&", "#", "+", ... stay part of it.
  window.open('http://korap.ids-mannheim.de/kalamar/?q=' + encodeURIComponent(input.value), 'KorAP');
}

/**
 * Open KorAP (in the window named "KorAP") with `query` as a COSMAS II query
 * (ql=cosmas2).
 *
 * @param {string} query  raw, unencoded query text, e.g. "Haus /w5 Garten"
 */
function queryKorAPCII(query) {
  window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q=' + encodeURIComponent(query), 'KorAP');
}
Marc Kupietz83305222016-04-28 09:57:22 +0200579 </script>
580 </head>
<body onload="onload()">
  <div id="header">
    <div id="pagetitle">
      <h1>DeReKo-Vectors</h1>
    </div>
    <div id="options" class="widget">
      <form id="queryform">
        <input id="word" type="text" name="word" placeholder="Word(s) to be searched" value="<%= $word %>"
               aria-label="Word(s) to be searched"
               title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word.">
        <input id="SEARCH" type="button" value="SEARCH">
        <input type="button" id="showoptions" name="showoptions" value="Options">
      </form>
      <%# Content of the jQuery UI "Options" dialog. "display: hidden" is not valid CSS, so the %>
      <%# block was visible until the dialog widget was initialised; "display: none" hides it. %>
      <div id="dropdownoptions" style="display: none">
        <form id="optionsform">
          <div class="controlgroup-vertical">
            <label for="cutoff">cut-off</label>
            <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
            <label for="dedupe">dedupe</label>
            <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
            % if($mergedEnd > 0) {
            <label for="sbf">backw.</label>
            <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, the base vocabulary will be searched first. Otherwise the merged vocabulary will be searched first.">
            % }
            <label for="neighbours">max. neighbours:</label>
            <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
            <label for="no_iterations">max. iterations</label>
            <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
            <!-- <label for="dosom">SOM</label>
            <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
            % if($collocators) {
            <label for="sortby">window/sort</label>
            <select id="sortby" name="sort">
              <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
              <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
              <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option>
            </select>
            % }
            <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP">
          </div>
        </form>
      </div>
    </div>
  </div>
<div id="topwrapper">
  <div id="tabs">
    <ul>
      <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
      <li><a href="#tabs-2">Semantics (SOM)</a></li>
      <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
    </ul>
    <div id="tabs-1">
      <%# Both branches below open #wrapper and leave it open; it is closed after #second. %>
      <%# (The error branch used to close it itself, so the later closing tags ended #tabs-1 %>
      <%# and #tabs too early and the other two panels fell out of the tab widget.) %>
      % if($lists && (@$lists) > 0 && (@$lists)[0]) {
      <div id="wrapper">
        <div id="first" style="width:220px">
          <table class="display compact nowrap" id="firsttable">
            <thead>
              <tr>
                <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
              </tr>
            </thead>
            <tbody>
              % my $j=0; my @words; my @vecs; my @ranks; my @marked;
              % for my $list (@$lists) {
              %   my $i=0; while($list) {
              %     my $item = (@$list)[$i];
              %     my $c = ($collocators? (@$collocators)[$i] : 0);
              %     last if(!$c && !$item);
              <tr>
                <td align="right">
                  <%= ++$i %>.
                </td>
                % if($item) {
                %   # collect each distinct word once for the map (words, vectors, ranks, marks)
                %   if(!grep{$_ eq $item->{word}} @words) {
                %     push @vecs, $item->{vector};
                %     push @words, $item->{word};
                %     push @ranks, $item->{rank};
                %     push @marked, ($marked->{$item->{word}}? 1 : 0);
                %   }
                <td align="right">
                  <%= sprintf("%.3f", $item->{dist}) %>
                </td>
                <td>
                  % my $class = ($marked->{$item->{word}}? "marked " : "");
                  % my $r = $item->{rank};
                  % if($r < $mergedEnd) {
                  %   $class .= "merged";
                  %   $r .= " (merged vocab)";
                  % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
                  %   $r -= $mergedEnd;
                  % }
                  <a class="<%= $class =%>"
                     title="freq. rank: <%= $r =%>"
                     href="<%= url_with->query([word => $item->{word}]) =%>">
                    <%= $item->{word} =%>
                  </a>
                </td>
                % } else {
                <td colspan="2"></td>
                % }
              </tr>
              %     last if($i >= 100);
              %   }
              % }
            </tbody>
          </table>
        </div>
        <script>
          % use Mojo::ByteStream 'b';
          % my $urlprefix = url_with->query([word=>'']);
          $(window).load(function() {
            showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
          });
        </script>
      % } else {
      <div id="wrapper">
        <p>
          ERROR: "<%= $word %>" not found in vocabulary.
        </p>
      % }
        <div id="second">
          <div id="embed">
          </div>
          <div id="cost">
          </div>
        </div>
      </div> <!-- wrapper -->
    </div> <!-- tabs-1 -->
<div id="tabs-2">
  <%# "width: 800" had no unit and was therefore ignored by browsers; the unit is now given. %>
  <div id="som2" style="width: 800px; height: 800px">
  </div>
  <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
  <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
</div>
<div id="tabs-3">
  <%# This container used to reuse id="second" (already taken by the map column). It now has %>
  <%# its own id; the inline style reproduces what the #second rule (min-width: 800px, %>
  <%# overflow: hidden) made of the former width:500px, so the layout is unchanged. %>
  <div id="collocator-list" style="width: 800px; overflow: hidden">
    <table class="display compact nowrap" id="secondtable">
      <thead>
        <tr>
          % if($collocators) {
          <th>#</th>
          <th align="right" title="The window around the target word that is considered for summation.">w'</th>
          <th align="right" title="Raw (max.) activation of the collocator in the output layers.">a</th>
          <th title="Σp(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">Σp</th>
          <th align="right">Σp/|w|</th>
          <th title="c" align="left">collocator</th>
          % }
        </tr>
      </thead>
      <tbody>
        % for(my $i=0; $i < 100; $i++) {
        %   my $c = ($collocators? (@$collocators)[$i] : 0);
        <tr>
          <td align="right">
            <%= $i %>
          </td>
          % if($c) {
          <td align="right">
            <span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
          </td>
          <td align="right">
            <%= sprintf("%.3f", $c->{dist}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{norm}) %>
          </td>
          <td align="right">
            <%= sprintf("%.3e", $c->{sum}) %>
          </td>
          <td align="left">
            <%# The query parts are passed as (HTML-escaped) data attributes instead of being %>
            <%# pasted into the JavaScript source: a quote in the search word or collocator %>
            <%# could otherwise end the string literal and inject script. %>
            <a data-collocator="<%= $c->{word} %>" data-target="<%= $word %>"
               onclick="queryKorAPCII(this.getAttribute('data-collocator') + ' /w5 ' + this.getAttribute('data-target'))"
               title="freq. rank: <%= $c->{rank} =%>">
              <%= $c->{word} %>
            </a>
          </td>
          % } else {
          <td colspan="5"></td>
          % }
        </tr>
        % }
      </tbody>
    </table>
  </div> <!-- collocator-list -->
</div> <!-- tabs-3 -->
</div> <!-- tabs -->
</div> <!-- topwrapper -->
<div style="clear: both;"></div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100769 % if($training_args) {
770 <p>
771 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
772 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200773 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100774 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200775</html>