blob: 59f2e91eaa10e025a1ece774f9def5e40bd002f6 [file] [log] [blame]
Marc Kupietz83305222016-04-28 09:57:22 +02001<!DOCTYPE html>
2<html>
3 <head>
4 <title>DeReKo-Word-Vector-Distances: <%= $word %></title>
%# Third-party assets. Everything is fetched over HTTPS so the page also works
%# when it is served via HTTPS (plain-http scripts would be blocked as mixed
%# content, and the SRI hash on jQuery UI is only meaningful over TLS).
<link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
<link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Condensed" rel="stylesheet">
%# "jquery-latest" is a frozen alias of 1.11.1; the version is pinned explicitly.
<script src="https://code.jquery.com/jquery-1.11.1.min.js"></script>
<script src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
<script src="https://cdn.datatables.net/fixedcolumns/3.2.3/js/dataTables.fixedColumns.min.js"></script>
<link rel="stylesheet" href="https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css">
<script
  src="https://code.jquery.com/ui/1.12.1/jquery-ui.min.js"
  integrity="sha256-VazP97ZCwtekAsvgPBSUwPFKdrwD3unUfSGVYrahUqU="
  crossorigin="anonymous"></script>
Marc Kupietz80bd7b92017-07-04 16:25:54 +020015 <script>
// Page initialisation: tab widgets, Enter-key handling and the two DataTables.
//
// The former top-level call $('#firstable').hide() was dropped: it ran in
// <head> before any element existed and used a misspelt id (the table is
// "firsttable"), so it never matched anything.
$(document).ready(function() {
  // Builds the option set shared by both result tables; only the height of
  // the scrollable body differs between them.
  function scrollingTableOptions(scrollHeight) {
    return {
      "sScrollY": scrollHeight,
      "bScrollCollapse": true,
      "bPaginate": false,
      "bJQueryUI": true,
      "dom": '<"top">rt<"bottom"flp><"clear">',
      "aoColumnDefs": [
        { "sWidth": "10%", "aTargets": [ -1 ] }
      ]
    };
  }

  // NOTE(review): no element with id "xxxtabs" is present in this template,
  // so this looks like a deliberately disabled experiment - confirm before
  // removing.
  $("#xxxtabs").tabs({
    "show": function(event, ui) {
      var oTable = $('div.dataTables_scrollBody>table.display', ui.panel).dataTable();
      if (oTable.length > 0) {
        oTable.fnAdjustColumnSizing();
      }
    }
  });

  // Pressing Enter in any input behaves like clicking the SEARCH button.
  // jQuery normalises the key code into event.which.
  $("input").on("keydown", function(event) {
    if (event.which === 13) {
      document.getElementById('SEARCH').click();
      return false; // suppress the browser's default form submission
    }
    return true;
  });

  // NOTE(review): nothing in this template carries the class "selector".
  $(".selector").tabs({ active: 1 });

  $('#firsttable').DataTable(scrollingTableOptions("780px"));
  $('#secondtable').DataTable(scrollingTableOptions("800px"));
});
Marc Kupietz0af83e32017-11-27 09:31:37 +010062
// Options dialog: created closed; "Apply" reloads the page with the
// serialized state of all forms as the query string.
$(function() {
  var dialogButtons = {};
  dialogButtons["Cancel"] = function() {
    $(this).dialog("close");
  };
  dialogButtons["Apply"] = function() {
    var target = $(location).attr('pathname') + '?' + $('form').serialize();
    window.open(target, "_self");
  };

  $("#dropdownoptions").dialog({
    title: "Options",
    autoOpen: false,
    modal: false,
    draggable: false,
    height: "auto",
    width: "auto",
    resizable: false,
    buttons: dialogButtons
  });
});

// SEARCH button: same reload as the dialog's "Apply" button.
$(function() {
  $("#SEARCH").click(function() {
    var target = $(location).attr('pathname') + '?' + $('form').serialize();
    window.open(target, "_self");
  });
});

// "Options" button: open the dialog and align its widget with the button.
$(function() {
  $("#showoptions").click(function() {
    var trigger = $(this);
    $("#dropdownoptions").dialog("open");
    $("#dropdownoptions").dialog("widget").position({
      my: 'left bottom',
      at: 'left bottom',
      of: trigger
    });
  });
});
100
// Numeric option fields rendered as jQuery UI spinners. Each entry is
// [selector, lowest allowed value, highest allowed value]; spinning past a
// bound snaps the field to that bound and cancels the spin.
$.each([
  ["#no_iterations", 1000, 10000],
  ["#neighbours", 0, 200],
  ["#cutoff", 100000, 2000000]
], function(unused, spec) {
  var selector = spec[0];
  var lowest = spec[1];
  var highest = spec[2];

  // One ready callback per field, registered in the listed order.
  $(function() {
    $(selector).spinner({
      spin: function(event, ui) {
        var limit;
        if (ui.value < lowest) {
          limit = lowest;
        } else if (ui.value > highest) {
          limit = highest;
        } else {
          return; // value is within range: let the spin proceed
        }
        $(this).spinner("value", limit);
        return false;
      }
    });
  });
});
142
// Main tab widget, restyled through the "tabs-min" rules.
$(function initTabs() {
  var tabWidget = $("#tabs").tabs();
  tabWidget.addClass('tabs-min');
});

// Stack the option controls vertically.
$(function initControlGroups() {
  var groupOptions = { "direction": "vertical" };
  $(".controlgroup-vertical").controlgroup(groupOptions);
});

// Document-wide tooltips whose content is taken verbatim from the element's
// title attribute.
$(function initTooltips() {
  function titleOf() {
    return $(this).attr('title');
  }
  $(document).tooltip({ content: titleOf });
});
Marc Kupietz694610d2017-11-25 18:30:03 +0100160
Marc Kupietz83305222016-04-28 09:57:22 +0200161 </script>
162 <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
Marc Kupietz554aff52017-11-09 14:42:09 +0100163 <script src="/derekovecs/js/tsne.js"></script>
164 <script src="/derekovecs/js/som.js"></script>
165 <script src="/derekovecs/js/labeler.js"></script>
Marc Kupietz83305222016-04-28 09:57:22 +0200166 <style>
167 body, input {
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100168 font-family: Lato, sans-serif;
Marc Kupietz83305222016-04-28 09:57:22 +0200169 font-size: 11pt;
170 }
Marc Kupietz30ca4342017-11-22 21:21:20 +0100171
Marc Kupietz6dbadd12017-11-29 16:43:33 +0100172 h1, h2, h3 {
173 margin: 5px 10px 0 0;
174 color: rgb(246,168,0);
175 font-family: "Univers LT Std 47 Cn Lt", "Univers LT Std 67 Cn Lt", "Roboto Condensed", "Univers LT Std 67 Cn Bold", "UniversLTStd-BoldCn", "Times", 'League Gothic', Impact, sans-serif;
176 font-weight: bold;
177 line-height: 1.35;
178 letter-spacing: normal;
179 text-transform: uppercase;
180 text-shadow: none;
181 word-wrap: break-word;
182 }
183
Marc Kupietzdab9f222017-11-29 14:22:59 +0100184 .tabs-left-vertical .ui-tabs-nav {
185 position: absolute;
186 width: 21em;
187 transform: translate(-100%,0%) rotate(-90deg);
188 transform-origin: 100% 0%;
189 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100190
Marc Kupietzdab9f222017-11-29 14:22:59 +0100191 .tabs-left-vertical .ui-tabs-nav li {
192 float: right;
193 }
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100194
/* Panel next to the rotated tab strip: leave room on the left and fix the height. */
.tabs-left-vertical .ui-tabs-panel {
  padding-left: 3.5em;
  height: 20em;
}
202
Marc Kupietz30ca4342017-11-22 21:21:20 +0100203 .mono {
204 font-family: "DejaVu Sans Mono", Inconsolata, SourceCodePro, Courier;
205 }
206
Marc Kupietz83305222016-04-28 09:57:22 +0200207 .ui-tooltip-content {
208 font-size: 9pt;
209 color: #222222;
210 }
211
212 svg > .ui-tooltip-content {
213 font-size: 8pt;
214 color: #222222;
215 }
216
217 a.merged {
218 color: green;
219 fill: green;
220 }
221
222 #first a {
223 text-decoration: none;
224 }
225
226 a.marked, #first a.marked {
227 text-decoration: underline;
228 }
Marc Kupietzf4b49392016-04-28 10:49:56 +0200229
Marc Kupietz83305222016-04-28 09:57:22 +0200230 a.target {
231 color: red;
232 fill: red;
233 }
Marc Kupietz694610d2017-11-25 18:30:03 +0100234
table.display {
  /* This rule used to contain "width: 40% important!" and a unit-less
     "margin: 1". Both are invalid CSS and were discarded by browsers, so the
     rule never had any effect. The intended declarations are kept here,
     disabled, so that rendering stays exactly as before:
       width: 40% !important;
       margin: 1px;
  */
}
table.dataTable thead th, table.dataTable thead td, table.dataTable tbody td {
  padding: 2px 2px;
  /* border-bottom: 1px solid #111; */
}
Marc Kupietz83305222016-04-28 09:57:22 +0200243 #collocators {
244 margin-bottom: 15px;
245 }
246
#topwrapper {
  width: 100%;
  /* border: 1px solid red; */
  overflow: hidden; /* will contain if #first is longer than #second */
}

#wrapper {
  /* border: 1px solid red; */
  overflow: hidden; /* will contain if #first is longer than #second */
}
257
258 #options {
259 float: right;
260 margin: 20px;
261 max-width: 280px;
262 overflow: hidden; /* if you don't want #second to wrap below #first */
263 }
264
#first {
  margin-right: 20px;
  float: left;
  overflow: hidden; /* if you don't want #second to wrap below #first */
  /* border: 1px solid green; */
}
Marc Kupietz4abcd682017-11-28 20:51:08 +0100271 #tabs {
272 margin-right: 20px;
273 overflow: hidden; /* if you don't want #second to wrap below #first */
274 }
275
Marc Kupietzdf3d4b52017-11-29 16:57:27 +0100276
277 .tabs-min {
278 background: transparent;
279 border: none;
280 }
281
282 .tabs-min .ui-widget-header {
283 background: transparent;
284 border: none;
285 border-bottom: 1px solid #c0c0c0;
286 -moz-border-radius: 0px;
287 -webkit-border-radius: 0px;
288 border-radius: 0px;
289 }
290
291 .tabs-min .ui-tabs-nav .ui-state-default {
292 background: transparent;
293 border: none;
294 }
295
296 .tabs-min .ui-tabs-nav .ui-state-active {
297 background: transparent url(img/uiTabsArrow.png) no-repeat bottom center;
298 border: none;
299 }
300
301 .tabs-min .ui-tabs-nav .ui-state-default a {
302 color: #c0c0c0;
303 }
304
305 .tabs-min .ui-tabs-nav .ui-state-active a {
306 color: rgb(246,168,0);
307 }
308
Marc Kupietz4abcd682017-11-28 20:51:08 +0100309 #embed {
310 max-width: 802px;
311 border: 1px solid #333;
312 }
313
#second {
  min-width: 800px;
  /* border: 1px solid #333; */
  overflow: hidden; /* if you don't want #second to wrap below #first */
}
319 #som2 svg {
320 border: 1px solid #333;
321 }
322
323 #cost {
324 font-size: 8pt;
325 color: #222222;
326 margin-top: 4px;
327 margin-bottom: 12px;
328 }
329
330 #sominfo1, #sominfo {
331 font-size: 8pt;
332 color: #222222;
333 margin-top: 0px;
334 }
335
336 #somcolor1, #somcolor2, #somcolor3 {
337 display: inline-block;
338 height: 10px;
339 width: 10px;
340 }
341
342 #third {
343 border: 1px solid #333;
344 }
345
346 </style>
347 <script>
348
349 var opt = {epsilon: <%= $epsilon %>, perplexity: <%= $perplexity %>},
350 mapWidth = 800, // width map
351 mapHeight = 800,
352 jitterRadius = 7;
353
354 var T = new tsnejs.tSNE(opt); // create a tSNE instance
355
356 var Y;
357
358 var data;
359 var labeler;
360
// Moves every ".tsnet" group to the position stored in the matching entry of
// `labels` (short transition) and writes that position back into T.Y, undoing
// the current pan (tx, ty), zoom (ss) and the fixed factor of 20.
function applyJitter() {
  var groups = svg.selectAll('.tsnet').data(labels);

  groups.transition()
    .duration(50)
    .attr("transform", function(label, idx) {
      var point = T.Y[idx];
      point[0] = (label.x - mapWidth/2 - tx)/ss/20;
      point[1] = (label.y - mapHeight/2 - ty)/ss/20;
      return "translate(" + (label.x) + "," + (label.y) + ")";
    });
}
374
// Re-positions every ".tsnet" group from the current t-SNE solution, applying
// the fixed factor of 20, the zoom scale (ss), the pan offset (tx, ty) and
// centring on the map.
function updateEmbedding() {
  var solution = T.getSolution();

  function placement(word, idx) {
    var px = (solution[idx][0]*20*ss + tx) + mapWidth/2;
    var py = (solution[idx][1]*20*ss + ty) + mapHeight/2;
    return "translate(" + px + "," + py + ")";
  }

  svg.selectAll('.tsnet')
    .data(data.words)
    .attr("transform", placement);
}
384
385 var svg;
386 var labels = [];
387 var anchor_array = [];
388 var text;
389
// (Re)creates the SVG inside #embed with one linked text label per entry of
// data.words and attaches pan/zoom handling. The labels are not positioned
// here; updateEmbedding() and applyJitter() set their transforms.
function drawEmbedding() {
  $("#embed").empty();
  var div = d3.select("#embed");

  svg = div.append("svg") // svg is global
    .attr("width", mapWidth)
    .attr("height", mapHeight);

  // Renders a rank with "," as thousands separator, e.g. 1234567 -> "1,234,567".
  function formatRank(rank) {
    return rank.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  }

  var g = svg.selectAll(".b")
    .data(data.words)
    .enter().append("g")
    .attr("class", "tsnet");

  g.append("a")
    .attr("xlink:href", function(word) {
      // The word becomes a query-string value, so it must be encoded;
      // otherwise characters such as "&", "#" or "+" corrupt the link.
      return data.urlprefix + encodeURIComponent(word);
    })
    .attr("class", function(d, i) {
      var res = "";
      if (data.marked[i]) {
        res = "marked ";
      }
      // data.target is the space-padded query, so this matches whole words only.
      if (data.target.indexOf(" " + d + " ") >= 0) {
        return res + "target";
      } else if (data.ranks[i] < data.mergedEnd) {
        return res + "merged";
      } else {
        return res;
      }
    })
    .attr("title", function(d, i) {
      var title = "rank: " + i + " " + "freq. rank: " + formatRank(data.ranks[i]);
      if (data.mergedEnd > 0 && data.ranks[i] < data.mergedEnd) {
        title += " (merged vocab)";
      }
      return title;
    })
    .append("text")
    // "top" is not a valid text-anchor value and was ignored; "start" is the
    // default that was effectively in use.
    .attr("text-anchor", "start")
    .attr("font-size", 12)
    .text(function(d) { return d; });

  var zoomListener = d3.behavior.zoom()
    .scaleExtent([0.1, 10])
    .center([0,0])
    .on("zoom", zoomHandler);
  zoomListener(svg);
}
444
445 var tx=0, ty=0;
446 var ss=1;
447 var iter_id=-1;
448
// d3 zoom callback: stores the current pan offset and zoom scale in the
// globals tx, ty and ss, then redraws the labels.
function zoomHandler() {
  var zoomEvent = d3.event;
  var offset = zoomEvent.translate;

  tx = offset[0];
  ty = offset[1];
  ss = zoomEvent.scale;

  updateEmbedding();
}
455
456 var stepnum = 0;
457
// Ends the t-SNE iteration and starts the label de-overlapping ("jitter")
// phase: builds the label/anchor lists the labeler works on from the current
// embedding and schedules jitterStep().
function stopStep() {
  clearInterval(iter_id);
  text = svg.selectAll("text");

  // Start from a clean slate so that a repeated run does not hand anchors of
  // an earlier run to the labeler or skip jittering because the pass counter
  // is already exhausted.
  anchor_array = [];
  jitter_i = 0;

  // jitter function needs different data and co-ordinate representation
  labels = d3.range(data.words.length).map(function(i) {
    var x = (T.Y[i][0]*20*ss + tx) + mapWidth/2;
    var y = (T.Y[i][1]*20*ss + ty) + mapHeight/2;
    anchor_array.push({x: x, y: y, r: jitterRadius});
    return {
      x: x,
      y: y,
      name: data.words[i]
    };
  });

  // get the actual label bounding boxes for the jitter function
  text.each(function(d, index) {
    var box = this.getBBox(); // measured once per label
    labels[index].width = box.width;
    labels[index].height = box.height;
  });

  labeler = d3.labeler()
    .label(labels)
    .anchor(anchor_array)
    .width(mapWidth)
    .height(mapHeight)
    .update(applyJitter);

  iter_id = setInterval(jitterStep, 1);
}
495
496 var jitter_i=0;
497
// One tick of the jitter phase: runs labeler.start2(10) and applies the
// result, until the pass counter has exceeded 100, at which point the
// interval timer is cancelled.
function jitterStep() {
  var passesSoFar = jitter_i;
  jitter_i += 1;

  if (passesSoFar > 100) {
    clearInterval(iter_id);
    return;
  }

  labeler.start2(10);
  applyJitter();
}
506
507 var last_cost=1000;
508
509 function step() {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100510 var i = T.iter;
511
512 if(i > <%= $no_iterations %>) {
513 stopStep();
514 } else {
515 var cost = Math.round(T.step() * 100000) / 100000; // do a few steps
516 $("#cost").html("tsne iteration " + i + ", cost: " + cost.toFixed(5));
517 if(i % 250 == 0 && cost >= last_cost) {
518 stopStep();
519 } else {
520 last_cost = cost;
521 updateEmbedding();
522 }
523 }
Marc Kupietz83305222016-04-28 09:57:22 +0200524 }
525
526 function showMap(j) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100527 data=j;
528 T.iter=0;
529 T.initDataRaw(data.vecs); // init embedding
530 drawEmbedding(); // draw initial embedding
Marc Kupietz78114532017-11-29 17:00:16 +0100531
Marc Kupietz4abcd682017-11-28 20:51:08 +0100532 if(iter_id >= 0) {
533 clearInterval(iter_id);
534 }
535 //T.debugGrad();
536 iter_id = setInterval(step, 1);
Marc Kupietz78114532017-11-29 17:00:16 +0100537 if(true) { // (<%= $show_som %>) {
Marc Kupietz4abcd682017-11-28 20:51:08 +0100538 makeSOM(j, <%= $no_iterations %>);
539 }
Marc Kupietz83305222016-04-28 09:57:22 +0200540 }
Marc Kupietz39179ab2017-07-04 16:28:06 +0200541 var queryword;
542
// Body onload hook: remembers the search input element in `queryword`.
function onload() {
  var searchField = document.getElementById('word');
  queryword = searchField;
}
546
// Opens KorAP (in the window/tab named "KorAP") with the current content of
// the search field as query.
function queryKorAP() {
  // Fall back to a direct lookup in case onload() has not populated queryword.
  var input = queryword || document.getElementById('word');
  // Encode the value so that "&", "#", "+" etc. stay part of the query
  // instead of being interpreted as URL syntax.
  window.open('http://korap.ids-mannheim.de/kalamar/?q=' + encodeURIComponent(input.value), 'KorAP');
}
Marc Kupietz4dc270c2017-11-24 10:17:12 +0100550
// Opens KorAP (in the window/tab named "KorAP") with `query` interpreted as a
// COSMAS II query (ql=cosmas2).
//   query: the plain, un-encoded query string; it is URL-encoded here.
function queryKorAPCII(query) {
  window.open('http://korap.ids-mannheim.de/kalamar/?ql=cosmas2&q=' + encodeURIComponent(query), 'KorAP');
}
Marc Kupietz83305222016-04-28 09:57:22 +0200554 </script>
555 </head>
Marc Kupietz39179ab2017-07-04 16:28:06 +0200556 <body onload="onload()">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100557 <div id="topwrapper">
558 <div id="options" class="widget">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100559 <form id="queryform">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100560 <input id="word" type="text" name="word" size="20" placeholder="Word(s) to be searched" value="<%= $word %>"
561 title="When looking for multiple words use spaces as separators to search around the average vector and | as separator to get the neighbours for each word."/>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100562 <input id="SEARCH" type="button" value="SEARCH">
563 <input type="button" id="showoptions" name="showoptions" value="Options" />
564 </form>
%# Content of the jQuery UI "Options" dialog. It is hidden with a valid
%# display value until the dialog widget takes it over ("hidden" is not a
%# display value, so the block used to flash on page load).
<div id="dropdownoptions" style="display: none">
  <form id="optionsform">
    <div class="controlgroup-vertical">
      <label for="cutoff">cut-off</label>
      <input id="cutoff" type="text" name="cutoff" size="10" value="<%= $cutoff %>" title="Only consider the most frequent x word forms.">
      <label for="dedupe">dedupe</label>
      <input id="dedupe" type="checkbox" name="dedupe" value="1" <%= ($dedupe ? "checked" : "") %> title="radically filter out any near-duplicates">
      % if($mergedEnd > 0) {
      <label for="sbf">backw.</label>
      <input id="sbf" type="checkbox" name="sbf" value="1" <%= ($searchBaseVocabFirst ? "checked" : "") %> title="If checked, base vocabulary will be searched first. Otherwise merged vocabulary will be searched first.">
      % }
      <label for="neighbours">max. neighbours:</label>
      <input id="neighbours" size="4" name="n" value="<%= $no_nbs %>">
      <label for="no_iterations">max. iterations</label>
      <input id="no_iterations" name="N" size="4" value="<%= $no_iterations %>">
      <!-- <label for="dosom">SOM</label>
      <input id="dosom" type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>> -->
      % if($collocators) {
      <label for="sortby">window/sort</label>
      <select id="sortby" name="sort">
        <option value="0" <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
        <option value="1" <%= ($sort==1? "selected":"") %>>any single position</option>
        <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option>
      </select>
      % }
      <input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP">
    </div>
  </form>
</div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100594 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100595 <div id="tabs">
596 <ul>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100597 <li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
598 <li><a href="#tabs-2">Semantics (SOM)</a></li>
599 <li><a href="#tabs-3">Syntagmatic (collocators)</a></li>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100600 </ul>
601 <div id="tabs-1">
Marc Kupietzdab9f222017-11-29 14:22:59 +0100602 % if($lists && (@$lists) > 0 && (@$lists)[0]) {
603 <div id="wrapper">
604 <div id="first" style="width:220px">
605 <table class="display compact nowrap" id="firsttable">
606 <thead>
607 <tr>
608 <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th>
609 </tr>
610 </thead>
611 <tbody>
612 % my $j=0; my @words; my @vecs; my @ranks; my @marked;
613 % for my $list (@$lists) {
614 % my $i=0; while($list) {
615 % my $item = (@$list)[$i];
616 % my $c = ($collocators? (@$collocators)[$i] : 0);
617 % last if(!$c && !$item);
618 <tr>
619 <td align="right">
620 <%= ++$i %>.
621 </td>
622 % if($item) {
623 % if(!grep{$_ eq $item->{word}} @words) {
624 % push @vecs, $item->{vector};
625 % push @words, $item->{word};
626 % push @ranks, $item->{rank};
627 % push @marked, ($marked->{$item->{word}}? 1 : 0);
Marc Kupietz4abcd682017-11-28 20:51:08 +0100628 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100629 <td align="right">
630 <%= sprintf("%.3f", $item->{dist}) %>
631 </td>
632 <td>
633 % my $class = ($marked->{$item->{word}}? "marked " : "");
634 % my $r = $item->{rank};
635 % if($r < $mergedEnd) {
636 % $class .= "merged";
637 % $r .= " (merged vocab)";
638 % } elsif($mergedEnd!=0 && $r > $mergedEnd) {
639 % $r -= $mergedEnd;
640 % }
641 <a class="<%= $class =%>"
642 title="freq. rank: <%= $r =%>"
643 href="<%= url_with->query([word => $item->{word}]) =%>">
644 <%= $item->{word} =%>
645 </a>
646 </td>
647 % } else {
648 <td colspan="2"/>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100649 % }
Marc Kupietzdab9f222017-11-29 14:22:59 +0100650 </tr>
651 % last if($i >= 100);
652 % }
653 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100654 </tbody>
655 </table>
656 </div>
<script>
  % use Mojo::ByteStream 'b';
  % my $urlprefix = url_with->query([word=>'']);
  // Start the visualisation once the window's load event has fired.
  // .on('load', ...) is used instead of the .load() shorthand, which no
  // longer exists in jQuery 3.
  $(window).on('load', function() {
    showMap(<%= b(Mojo::JSON::to_json({target => " $word ", mergedEnd=> $mergedEnd, words => \@words, vecs => \@vecs, ranks => \@ranks, marked => \@marked, urlprefix => $urlprefix})); %>);
  });
</script>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100664 % } else {
<div id="wrapper">
  <p>
    ERROR: "<%= $word %>" not found in vocabulary.
  </p>
</div>
670 % }
671 <div id="second">
672 <div id="embed">
673 </div>
674 <div id="cost">
675 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100676 </div>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100677 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100678 </div>
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100679 <div id="tabs-2">
%# Container for the SOM; the width now carries its unit (a bare "800" is
%# invalid CSS and was ignored by browsers).
<div id="som2" style="width: 800px; height: 800px">
</div>
682 <div id="sominfo1"><span id="somcolor1"> </span> <span id="somword1"> </span> <span id="somcolor2"> </span> <span id="somword2"> </span> <span id="somcolor3"> </span></div>
683 <div id="sominfo">SOM iteration <span id="iterations">0</span></div>
684 </div>
685 <div id="tabs-3">
Marc Kupietz4abcd682017-11-28 20:51:08 +0100686 <div id="second" style="width:500px">
687 <table class="display compact nowrap" id="secondtable">
688 <thead>
689 <tr>
690 % if($collocators) {
Marc Kupietzdab9f222017-11-29 14:22:59 +0100691 <th>#</th>
692 <th align="right" title="The window around the target word that is considered for summation.">w'</th>
693 <th align="right" title="Raw (max.) activation of the collocator in the output layers.">a</th>
694 <th title="Σp(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">Σp</th>
695 <th align="right">Σp/|w|</th>
696 <th title="c" align="left">collocator</th>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100697 % }
698 </tr>
Marc Kupietzdab9f222017-11-29 14:22:59 +0100699 </thead>
700 <tbody>
%# One row per collocator slot (always 100 rows; slots without a collocator
%# get an empty spanning cell).
% for(my $i=0; $i < 100; $i++) {
%   my $c = ($collocators? (@$collocators)[$i] : 0);
<tr>
  <td align="right">
    <%= $i %>
  </td>
  % if($c) {
  <td align="right">
    <span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
  </td>
  <td align="right">
    <%= sprintf("%.3f", $c->{dist}) %>
  </td>
  <td align="right">
    <%= sprintf("%.3e", $c->{norm}) %>
  </td>
  <td align="right">
    <%= sprintf("%.3e", $c->{sum}) %>
  </td>
  <td align="left">
    %# The query travels in a data attribute and is read back as plain data.
    %# Interpolating it into the onclick JavaScript would let an apostrophe in
    %# the (user-supplied) word break out of the string literal.
    <a data-query="<%= sprintf("%s /w5 %s", $c->{word}, $word) =%>"
       onclick="queryKorAPCII(this.getAttribute('data-query'))"
       title="freq. rank: <%= $c->{rank} =%>">
      <%= $c->{word} %>
    </a>
  </td>
  % } else {
  <td colspan="5"></td>
  % }
</tr>
% }
731 </tbody>
732 </table>
733 </div> <!-- - tab2 -->
Marc Kupietz4fcda0c2017-11-29 09:00:31 +0100734 </div> <!-- tabs -->
735 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100736 </div> <!-- topwrapper -->
737 <div style="clear: both;"></div>
Marc Kupietz0af83e32017-11-27 09:31:37 +0100738 </div>
Marc Kupietz4abcd682017-11-28 20:51:08 +0100739 % if($training_args) {
740 <p>
741 Word vector model trained with <a href="https://code.google.com/p/word2vec/">word2vec</a> using the following parameters: <pre><%= $training_args %></pre>
742 </p>
Marc Kupietz83305222016-04-28 09:57:22 +0200743 % }
Marc Kupietz4abcd682017-11-28 20:51:08 +0100744 </body>
Marc Kupietz83305222016-04-28 09:57:22 +0200745</html>