blob: dfabae7a5f0c5497910c46a7eb8277579d27ae18 [file] [log] [blame]
Marc Kupietz66bbd2d2022-07-19 17:53:49 +02001#!/usr/bin/env perl
2
Marc Kupietz03e1bdc2023-11-06 15:40:44 +01003our $VERSION = '0.92.1';
Marc Kupietzbf9bac02022-04-11 21:16:47 +02004
Marc Kupietzc82b15f2022-07-19 17:36:27 +02005use IDS::DeReKoVecs::Read;
Marc Kupietzdc22b982015-10-09 09:19:34 +02006use Mojolicious::Lite;
Marc Kupietzc4893362016-02-25 08:04:46 +01007use Mojo::JSON qw(decode_json encode_json to_json);
Marc Kupietz30ca4342017-11-22 21:21:20 +01008use base 'Mojolicious::Plugin';
9
Marc Kupietz247500f2015-10-09 11:29:01 +020010use Encode qw(decode encode);
Marc Kupietza5b90152016-03-15 17:39:19 +010011use Getopt::Std;
Marc Kupietze8e3ded2020-07-13 17:53:56 +020012#use Mojo::Server::Daemon;
Marc Kupietzffef9302017-11-07 15:58:01 +010013use Cwd;
Marc Kupietz66bfd952017-12-11 09:59:45 +010014
# Server configuration file; the MOJO_CONFIG environment variable overrides
# the default location.
my $mojo_config = $ENV{MOJO_CONFIG} // '../derekovecs-server.conf';
plugin Config => { file => $mojo_config };

# Default model file, taken from the "w2v" section of the configuration.
my $DEFAULT_VECS = app->config->{w2v}->{vecs} // "../models/dereko-2021-i.vecs";

# Default .net file name: the vecs name with its first ".vecs" replaced by
# ".net" (stays empty if the vecs name contains no ".vecs").
my $DEFAULT_NET_NAME = "";
($DEFAULT_NET_NAME = $DEFAULT_VECS) =~ s/\.vecs/.net/
  if $DEFAULT_VECS =~ /\.vecs/;

my $DEFAULT_NET           = app->config->{w2v}->{net} // $DEFAULT_NET_NAME;
my $DOWNTIME_CALENDAR_URL = app->config->{downtime_calendar_url} // '';

# The first static path is the current working directory.
app->static->paths->[0] = getcwd;

plugin 'Piwik';
plugin "RemoteAddr";
plugin 'Util::RandomString' => {
  piwik_rand_id => { alphabet => '0123456789abcdef', length => 16 }
};

plugin 'Log::Access';
plugin "RequestBase";
#plugin 'AutoReload';
plugin Localize => {
  dict      => { _ => sub { $_->locale } },
  resources => ['../derekovecs-server.dict']
};

# Command line switches. Getopt::Std stores them in package variables, hence
# "our" instead of "my".
our $opt_i = 0; # latin1-input?
our $opt_l;
our ($opt_m, $opt_M);
our $opt_n = $DEFAULT_NET;
our ($opt_d, $opt_D, $opt_G);
our $opt_p = 5676;
our $opt_C;

our $mergedEnd = 0;
our %cache;
our %cccache; # classic collocator cache
our %spcache; # similar profile cache

my %marked;
my ($title, $training_args) = ("", "");

# Any unknown switch makes getopts fail and prints the usage message.
getopts('d:D:Gil:p:m:n:M:C') or usage();
67
# Print the command line help to STDERR and terminate the program.
#
# Takes no arguments and never returns. It is declared without a prototype:
# the sub is called before its definition is compiled, so an empty prototype
# could not be checked there and only caused a warning.
sub usage {
  print STDERR <<EOF;
non-server mode usage: MOJO_CONFIG=`pwd`/example.conf $0 [-h] [-d <file>]
-h : this (help) message
-d file : dump binary vecs as ascii text to <file>

server-mode invocation:

MOJO_CONFIG=`pwd`/example.conf morbo $0
EOF
  exit;
}
Marc Kupietz6ed81872016-04-27 14:04:04 +020080
# -M <file>: read whitespace separated words from a UTF-8 file into %marked.
if ($opt_M) {
  open(my $handle, '<:encoding(UTF-8)', $opt_M)
    or die "Can't open '$opt_M' for reading: $!";
  while (my $line = <$handle>) {
    # split ' ' skips leading whitespace, so no empty-string key is created.
    $marked{$_} = 1 for split(' ', $line);
  }
  close($handle);
}

# The model is the first readable command line argument, else the default.
my $vecs_name = (@ARGV > 0 && -r $ARGV[0]) ? $ARGV[0] : $DEFAULT_VECS;
init_net($vecs_name, $opt_n, ($opt_i ? 1 : 0), 1);

# Optional "<vecs>.args" file: its first line is kept as $training_args.
# Three-argument open with a lexical handle, so the file name can never be
# interpreted as an open mode, and the handle is only closed if it was opened.
if (open(my $args_fh, '<', "$vecs_name.args")) {
  $training_args = <$args_fh> // "";
  close($args_fh);
}
$title = fname2corpusname($vecs_name);

my $have_sprofiles = load_sprofiles($vecs_name);

# A "merge" entry in the configuration takes precedence over -m.
if (app->config->{w2v}->{merge}) {
  $opt_m = app->config->{w2v}->{merge};
}

if ($opt_m) {
  $mergedEnd = mergeVectors($opt_m);
  $title = "<span class=\"merged\">" . $title . "</span> vs. " . fname2corpusname($opt_m);
}

if ($opt_d) { # -d: dump vecs and exit
  dump_vecs($opt_d);
  exit;
}

if ($opt_D) { # -D: dump vecs for numpy and exit
  dump_for_numpy($opt_D);
  exit;
}

#my $daemon = Mojo::Server::Daemon->new(
#  app => app,
#  listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
#);

if ($opt_G) { # -G: filter garbage before serving
  print "Filtering garbage\n";
  filter_garbage();
  print "Finished filtering garbage\n";
}
132
# Static asset routes "*/js/*" (route name 'js') and "*/css/*" (route name
# 'css'): every "/derekovecs/" in the request URL is replaced by "/" and the
# result is served as a static file.
for my $asset (qw(js css)) {
  get "*/$asset/*" => sub {
    my ($c) = @_;
    my $url = $c->req->url;
    $url =~ s@/derekovecs/@/@g;
    $c->app->log->info("GET: " . $url);
    $c->reply->static($url);
  } => $asset;
}
Marc Kupietza9270572018-03-17 15:17:07 +0100148
# Derive a corpus name from a model file name.
#
# Parameter: a file name, optionally with directories.
# Returns:   the name without its directory part, with the first ".en"
#            turned into "-en", and with everything from the first remaining
#            dot onwards removed.
#
# Works on a lexical copy, so the caller's $_ is left untouched.
sub fname2corpusname {
  my ($name) = @_;
  $name =~ s@.*/@@;
  $name =~ s@\.en@-en@;
  $name =~ s@\..*@@;
  return $name;
}
156
# Resolve a request parameter to a word number.
#
# Parameter: either a number or a word.
# Returns:   the argument itself if it consists of digits only, otherwise
#            the result of getWordNumber() for it.
#
# The digit test is anchored at both ends, so a word that merely starts with
# a digit (e.g. "1990er") is looked up instead of being taken for a number.
# A lexical is used so the caller's $_ is not overwritten.
sub getWord {
  my ($word) = @_;
  if (defined($word) && $word =~ /\A\d+\z/) {
    return $word;
  }
  return getWordNumber($word);
}
165
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100166
# POST /derekovecs/getVecsByRanks: passes the decoded JSON request body to
# getVecs() and renders the result as JSON.
post '/derekovecs/getVecsByRanks' => sub {
  my ($c) = @_;
  # "scalar" keeps getVecs() in scalar context, as in a plain assignment.
  $c->render(json => scalar getVecs($c->req->json));
};
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100172
# getCollocationAssociation, reachable with and without a path prefix.
# The word is taken from parameter "w" (or the JSON body if "w" is empty),
# the collocate from parameter "c"; both are mapped through getWord().
{
  my $handler = sub {
    my ($c) = @_;
    $c->render(
      data   => getCollocationAssociation($c, getWord($c->param("w") || $c->req->json), getWord($c->param("c"))),
      format => 'json'
    );
  };
  any '*/getCollocationAssociation' => $handler => 'getCollocationAssociation';
  any '/getCollocationAssociation'  => $handler => 'getCollocationAssociation1';
}
182
# getClassicCollocators, reachable with and without a path prefix.
# The word comes from parameter "w" or, if that is empty, from the JSON body.
{
  my $handler = sub {
    my ($c) = @_;
    $c->render(
      data   => getClassicCollocatorsCached($c, getWord($c->param("w") || $c->req->json)),
      format => 'json'
    );
  };
  any '*/getClassicCollocators' => $handler => 'getClassicCollocators1';
  any '/getClassicCollocators'  => $handler => 'getClassicCollocators';
}
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100192
# getBiggestVocabDistances, reachable with and without a path prefix:
# renders the output of getBiggestMergedDifferences() with format 'json'.
{
  my $handler = sub {
    my ($c) = @_;
    $c->render(data => getBiggestMergedDifferences(), format => 'json');
  };
  any '/getBiggestVocabDistances'  => $handler => 'getBiggestVocabDistances1';
  any '*/getBiggestVocabDistances' => $handler => 'getBiggestVocabDistances';
}
Marc Kupietzd7760b42019-02-21 09:01:44 +0100202
# getPosWiseW2VCollocators, reachable with and without a path prefix.
# Request parameters: w (word), max (default 200), cutoff (default 750000)
# and threshold (default 0.2); a default applies whenever the parameter is
# missing or false. The result is rendered with format 'tsv'.
for my $pattern ('*/getPosWiseW2VCollocators', '/getPosWiseW2VCollocators') {
  any $pattern => sub {
    my ($c) = @_;
    $c->render(
      data => getPosWiseW2VCollocatorsAsTsv(
        $c->param("w"),
        ($c->param("max")       || 200),
        ($c->param("cutoff")    || 750000),
        ($c->param("threshold") || 0.2)
      ),
      format => 'tsv'
    );
  };
}
220
# getSimilarProfiles, reachable with and without a path prefix.
# The word comes from parameter "w" or, if that is empty, from the JSON body.
for my $pattern ('*/getSimilarProfiles', '/getSimilarProfiles') {
  any $pattern => sub {
    my ($c) = @_;
    $c->render(
      data   => getSimilarProfilesCached($c, getWord($c->param("w") || $c->req->json)),
      format => 'json'
    );
  };
}
230
# getWord, reachable with and without a path prefix.
# Answers with {"word": <w>, "frequencyRank": <rank>}. If getWord() yields a
# value <= 0, the rank is reported as -1 and the HTTP status is 404.
for my $pattern ('*/getWord', '/getWord') {
  any $pattern => sub {
    my ($c) = @_;
    my $w     = $c->param("w");
    my $rank  = getWord($w);
    my $known = !($rank <= 0);
    $c->render(
      data   => encode_json({word => $w, frequencyRank => ($known ? $rank : -1)}),
      format => 'json',
      status => ($known ? 200 : 404)
    );
  };
}
254
# getSimilarity, reachable with and without a path prefix: hands the request
# parameters "w1" and "w2" to cos_similarity_as_json() and renders the result
# with format 'json'.
for my $pattern ('/getSimilarity', '*/getSimilarity') {
  any $pattern => sub {
    my ($c) = @_;
    my ($w1, $w2) = map { scalar $c->param($_) } qw(w1 w2);
    $c->render(data => cos_similarity_as_json($w1, $w2), format => 'json');
  };
}
268
# getDowntimeCalendar, reachable with and without a path prefix: renders the
# result of getDowntimeCalendar() for the configured calendar URL as text.
for my $pattern ('*/getDowntimeCalendar', '/getDowntimeCalendar') {
  any $pattern => sub {
    my ($c) = @_;
    # "scalar" keeps the call in scalar context, as in a plain assignment.
    $c->render(data => scalar getDowntimeCalendar($DOWNTIME_CALENDAR_URL), format => 'text');
  };
}
280
# getVocabSize, reachable with and without a path prefix: renders the output
# of getVocabSize() with format 'json'.
for my $pattern ('/getVocabSize', '*/getVocabSize') {
  any $pattern => sub {
    my ($c) = @_;
    $c->render(data => getVocabSize(), format => 'json');
  };
}
290
# getVersion, reachable with and without a path prefix: renders $VERSION
# encoded as a JSON value.
for my $pattern ('/getVersion', '*/getVersion') {
  any $pattern => sub {
    my ($c) = @_;
    $c->render(data => to_json($VERSION), format => 'json');
  };
}
300
# getModelName, reachable with and without a path prefix: renders $title
# encoded as a JSON value.
for my $pattern ('/getModelName', '*/getModelName') {
  any $pattern => sub {
    my ($c) = @_;
    $c->render(data => to_json($title), format => 'json');
  };
}
310
# Static image route: every "/derekovecs" is removed from the request URL and
# the remainder is served as a static file.
get '*/img/*' => sub {
  my ($c) = @_;
  my $url = $c->req->url;
  $url =~ s@/derekovecs@@g;
  $c->app->log->info("GET: " . $url);
  $c->reply->static($url);
};
318
# Main page / query route (route name "paradigmaticAndSyntagmaticNbs").
#
# Request parameters (defaults in parentheses): word, n (50 in merged mode,
# else 100), N (2000), perplexity (20), epsilon (5), som, sbf, sort, csv,
# json, cutoff (500000), dedupe, nosp (all remaining: 0).
#
# "word" may hold several words separated by "|". For each of them
# get_neighbours() is called, or its result is taken from %cache, and the
# paradigmatic lists are collected. The response is JSON (json=1), a single
# comma separated line (csv=1), or the rendered "index" template.
get '/' => sub {
  my $c = shift;
  $c->app->log->info("get: ".$c->req->url->to_abs);
  my $word                 = $c->param('word');
  my $no_nbs               = $c->param('n') || ($opt_m ? 50 : 100);
  my $no_iterations        = $c->param('N') || 2000;
  my $perplexity           = $c->param('perplexity') || 20;
  my $epsilon              = $c->param('epsilon') || 5;
  my $som                  = $c->param('som') || 0;
  my $searchBaseVocabFirst = $c->param('sbf') || 0;
  my $sort                 = $c->param('sort') || 0;
  my $csv                  = $c->param('csv') || 0;
  my $json                 = $c->param('json') || 0;
  my $cutoff               = $c->param('cutoff') || 500000;
  my $dedupe               = $c->param('dedupe') || 0;
  my $nosp                 = $c->param('nosp') || 0;
  my $res;
  my @lists;

  if (defined($word) && $word !~ /^\s*$/) {
    $c->inactivity_timeout(300);
    $word =~ s/\s+/ /g;
    # In merged mode a single word is queried twice (once per toggle below).
    if ($opt_m && $word !~ /\|/) {
      $word .= "|$word";
    }
    for my $w (split(/ *\| */, $word)) {
      if ($opt_m) {
        # Flip the flag for every word so consecutive words alternate.
        $searchBaseVocabFirst = $searchBaseVocabFirst ? 0 : 1;
      }
      # One key for lookup and store, covering every argument that is passed
      # to get_neighbours(). $; separates the fields so that different
      # argument combinations cannot run together into the same key.
      my $cache_key = join($;, $w, $cutoff, $no_nbs, $sort, $dedupe, $nosp, $searchBaseVocabFirst);
      if ($cache{$cache_key}) {
        $c->app->log->info("Getting $w results from cache");
        $res = $cache{$cache_key};
      } else {
        $c->app->log->info('Looking for neighbours of '.$w);
        my $query = $opt_i ? encode("iso-8859-1", $w) : $w;
        $res = get_neighbours($query, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        $cache{$cache_key} = $res;
      }
      push(@lists, $res->{paradigmatic});
    }
  }

  # Normalise the separators for display.
  $word =~ s/ *\| */ | /g if defined($word);

  if ($json) {
    return $c->render(json => {word => $word, list => \@lists, collocators => $res->{syntagmatic}});
  } elsif ($csv) {
    # Read-only accessor: fetches the "word" of entry $i without creating
    # missing entries inside the (possibly cached) result structure.
    my $word_at = sub {
      my ($list, $i) = @_;
      my $entry = ref($list) eq 'ARRAY' ? $list->[$i] : undef;
      my $value = ref($entry) eq 'HASH' ? $entry->{word} : undef;
      return $value // "";
    };
    my $paradigmatic = $res ? $res->{paradigmatic} : undef;
    my $syntagmatic  = $res ? $res->{syntagmatic}  : undef;
    my $csv_data = "";
    # NOTE(review): the paradigmatic loop runs to $no_nbs inclusive, the
    # syntagmatic one does not; kept as found. Confirm that this is intended.
    for (my $i = 0; $i <= $no_nbs; $i++) {
      $csv_data .= $word_at->($paradigmatic, $i) . ", ";
    }
    for (my $i = 0; $i < $no_nbs; $i++) {
      $csv_data .= $word_at->($syntagmatic, $i) . ", ";
    }
    # Remove the trailing ", ".
    chop $csv_data;
    chop $csv_data;
    $csv_data .= "\n";
    return $c->render(text => $csv_data);
  } else {
    my $distantWords = "";
    # NOTE(review): this is true for a missing or a non-blank word, but not
    # for a blank one; kept as found. Verify the intended condition.
    if (!defined($word) || $word !~ /^\s*$/) {
      $distantWords = getBiggestMergedDifferences();
    }
    $c->render(
      template             => "index",
      title                => $title,
      word                 => $word,
      distantWords         => $distantWords,
      cutoff               => $cutoff,
      no_nbs               => $no_nbs,
      no_iterations        => $no_iterations,
      epsilon              => $epsilon,
      perplexity           => $perplexity,
      show_som             => $som,
      searchBaseVocabFirst => $searchBaseVocabFirst,
      sort                 => $sort,
      training_args        => $training_args,
      mergedEnd            => $mergedEnd,
      haveSProfiles        => $have_sprofiles,
      dedupe               => $dedupe,
      marked               => \%marked,
      lists                => \@lists,
      collocators          => $res->{syntagmatic},
      version              => $VERSION,
    );
  }
} => "paradigmaticAndSyntagmaticNbs";
Marc Kupietzdc22b982015-10-09 09:19:34 +0200412
# Helper bitvec2window: render a number as a ten character window pattern.
#
# Parameter: $n, an integer.
# Returns:   the lowest ten bits of $n as a string, most significant first,
#            with an "x" inserted after the fifth bit, every 0 shown as a
#            middle dot (U+00B7) and every 1 shown as "+".
helper(bitvec2window => sub {
  my ($self, $n) = @_;
  # 32 character bit string of $n in big-endian order.
  my $str = unpack("B32", pack("N", $n));
  $str =~ s/^\d{22}//;        # keep only the last ten bits
  $str =~ s/^(\d{5})/$1x/;    # "x" after the first five of them
  # The middle dot is written as an escape so that the source stays correct
  # whatever encoding the file is stored or transferred in.
  $str =~ s/0/\x{B7}/g;
  $str =~ s/1/+/g;
  return $str;
});
422
# Register a tracking hook only if a Piwik URL is configured.
my $piwik_config = app->config->{Piwik};
if ($piwik_config && $piwik_config->{url}) {
  hook(
    after_render => sub {
      my ($c) = @_;

      # Only track valid routes
      my $route = $c->current_route;
      return unless $route;

      # Send track and wait for it to finish.
      # This won't forward personalized information
      $c->piwik->api_p(
        Track => {
          action_url  => $c->req->url->to_abs,
          action_name => $route,
          ua          => $c->req->headers->user_agent,
          urlref      => '',
          send_image  => 0,
          dnt         => 0,
          cip         => $c->remote_addr,
          lang        => $c->req->headers->accept_language,
          uid         => $c->random_string('piwik_rand_id')
        }
      )->wait;
    }
  );
}
Marc Kupietz1b856fa2019-12-07 23:01:43 +0100453
# Templates are looked up in ../templates relative to the application home;
# afterwards control is handed to Mojolicious.
my $template_dir = app->home->rel_file('../templates');
app->renderer->paths([$template_dir]);
app->start;
#$daemon->run;
# app->start;

# exit;