blob: 75b29b42cd170aa513fcc1000c0be63322125b48 [file] [log] [blame]
Marc Kupietz66bbd2d2022-07-19 17:53:49 +02001#!/usr/bin/env perl
2
Marc Kupietz03e1bdc2023-11-06 15:40:44 +01003our $VERSION = '0.92.1';
Marc Kupietzbf9bac02022-04-11 21:16:47 +02004
Marc Kupietzc82b15f2022-07-19 17:36:27 +02005use IDS::DeReKoVecs::Read;
Marc Kupietzdc22b982015-10-09 09:19:34 +02006use Mojolicious::Lite;
Marc Kupietzc4893362016-02-25 08:04:46 +01007use Mojo::JSON qw(decode_json encode_json to_json);
Marc Kupietz30ca4342017-11-22 21:21:20 +01008use base 'Mojolicious::Plugin';
9
Marc Kupietz247500f2015-10-09 11:29:01 +020010use Encode qw(decode encode);
Marc Kupietza5b90152016-03-15 17:39:19 +010011use Getopt::Std;
Marc Kupietze8e3ded2020-07-13 17:53:56 +020012#use Mojo::Server::Daemon;
Marc Kupietzffef9302017-11-07 15:58:01 +010013use Cwd;
Marc Kupietz66bfd952017-12-11 09:59:45 +010014
# --- Configuration -----------------------------------------------------------
# The config file location comes from MOJO_CONFIG, with a relative fallback.
my $mojo_config = $ENV{MOJO_CONFIG} // '../derekovecs-server.conf';
plugin Config => {file => $mojo_config};

# Settings of the w2v section (created empty when the section is missing).
my $w2v_config = app->config->{w2v} //= {};

my $DEFAULT_VECS = $w2v_config->{vecs} // "../models/dereko-2021-i.vecs";
my $KORAP_URL = $w2v_config->{korap_url} // "https://korap.ids-mannheim.de";

# The default net file name is derived from the vecs file name by replacing
# the first ".vecs" with ".net"; it stays empty if there is no ".vecs" part.
my $DEFAULT_NET_NAME = $DEFAULT_VECS =~ /\.vecs/
  ? $DEFAULT_VECS =~ s/\.vecs/.net/r
  : "";
my $DEFAULT_NET = $w2v_config->{net} // $DEFAULT_NET_NAME;
my $DOWNTIME_CALENDAR_URL = app->config->{downtime_calendar_url} // '';

# Static files are looked up relative to the current working directory first.
app->static->paths->[0] = getcwd;
29
# --- Plugins -----------------------------------------------------------------
plugin 'Piwik';
plugin 'RemoteAddr';

# Generator settings for the random ids sent as "uid" with Piwik tracking calls.
my %piwik_rand_id = (
  alphabet => '0123456789abcdef',
  length   => 16,
);
plugin 'Util::RandomString' => {piwik_rand_id => \%piwik_rand_id};

plugin 'Log::Access';
plugin 'RequestBase';
#plugin 'AutoReload';

# Localization: the "_" dictionary entry resolves to the current locale.
plugin Localize => {
  dict      => {_ => sub { $_->locale }},
  resources => ['../derekovecs-server.dict'],
};
# --- Command-line options (filled in by getopts) and shared state -------------
# Options without a default value.
our ($opt_l, $opt_m, $opt_M, $opt_d, $opt_D, $opt_G, $opt_C);

# Options with a default value.
our $opt_i = 0;            # latin1-input?
our $opt_n = $DEFAULT_NET; # net file
our $opt_p = 5676;

# Value returned by mergeVectors() when a second model is merged (0 otherwise).
our $mergedEnd = 0;

# Result caches.
our %cache;
our %cccache; # classic collocator cache
our %spcache; # similar profile cache

# Words read from the -M file, page title and training arguments.
my %marked;
my $title = "";
my $training_args = "";
67
# Print the command-line help text to STDERR and terminate the process.
# Declared without a prototype and before its first call, so that perl does
# not warn "called too early to check prototype".
sub usage {
  print STDERR <<EOF;
non-server mode usage: MOJO_CONFIG=`pwd`/example.conf $0 [-h] [-d <file>]
-h : this (help) message
-d file : dump binary vecs as ascii text to <file>

server-mode invocation:

MOJO_CONFIG=`pwd`/example.conf morbo $0
EOF
  exit;
}

# Parse the command line into the $opt_* globals; any unknown option
# (including -h, which is not part of the option spec) shows the help text.
getopts('d:D:Gil:p:m:n:M:C') or usage();
Marc Kupietz6ed81872016-04-27 14:04:04 +020082
# -M <file>: read a UTF-8 file of whitespace-separated words and record each
# of them in %marked.
if ($opt_M) {
  open my $marked_fh, '<:encoding(UTF-8)', $opt_M
    or die "Can't open '$opt_M' for reading: $!";
  while (my $line = <$marked_fh>) {
    $marked{$_} = 1 for split /\s+/, $line;
  }
  close($marked_fh);
}
Marc Kupietza5b90152016-03-15 17:39:19 +010093
# Pick the model: the first command-line argument if it names a readable
# file, otherwise the configured default.
my $vecs_name = (@ARGV > 0 && -r $ARGV[0] ? $ARGV[0] : $DEFAULT_VECS);
init_net($vecs_name, $opt_n, ($opt_i? 1 : 0), 1);

# Optional "<vecs>.args" file: its first line is kept as the training
# arguments. A lexical handle with three-argument open is used so the file
# name cannot be interpreted as an open mode, and the handle is only closed
# when it was actually opened.
if (open(my $args_fh, '<', "$vecs_name.args")) {
  $training_args = <$args_fh> // "";
  close($args_fh);
}
$title = fname2corpusname($vecs_name);

my $have_sprofiles = load_sprofiles($vecs_name);
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100103
# A merge model set in the configuration takes precedence over -m.
my $configured_merge = app->config->{w2v}->{merge};
$opt_m = $configured_merge if $configured_merge;

# With a merge model, merge its vectors and show both corpus names in the title.
if ($opt_m) {
  $mergedEnd = mergeVectors($opt_m);
  $title = join('',
    '<span class="merged">', $title, '</span> vs. ', fname2corpusname($opt_m));
}
112
Marc Kupietze5568a02018-12-20 11:42:02 +0100113
# Non-server modes: the first dump option that is set is executed and the
# process exits afterwards.
#   -d: dump vecs and exit
#   -D: dump vecs for numpy and exit
for my $dump_mode ([$opt_d, \&dump_vecs], [$opt_D, \&dump_for_numpy]) {
  my ($target, $dumper) = @$dump_mode;
  next unless $target;
  $dumper->($target);
  exit;
}

#my $daemon = Mojo::Server::Daemon->new(
# app => app,
# listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
#);

# -G: run the garbage filter before serving, reporting progress on STDOUT.
if ($opt_G) {
  print "Filtering garbage\n";
  filter_garbage();
  print "Finished filtering garbage\n";
}
134
# Static asset routes for JavaScript and CSS. Both share one handler shape:
# every "/derekovecs/" in the request URL is collapsed to "/", the result is
# logged and then served as a static file. Routes are named "js" and "css".
for my $asset_route ([js => '*/js/*'], [css => '*/css/*']) {
  my ($route_name, $pattern) = @$asset_route;
  get $pattern => sub {
    my $c = shift;
    my $path = $c->req->url;
    $path =~ s@/derekovecs/@/@g;
    $c->app->log->info("GET: " . $path);
    $c->reply->static($path);
  } => $route_name;
}
Marc Kupietza9270572018-03-17 15:17:07 +0100150
# Derive a corpus display name from a model file name.
#
# Strips the directory part, turns the first ".en" into "-en" and then cuts
# everything from the first remaining dot onwards, e.g.
#   "../models/dereko-2021-i.vecs" -> "dereko-2021-i"
#   "/x/wiki.en.vecs"              -> "wiki-en"
#
# Works on a lexical copy, so the caller's $_ is left untouched.
sub fname2corpusname {
  my ($name) = @_;
  $name =~ s@.*/@@;
  $name =~ s@\.en@-en@;
  $name =~ s@\..*@@;
  return $name;
}
158
# Resolve a request parameter to a word number.
#
# A value that starts with a digit is returned unchanged; anything else is
# looked up with getWordNumber().
#
# Works on a lexical copy, so the caller's $_ is left untouched.
# NOTE(review): the pattern is only anchored at the start, so a value such as
# "2000er" is also passed through unchanged - confirm this is intended.
sub getWord {
  my ($word) = @_;
  return $word if $word =~ /^\d+/;
  return getWordNumber($word);
}
167
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100168
# POST /derekovecs/getVecsByRanks: pass the decoded JSON request body to
# getVecs() and render its result as JSON.
post '/derekovecs/getVecsByRanks' => sub {
  my $c = shift;
  $c->render(json => scalar getVecs($c->req->json));
};
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100174
# getCollocationAssociation, reachable with and without a path prefix.
# The node word is taken from parameter "w" or, if that is empty, from the
# JSON request body; the collocate comes from parameter "c". Both are
# resolved with getWord() before the lookup.
for my $route (
  ['*/getCollocationAssociation' => 'getCollocationAssociation'],
  ['/getCollocationAssociation'  => 'getCollocationAssociation1'],
) {
  my ($pattern, $route_name) = @$route;
  any $pattern => sub {
    my $c = shift;
    my $node = getWord($c->param("w") || $c->req->json);
    my $collocate = getWord($c->param("c"));
    $c->render(
      data   => getCollocationAssociation($c, $node, $collocate),
      format => 'json'
    );
  } => $route_name;
}
184
# getClassicCollocators, reachable with and without a path prefix.
# The word is taken from parameter "w" or, if that is empty, from the JSON
# request body, and resolved with getWord().
for my $route (
  ['*/getClassicCollocators' => 'getClassicCollocators1'],
  ['/getClassicCollocators'  => 'getClassicCollocators'],
) {
  my ($pattern, $route_name) = @$route;
  any $pattern => sub {
    my $c = shift;
    my $node = getWord($c->param("w") || $c->req->json);
    $c->render(data => getClassicCollocatorsCached($c, $node), format => 'json');
  } => $route_name;
}
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100194
# getBiggestVocabDistances, reachable with and without a path prefix:
# renders the output of getBiggestMergedDifferences() as JSON.
for my $route (
  ['/getBiggestVocabDistances'  => 'getBiggestVocabDistances1'],
  ['*/getBiggestVocabDistances' => 'getBiggestVocabDistances'],
) {
  my ($pattern, $route_name) = @$route;
  any $pattern => sub {
    my $c = shift;
    $c->render(data => getBiggestMergedDifferences(), format => 'json');
  } => $route_name;
}
Marc Kupietzd7760b42019-02-21 09:01:44 +0100204
# getPosWiseW2VCollocators, reachable with and without a path prefix.
# Parameters: "w" (word), "max" (default 200), "cutoff" (default 750000) and
# "threshold" (default 0.2). The result is rendered with format "tsv".
for my $pattern ('*/getPosWiseW2VCollocators', '/getPosWiseW2VCollocators') {
  any $pattern => sub {
    my $c = shift;
    my @query = (
      $c->param("w"),
      $c->param("max")       || 200,
      $c->param("cutoff")    || 750000,
      $c->param("threshold") || 0.2,
    );
    $c->render(data => getPosWiseW2VCollocatorsAsTsv(@query), format => 'tsv');
  };
}
222
# getSimilarProfiles, reachable with and without a path prefix.
# The word is taken from parameter "w" or, if that is empty, from the JSON
# request body, and resolved with getWord().
for my $pattern ('*/getSimilarProfiles', '/getSimilarProfiles') {
  any $pattern => sub {
    my $c = shift;
    my $node = getWord($c->param("w") || $c->req->json);
    $c->render(data => getSimilarProfilesCached($c, $node), format => 'json');
  };
}
232
# getWord, reachable with and without a path prefix.
# Responds with {"word": ..., "frequencyRank": ...}. A rank of zero or below
# is reported as -1 together with HTTP status 404; otherwise the status is 200.
for my $pattern ('*/getWord', '/getWord') {
  any $pattern => sub {
    my $c = shift;
    my $w = $c->param("w");
    my ($rank, $status) = (getWord($w), 200);
    ($rank, $status) = (-1, 404) if $rank <= 0;
    my $payload = encode_json({word => $w, frequencyRank => $rank});
    $c->render(data => $payload, format => 'json', status => $status);
  };
}
256
# getSimilarity, reachable with and without a path prefix: renders the output
# of cos_similarity_as_json() for the parameters "w1" and "w2" as JSON.
for my $pattern ('/getSimilarity', '*/getSimilarity') {
  any $pattern => sub {
    my $c = shift;
    my ($first, $second) = map { scalar $c->param($_) } qw(w1 w2);
    $c->render(data => cos_similarity_as_json($first, $second), format => 'json');
  };
}
270
# getDowntimeCalendar, reachable with and without a path prefix: renders the
# result of getDowntimeCalendar() for the configured calendar URL as text.
for my $pattern ('*/getDowntimeCalendar', '/getDowntimeCalendar') {
  any $pattern => sub {
    my $c = shift;
    $c->render(
      data   => scalar getDowntimeCalendar($DOWNTIME_CALENDAR_URL),
      format => 'text'
    );
  };
}
282
# getVocabSize, reachable with and without a path prefix: renders the output
# of getVocabSize() as JSON.
for my $pattern ('/getVocabSize', '*/getVocabSize') {
  any $pattern => sub {
    my $c = shift;
    $c->render(data => getVocabSize(), format => 'json');
  };
}
292
# getVersion, reachable with and without a path prefix: renders $VERSION
# serialized with to_json.
for my $pattern ('/getVersion', '*/getVersion') {
  any $pattern => sub {
    my $c = shift;
    $c->render(data => to_json($VERSION), format => 'json');
  };
}
302
# getModelName, reachable with and without a path prefix: renders the page
# title (derived from the model file name) serialized with to_json.
for my $pattern ('/getModelName', '*/getModelName') {
  any $pattern => sub {
    my $c = shift;
    $c->render(data => to_json($title), format => 'json');
  };
}
312
# Static image route: every "/derekovecs" is removed from the request URL,
# the result is logged and then served as a static file.
get '*/img/*' => sub {
  my $c = shift;
  my $path = $c->req->url;
  $path =~ s@/derekovecs@@g;
  $c->app->log->info("GET: " . $path);
  $c->reply->static($path);
};
320
# Main page and query endpoint (route name "paradigmaticAndSyntagmaticNbs").
#
# Query parameters (defaults in parentheses):
#   word        one or more query words separated by "|"
#   n           number of neighbours (50 with a merge model, otherwise 100)
#   N           no_iterations handed to the template (2000)
#   perplexity  handed to the template (20)
#   epsilon     handed to the template (5)
#   som         show_som flag handed to the template (0)
#   sbf         searchBaseVocabFirst flag (0)
#   sort, cutoff (500000), dedupe, nosp
#               handed to get_neighbours()
#   csv, json   select CSV or JSON output instead of the HTML page
#
# For every query word get_neighbours() is called (or its cached result is
# reused) and the "paradigmatic" list of the result is collected. The
# "syntagmatic" collocators of the last processed word are rendered as well.
get '/' => sub {
  my $c = shift;
  $c->app->log->info("get: ".$c->req->url->to_abs);
  my $word = $c->param('word');
  my $no_nbs = $c->param('n') || ($opt_m ? 50 : 100);
  my $no_iterations = $c->param('N') || 2000;
  my $perplexity = $c->param('perplexity') || 20;
  my $epsilon = $c->param('epsilon') || 5;
  my $som = $c->param('som') || 0;
  my $searchBaseVocabFirst = $c->param('sbf') || 0;
  my $sort = $c->param('sort') || 0;
  my $csv = $c->param('csv') || 0;
  my $json = $c->param('json') || 0;
  my $cutoff = $c->param('cutoff') || 500000;
  my $dedupe = $c->param('dedupe') || 0;
  my $nosp = $c->param('nosp') || 0;
  my $res;
  my @lists;

  if (defined($word) && $word !~ /^\s*$/) {
    $c->inactivity_timeout(300);
    $word =~ s/\s+/ /g;
    # With a merge model a single word is queried twice ("w|w"); the
    # searchBaseVocabFirst flag is flipped before each of the lookups.
    if ($opt_m && $word !~ /\|/) {
      $word .= "|$word";
    }
    for my $w (split(' *\| *', $word)) {
      if ($opt_m) {
        $searchBaseVocabFirst = $searchBaseVocabFirst ? 0 : 1;
      }
      # One key for lookup and store, built from every argument that is
      # passed to get_neighbours(). The parts are joined with "\0" so that
      # adjacent values cannot run into each other.
      my $cache_key = join("\0", $w, $cutoff, $no_nbs, $sort, $dedupe,
                           $searchBaseVocabFirst, $nosp);
      if ($cache{$cache_key}) {
        $c->app->log->info("Getting $w results from cache");
        $res = $cache{$cache_key};
      } else {
        $c->app->log->info('Looking for neighbours of '.$w);
        # With -i the query word is encoded as ISO-8859-1 for the lookup.
        my $query = $opt_i ? encode("iso-8859-1", $w) : $w;
        $res = get_neighbours($query, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        $cache{$cache_key} = $res;
      }
      push(@lists, $res->{paradigmatic});
    }
  }

  # Normalize the separators for display; skipped when no word was given,
  # which avoids an "uninitialized value" warning.
  $word =~ s/ *\| */ | /g if defined $word;

  if ($json) {
    return $c->render(json => {word => $word, list => \@lists, collocators=>$res->{syntagmatic}});
  } elsif ($csv) {
    my $csv_data = "";
    # NOTE(review): this loop runs to index $no_nbs inclusive, while the
    # syntagmatic loop below stops one earlier - kept as is; confirm
    # whether the extra paradigmatic column is intended.
    for (my $i=0; $i <= $no_nbs; $i++) {
      $csv_data .= $res->{paradigmatic}->[$i]->{word} . ", ";
    }
    for (my $i=0; $i < $no_nbs; $i++) {
      $csv_data .= $res->{syntagmatic}->[$i]->{word} . ", ";
    }
    # Remove the trailing ", ".
    chop $csv_data;
    chop $csv_data;
    $csv_data .= "\n";
    return $c->render(text=>$csv_data);
  } else {
    my $distantWords = "";
    # NOTE(review): this runs when no word was given or when the word is
    # not blank, i.e. only an empty/blank "word" parameter skips it -
    # confirm the intended condition.
    if (!defined($word) || $word !~ /^\s*$/) {
      $distantWords = getBiggestMergedDifferences();
    }
    $c->render(
      template => "index",
      title => $title,
      word => $word,
      distantWords => $distantWords,
      cutoff => $cutoff,
      no_nbs => $no_nbs,
      no_iterations => $no_iterations,
      epsilon => $epsilon,
      perplexity => $perplexity,
      show_som => $som,
      searchBaseVocabFirst => $searchBaseVocabFirst,
      sort => $sort,
      training_args => $training_args,
      mergedEnd => $mergedEnd,
      haveSProfiles => $have_sprofiles,
      dedupe => $dedupe,
      marked => \%marked,
      lists => \@lists,
      collocators => $res->{syntagmatic},
      version => $VERSION,
      korap_url => $KORAP_URL,
    );
  }
} => "paradigmaticAndSyntagmaticNbs";
Marc Kupietzdc22b982015-10-09 09:19:34 +0200415
# Template helper: render the lowest 10 bits of the integer $n as a window
# pattern string.
#
# The number is formatted as 32 binary digits, the first 22 digits are
# dropped, an "x" is inserted after the first five of the remaining digits,
# and finally every 0 becomes "·" and every 1 becomes "+".
# Example: 0b1100000011 -> "++···x···++"
helper(bitvec2window => sub {
  my ($self, $n) = @_;
  my $str = unpack("B32", pack("N", $n));
  $str =~ s/^\d{22}//;
  $str =~ s/^(\d{5})/$1x/;
  # Middle dot (U+00B7) for unset bits; the literal had been garbled by a
  # wrong character decoding.
  $str =~ s/0/·/g;
  $str =~ s/1/+/g;
  return $str;
});
425
# Usage tracking: only installed when a Piwik URL is configured.
my $piwik_config = app->config->{Piwik};
if ($piwik_config && $piwik_config->{url}) {
  hook(after_render => sub {
    my $c = shift;

    # Requests that did not match a route are not tracked.
    my $route = $c->current_route or return;

    # Tracking record; the uid is a fresh random string per call.
    my %track = (
      action_url  => $c->req->url->to_abs,
      action_name => $route,
      ua          => $c->req->headers->user_agent,
      urlref      => '',
      send_image  => 0,
      dnt         => 0,
      cip         => $c->remote_addr,
      lang        => $c->req->headers->accept_language,
      uid         => $c->random_string('piwik_rand_id'),
    );

    # Send the record and wait for the call to finish.
    $c->piwik->api_p(Track => \%track)->wait;
  });
}
Marc Kupietz1b856fa2019-12-07 23:01:43 +0100456
# Templates live in ../templates relative to the application home directory.
my $template_dir = app->home->rel_file('../templates');
app->renderer->paths([$template_dir]);

# Hand control to Mojolicious.
app->start;
459#$daemon->run;
Marc Kupietz95104512019-12-05 10:13:05 +0100460# app->start;
Marc Kupietzdc22b982015-10-09 09:19:34 +0200461
Marc Kupietz95104512019-12-05 10:13:05 +0100462# exit;