blob: 193038128d824d6325b09501ee0b1760d47573c2 [file] [log] [blame]
Marc Kupietz66bbd2d2022-07-19 17:53:49 +02001#!/usr/bin/env perl
2
Marc Kupietzb3d425d2024-11-15 16:23:21 +01003our $VERSION = '0.93.2';
Marc Kupietzbf9bac02022-04-11 21:16:47 +02004
Marc Kupietzc82b15f2022-07-19 17:36:27 +02005use IDS::DeReKoVecs::Read;
Marc Kupietzdc22b982015-10-09 09:19:34 +02006use Mojolicious::Lite;
Marc Kupietzc4893362016-02-25 08:04:46 +01007use Mojo::JSON qw(decode_json encode_json to_json);
Marc Kupietz30ca4342017-11-22 21:21:20 +01008use base 'Mojolicious::Plugin';
9
Marc Kupietz247500f2015-10-09 11:29:01 +020010use Encode qw(decode encode);
Marc Kupietza5b90152016-03-15 17:39:19 +010011use Getopt::Std;
Marc Kupietze8e3ded2020-07-13 17:53:56 +020012#use Mojo::Server::Daemon;
Marc Kupietzffef9302017-11-07 15:58:01 +010013use Cwd;
Marc Kupietz66bfd952017-12-11 09:59:45 +010014
# Server configuration file; MOJO_CONFIG overrides the built-in location.
my $mojo_config = $ENV{MOJO_CONFIG} // '../derekovecs-server.conf';
plugin Config => { file => $mojo_config };

# Values from the "w2v" config section, each with a hard-coded fallback.
my $DEFAULT_VECS = app->config->{w2v}->{vecs} // "../models/dereko-2021-i.vecs";
my $KORAP_URL    = app->config->{w2v}->{korap_url} // "https://korap.ids-mannheim.de";

# Default net file name: the vecs file name with its first ".vecs" replaced
# by ".net"; stays empty when the vecs name contains no ".vecs".
my $DEFAULT_NET_NAME = "";
($DEFAULT_NET_NAME = $DEFAULT_VECS) =~ s/\.vecs/.net/ if $DEFAULT_VECS =~ /\.vecs/;

my $DEFAULT_NET           = app->config->{w2v}->{net} // $DEFAULT_NET_NAME;
my $DOWNTIME_CALENDAR_URL = app->config->{downtime_calendar_url} // '';
my $COMPARE_TO            = app->config->{w2v}->{compare_to} // '';
29
# Static files are looked up relative to the current working directory.
app->static->paths->[0] = getcwd;

plugin 'Piwik';
plugin 'RemoteAddr';

# Random string generator "piwik_rand_id": 16 characters from the hex alphabet.
plugin 'Util::RandomString' => {
  piwik_rand_id => {
    alphabet => '0123456789abcdef',
    length   => 16,
  },
};

plugin 'Log::Access';
plugin 'RequestBase';
#plugin 'AutoReload';

# Localization dictionary, loaded from the dict file next to the config.
plugin Localize => {
  dict => {
    _ => sub { $_->locale },
  },
  resources => ['../derekovecs-server.dict'],
};
# Command line option variables; Getopt::Std's getopts() fills the $opt_*
# package variables, so they must stay "our".
our $opt_i = 0;              # latin1-input?
our $opt_l = undef;
our ($opt_m, $opt_M);
our $opt_n = $DEFAULT_NET;
our ($opt_d, $opt_D, $opt_G);

our $mergedEnd = 0;
our %cache;
our %cccache;                # classic collocator cache
our %spcache;                # similar profile cache
our $opt_p = 5676;
our $opt_C;

# File-level state shared by the route handlers below.
my %marked;
my $title         = "";
my $training_args = "";
69
# Write the command line help text to STDERR and terminate the program.
sub usage {
  my $help_text = <<EOF;
non-server mode usage: MOJO_CONFIG=`pwd`/example.conf $0 [-h] [-d <file>]
-h : this (help) message
-d file : dump binary vecs as ascii text to <file>

server-mode invocation:

MOJO_CONFIG=`pwd`/example.conf morbo $0
EOF
  print STDERR $help_text;
  exit;
}
Marc Kupietz6ed81872016-04-27 14:04:04 +020082
# Parse the command line; when getopts() reports failure, print the usage
# text and exit.
getopts('d:D:Gil:p:m:n:M:Ch') or usage();

# -M <file>: read a UTF-8 file of whitespace separated words into %marked.
if ($opt_M) {
  open(my $marked_fh, '<:encoding(UTF-8)', $opt_M)
    or die "Can't open '$opt_M' for reading: $!";
  while (my $line = <$marked_fh>) {
    $marked{$_} = 1 for split(/\s+/, $line);
  }
  close($marked_fh);
}
Marc Kupietza5b90152016-03-15 17:39:19 +010095
# Model file: the first command line argument when it is readable,
# otherwise the configured default.
my $vecs_name = (@ARGV > 0 && -r $ARGV[0] ? $ARGV[0] : $DEFAULT_VECS);
init_net($vecs_name, $opt_n, ($opt_i ? 1 : 0), 1);

# Optional side file "<vecs>.args": its first line is kept in $training_args.
# A lexical handle with 3-arg open is used, and the handle is only closed
# when it was actually opened. An empty file leaves $training_args as "".
if (open(my $args_fh, '<', "$vecs_name.args")) {
  $training_args = <$args_fh> // "";
  close($args_fh);
}
$title = fname2corpusname($vecs_name);

my $have_sprofiles = load_sprofiles($vecs_name);
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100105
# A "merge" entry in the w2v config section overrides the -m option.
$opt_m = app->config->{w2v}->{merge} if app->config->{w2v}->{merge};

# With a second model, merge its vectors and show both names in the title.
if ($opt_m) {
  $mergedEnd = mergeVectors($opt_m);
  $title = '<span class="merged">' . $title . '</span> vs. ' . fname2corpusname($opt_m);
}
114
Marc Kupietze5568a02018-12-20 11:42:02 +0100115
# Non-server modes: each dumps the loaded model and exits without
# starting the web application.

# -d <file>: dump vecs and exit
if ($opt_d) {
  dump_vecs($opt_d);
  exit;
}

# -D <file>: dump vecs for numpy and exit
if ($opt_D) {
  dump_for_numpy($opt_D);
  exit;
}
125
Marc Kupietze8e3ded2020-07-13 17:53:56 +0200126#my $daemon = Mojo::Server::Daemon->new(
127# app => app,
128# listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
129#);
Marc Kupietza5b90152016-03-15 17:39:19 +0100130
# -G: call filter_garbage() once at startup, announcing start and end on STDOUT.
if ($opt_G) {
  print("Filtering garbage\n");
  filter_garbage();
  print("Finished filtering garbage\n");
}
136
# JavaScript assets: replace every "/derekovecs/" in the request URL by "/"
# and serve the resulting path as a static file.
get '*/js/*' => sub {
  my $c = shift;
  (my $path = $c->req->url) =~ s{/derekovecs/}{/}g;
  $c->app->log->info("GET: " . $path);
  $c->reply->static($path);
} => 'js';
Marc Kupietzffef9302017-11-07 15:58:01 +0100144
# Stylesheets: replace every "/derekovecs/" in the request URL by "/"
# and serve the resulting path as a static file.
get '*/css/*' => sub {
  my $c = shift;
  (my $path = $c->req->url) =~ s{/derekovecs/}{/}g;
  $c->app->log->info("GET: " . $path);
  $c->reply->static($path);
} => 'css';
Marc Kupietza9270572018-03-17 15:17:07 +0100152
# Derive a display name from a model file name.
#
# Steps: drop everything up to the last "/", turn the first ".en" into "-en",
# then cut the name at the first remaining ".".
# Example: "../models/wiki.en.vecs" -> "wiki-en".
#
# Works on a lexical copy, so the caller's $_ is never overwritten
# (important when called inside for/map, where $_ aliases caller data).
# An undefined argument is returned unchanged.
sub fname2corpusname {
  my ($name) = @_;
  return $name unless defined $name;
  $name =~ s@.*/@@;
  $name =~ s@\.en@-en@;
  $name =~ s@\..*@@;
  return $name;
}
160
# Normalize a query that may be either a number or a word.
#
# A value consisting only of digits is returned unchanged; anything else is
# passed to getWordNumber() and that result is returned.
#
# The digit test is anchored at both ends, so words that merely start with
# digits (e.g. "1990er") are looked up as words instead of being mistaken
# for numbers. A lexical is used instead of the global $_, so callers
# iterating with for/map do not get their data overwritten.
sub getWord {
  my ($query) = @_;
  return $query if defined $query && $query =~ /\A\d+\z/;
  return getWordNumber($query);
}
169
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100170
# POST: hand the decoded JSON request body to getVecs() and send its result
# back as JSON.
post '/derekovecs/getVecsByRanks' => sub {
  my $c = shift;
  my $vectors = getVecs($c->req->json);
  $c->render(json => $vectors);
};
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100176
# Shared handler for both getCollocationAssociation routes.
# The node is taken from parameter "w" (or, when that is empty, from the JSON
# request body), the collocate from parameter "c"; both go through getWord().
my $collocation_association_handler = sub {
  my $c = shift;
  $c->render(
    data => getCollocationAssociation(
      $c,
      getWord($c->param("w") || $c->req->json),
      getWord($c->param("c"))
    ),
    format => 'json'
  );
};

any '*/getCollocationAssociation' => $collocation_association_handler => 'getCollocationAssociation';

any '/getCollocationAssociation' => $collocation_association_handler => 'getCollocationAssociation1';
186
# Shared handler for both getClassicCollocators routes.
# The word comes from parameter "w" or, when that is empty, from the JSON
# request body; $COMPARE_TO is passed through from the configuration.
my $classic_collocators_handler = sub {
  my $c = shift;
  $c->render(
    data => getClassicCollocatorsCached(
      $c,
      getWord($c->param("w") || $c->req->json),
      $COMPARE_TO
    ),
    format => 'json'
  );
};

any '*/getClassicCollocators' => $classic_collocators_handler => 'getClassicCollocators1';

any '/getClassicCollocators' => $classic_collocators_handler => 'getClassicCollocators';
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100198
# Shared handler for both getBiggestVocabDistances routes: renders the
# output of getBiggestMergedDifferences() with format "json".
my $biggest_vocab_distances_handler = sub {
  my $c = shift;
  $c->render(data => getBiggestMergedDifferences(), format => 'json');
};

any '/getBiggestVocabDistances' => $biggest_vocab_distances_handler => 'getBiggestVocabDistances1';

any '*/getBiggestVocabDistances' => $biggest_vocab_distances_handler => 'getBiggestVocabDistances';
Marc Kupietzd7760b42019-02-21 09:01:44 +0100208
# Shared handler for both getPosWiseW2VCollocators routes.
# Request parameters and their fallbacks when missing or false:
#   w         - query word (passed through as is)
#   max       - 200
#   cutoff    - 750000
#   threshold - 0.2
#   format    - 'json'; also used as the response format
my $pos_wise_collocators_handler = sub {
  my $c = shift;
  my $format = $c->param("format") || 'json';
  $c->render(
    data => getPosWiseW2VCollocators(
      $c->param("w"),
      ($c->param("max")       || 200),
      ($c->param("cutoff")    || 750000),
      ($c->param("threshold") || 0.2),
      $format
    ),
    format => $format
  );
};

any '*/getPosWiseW2VCollocators' => $pos_wise_collocators_handler;

any '/getPosWiseW2VCollocators' => $pos_wise_collocators_handler;
230
# Shared handler for both getSimilarProfiles routes.
# The word comes from parameter "w" or, when that is empty, from the JSON
# request body, and is normalized with getWord().
my $similar_profiles_handler = sub {
  my $c = shift;
  $c->render(
    data   => getSimilarProfilesCached($c, getWord($c->param("w") || $c->req->json)),
    format => 'json'
  );
};

any '*/getSimilarProfiles' => $similar_profiles_handler;

any '/getSimilarProfiles' => $similar_profiles_handler;
240
# Shared handler for both getWord routes.
# Responds with {"word": <w>, "frequencyRank": <rank>}. When getWord()
# yields a value <= 0 the rank is reported as -1 with HTTP status 404,
# otherwise the status is 200.
my $get_word_handler = sub {
  my $c = shift;
  my $w = $c->param("w");
  my $rank = getWord($w);
  my $status = 200;
  ($rank, $status) = (-1, 404) if $rank <= 0;
  $c->render(
    data   => encode_json({word => $w, frequencyRank => $rank}),
    format => 'json',
    status => $status
  );
};

any '*/getWord' => $get_word_handler;

any '/getWord' => $get_word_handler;
264
# Shared handler for both getSimilarity routes: passes parameters "w1" and
# "w2" to cos_similarity_as_json() and renders the result as JSON.
my $similarity_handler = sub {
  my $c = shift;
  my ($first, $second) = map { scalar $c->param($_) } qw(w1 w2);
  $c->render(data => cos_similarity_as_json($first, $second), format => 'json');
};

any '/getSimilarity' => $similarity_handler;

any '*/getSimilarity' => $similarity_handler;
278
# Shared handler for both getDowntimeCalendar routes: renders whatever
# getDowntimeCalendar() returns for the configured URL, with format "text".
my $downtime_calendar_handler = sub {
  my $c = shift;
  my $calendar_text = getDowntimeCalendar($DOWNTIME_CALENDAR_URL);
  $c->render(data => $calendar_text, format => 'text');
};

any '*/getDowntimeCalendar' => $downtime_calendar_handler;

any '/getDowntimeCalendar' => $downtime_calendar_handler;
290
# Shared handler for both getVocabSize routes: renders the output of
# getVocabSize() with format "json".
my $vocab_size_handler = sub {
  my $c = shift;
  $c->render(data => getVocabSize(), format => 'json');
};

any '/getVocabSize' => $vocab_size_handler;

any '*/getVocabSize' => $vocab_size_handler;
300
# Shared handler for both getVersion routes: the server's $VERSION,
# serialized with to_json().
my $version_handler = sub {
  my $c = shift;
  $c->render(data => to_json($VERSION), format => 'json');
};

any '/getVersion' => $version_handler;

any '*/getVersion' => $version_handler;
310
# Shared handler for both getModelName routes: the current $title,
# serialized with to_json().
my $model_name_handler = sub {
  my $c = shift;
  $c->render(data => to_json($title), format => 'json');
};

any '/getModelName' => $model_name_handler;

any '*/getModelName' => $model_name_handler;
320
# Images: remove every "/derekovecs" from the request URL and serve the
# resulting path as a static file.
get '*/img/*' => sub {
  my $c = shift;
  (my $path = $c->req->url) =~ s{/derekovecs}{}g;
  $c->app->log->info("GET: " . $path);
  $c->reply->static($path);
};
328
# Main page / main query endpoint.
#
# Request parameters (fallback when missing or false):
#   word        - query; several words may be separated by "|"
#   n           - number of neighbours (50 in merged mode, otherwise 100)
#   N           - 2000, passed to the template as no_iterations
#   perplexity  - 20,  epsilon - 5,  som - 0   (passed to the template)
#   sbf         - 0, "search base vocabulary first"
#   sort        - 0,  cutoff - 500000,  dedupe - 0,  nosp - 0
#   json / csv  - when true, answer with JSON or CSV instead of the HTML page
#
# Results of get_neighbours() are kept in %cache. The cache key is built
# once per word and used for lookup, read and store alike, and contains
# every argument that is passed to get_neighbours().
get '/' => sub {
  my $c = shift;
  $c->app->log->info("get: ".$c->req->url->to_abs);
  my $word                 = $c->param('word');
  my $no_nbs               = $c->param('n') || ($opt_m ? 50 : 100);
  my $no_iterations        = $c->param('N') || 2000;
  my $perplexity           = $c->param('perplexity') || 20;
  my $epsilon              = $c->param('epsilon') || 5;
  my $som                  = $c->param('som') || 0;
  my $searchBaseVocabFirst = $c->param('sbf') || 0;
  my $sort                 = $c->param('sort') || 0;
  my $csv                  = $c->param('csv') || 0;
  my $json                 = $c->param('json') || 0;
  my $cutoff               = $c->param('cutoff') || 500000;
  my $dedupe               = $c->param('dedupe') || 0;
  my $nosp                 = $c->param('nosp') || 0;
  my $res;
  my @lists;

  if (defined($word) && $word !~ /^\s*$/) {
    $c->inactivity_timeout(300);
    $word =~ s/\s+/ /g;
    # In merged mode a single word is queried twice (once per vocabulary).
    if ($opt_m && $word !~ /\|/) {
      $word .= "|$word";
    }
    for my $w (split(/ *\| */, $word)) {
      # In merged mode alternate the vocabulary searched first per word.
      if ($opt_m) {
        $searchBaseVocabFirst = $searchBaseVocabFirst ? 0 : 1;
      }
      my $cache_key = join("\0", $w, $cutoff, $no_nbs, $sort, $dedupe, $searchBaseVocabFirst, $nosp);
      if ($cache{$cache_key}) {
        $c->app->log->info("Getting $w results from cache");
        $res = $cache{$cache_key};
      } else {
        $c->app->log->info('Looking for neighbours of '.$w);
        # With -i the model expects latin1 encoded input.
        my $query = $opt_i ? encode("iso-8859-1", $w) : $w;
        $res = get_neighbours($query, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        $cache{$cache_key} = $res;
      }
      push(@lists, $res->{paradigmatic});
    }
  }

  # Normalize the separators for display; $word may be undefined here.
  $word =~ s/ *\| */ | /g if defined $word;

  if ($json) {
    return $c->render(json => {word => $word, list => \@lists, collocators => $res->{syntagmatic}});
  } elsif ($csv) {
    # Read a word from a result list without autovivifying missing entries
    # into the (cached) result structure; missing words become "".
    my $word_at = sub {
      my ($list, $i) = @_;
      my $entry = $list ? $list->[$i] : undef;
      return (defined $entry && defined $entry->{word}) ? $entry->{word} : '';
    };
    my $paradigmatic = $res ? $res->{paradigmatic} : undef;
    my $syntagmatic  = $res ? $res->{syntagmatic}  : undef;
    my $csv_data = "";
    # NOTE(review): the paradigmatic loop runs one index further (<=) than
    # the syntagmatic one (<); kept as found - confirm whether intended.
    for (my $i = 0; $i <= $no_nbs; $i++) {
      $csv_data .= $word_at->($paradigmatic, $i) . ", ";
    }
    for (my $i = 0; $i < $no_nbs; $i++) {
      $csv_data .= $word_at->($syntagmatic, $i) . ", ";
    }
    # Remove the trailing ", ".
    chop $csv_data;
    chop $csv_data;
    $csv_data .= "\n";
    return $c->render(text => $csv_data);
  } else {
    my $distantWords = "";
    # NOTE(review): this is true for a missing word and for a non-blank
    # word, but false for an empty/blank one; the second test may have been
    # meant as "=~". Kept as found - confirm the intent.
    if (!defined($word) || $word !~ /^\s*$/) {
      $distantWords = getBiggestMergedDifferences();
    }
    $c->render(
      template             => "index",
      title                => $title,
      word                 => $word,
      distantWords         => $distantWords,
      cutoff               => $cutoff,
      no_nbs               => $no_nbs,
      no_iterations        => $no_iterations,
      epsilon              => $epsilon,
      perplexity           => $perplexity,
      show_som             => $som,
      searchBaseVocabFirst => $searchBaseVocabFirst,
      sort                 => $sort,
      training_args        => $training_args,
      mergedEnd            => $mergedEnd,
      haveSProfiles        => $have_sprofiles,
      dedupe               => $dedupe,
      marked               => \%marked,
      lists                => \@lists,
      collocators          => $res->{syntagmatic},
      version              => $VERSION,
      korap_url            => $KORAP_URL,
    );
  }
} => "paradigmaticAndSyntagmaticNbs";
Marc Kupietzdc22b982015-10-09 09:19:34 +0200423
# Template helper: render a bit vector as a small window diagram.
#
# $n is packed as a 32-bit big-endian integer and expanded to its 32 binary
# digits. The 22 leading digits are dropped, an "x" is inserted after the
# first five of the remaining ten, and then every 0 becomes a middle dot
# and every 1 a "+". Example: 0b1100000011 -> "++···x···++".
#
# The middle dot is written as \x{B7} (U+00B7) so the replacement does not
# depend on how this source file is encoded or re-encoded.
helper(bitvec2window => sub {
  my ($self, $n) = @_;
  my $str = unpack("B32", pack("N", $n));
  $str =~ s/^\d{22}//;
  $str =~ s/^(\d{5})/$1x/;
  $str =~ s/0/\x{B7}/g;
  $str =~ s/1/+/g;
  return $str;
});
433
# Register the tracking hook only when a Piwik URL is configured.
if (app->config->{Piwik} && app->config->{Piwik}->{url}) {
  hook(after_render => sub {
    my $c = shift;

    # Only track valid routes
    my $route = $c->current_route or return;

    # Fields sent with the tracking call; the uid is a fresh random string
    # from the "piwik_rand_id" generator.
    my %track = (
      action_url  => $c->req->url->to_abs,
      action_name => $route,
      ua          => $c->req->headers->user_agent,
      urlref      => '',
      send_image  => 0,
      dnt         => 0,
      cip         => $c->remote_addr,
      lang        => $c->req->headers->accept_language,
      uid         => $c->random_string('piwik_rand_id')
    );
    # $c->app->log->info("PIWIK: counting " . $track{action_url} . "\nremote:" . $c->remote_addr);
    # $c->app->log->info("PIWIK: tag " . $c->piwik_tag);

    # Send track and wait for the call to finish
    $c->piwik->api_p(Track => \%track)->wait;

    # $c->app->log->info("PIWIK: counted.");
  });
}
Marc Kupietz1b856fa2019-12-07 23:01:43 +0100464
# Look up templates in "../templates" relative to the application home,
# then hand control to Mojolicious.
my $template_dir = app->home->rel_file('../templates');
app->renderer->paths([$template_dir]);
app->start;
#$daemon->run;
# app->start;

# exit;