blob: 3811858f69fc83e6e99133b2042e8c1390beb198 [file] [log] [blame]
Marc Kupietz66bbd2d2022-07-19 17:53:49 +02001#!/usr/bin/env perl
2
Marc Kupietzb3d425d2024-11-15 16:23:21 +01003our $VERSION = '0.93.2';
Marc Kupietzbf9bac02022-04-11 21:16:47 +02004
Marc Kupietzc82b15f2022-07-19 17:36:27 +02005use IDS::DeReKoVecs::Read;
Marc Kupietzdc22b982015-10-09 09:19:34 +02006use Mojolicious::Lite;
Marc Kupietzc4893362016-02-25 08:04:46 +01007use Mojo::JSON qw(decode_json encode_json to_json);
Marc Kupietz30ca4342017-11-22 21:21:20 +01008use base 'Mojolicious::Plugin';
9
Marc Kupietz247500f2015-10-09 11:29:01 +020010use Encode qw(decode encode);
Marc Kupietza5b90152016-03-15 17:39:19 +010011use Getopt::Std;
Marc Kupietze8e3ded2020-07-13 17:53:56 +020012#use Mojo::Server::Daemon;
Marc Kupietzffef9302017-11-07 15:58:01 +010013use Cwd;
Marc Kupietz66bfd952017-12-11 09:59:45 +010014
# Load the Mojolicious configuration; MOJO_CONFIG overrides the default file.
my $mojo_config = $ENV{MOJO_CONFIG} // '../derekovecs-server.conf';
plugin Config => { file => $mojo_config };

# Defaults taken from the w2v section of the configuration.
my $DEFAULT_VECS = app->config->{w2v}->{vecs}      // "../models/dereko-2021-i.vecs";
my $KORAP_URL    = app->config->{w2v}->{korap_url} // "https://korap.ids-mannheim.de";

# Derive the default net file name from the vecs file name (.vecs -> .net);
# stays empty when the vecs name carries no ".vecs".
my $DEFAULT_NET_NAME =
  $DEFAULT_VECS =~ /\.vecs/
  ? ($DEFAULT_VECS =~ s/\.vecs/.net/r)
  : "";

my $DEFAULT_NET           = app->config->{w2v}->{net}              // $DEFAULT_NET_NAME;
my $DOWNTIME_CALENDAR_URL = app->config->{downtime_calendar_url}   // '';
my $COMPARE_TO            = app->config->{w2v}->{compare_to}       // '';
29
# Static files are served relative to the current working directory.
app->static->paths->[0] = getcwd();

# Usage tracking and the helpers it relies on.
plugin 'Piwik';
plugin 'RemoteAddr';
plugin 'Util::RandomString' => {
  piwik_rand_id => {
    alphabet => '0123456789abcdef',
    length   => 16,
  },
};

plugin 'Log::Access';
plugin 'RequestBase';
#plugin 'AutoReload';

# Localization: the "_" dictionary entry resolves to the current locale.
plugin Localize => {
  dict      => { _ => sub { $_->locale } },
  resources => ['../derekovecs-server.dict'],
};
# Command-line option variables filled in by getopts().
our $opt_i = 0;                  # latin1-input?
our $opt_l;                      # only referenced by the commented-out daemon setup
our ($opt_m, $opt_M);            # -m: vecs file to merge, -M: file with words to mark
our $opt_n = $DEFAULT_NET;       # net file handed to init_net()
our ($opt_d, $opt_D, $opt_G);    # -d / -D: dump and exit, -G: filter garbage
our $opt_p = 5676;               # only referenced by the commented-out daemon setup
our $opt_C;

# Shared runtime state.
our $mergedEnd = 0;
our %cache;
our %cccache;                    # classic collocator cache
our %spcache;                    # similar profile cache

my %marked;
my ($title, $training_args) = ("", "");
69
# Print the command-line help to STDERR and terminate the process.
sub usage {
  my $help = <<"EOF";
non-server mode usage: MOJO_CONFIG=`pwd`/example.conf $0 [-h] [-d <file>]
-h : this (help) message
-d file : dump binary vecs as ascii text to <file>

server-mode invocation:

MOJO_CONFIG=`pwd`/example.conf morbo $0
EOF
  print STDERR $help;
  exit;
}
Marc Kupietz6ed81872016-04-27 14:04:04 +020082
# Parse the command line. -h is part of the option string and advertised by
# usage(), so it has to be acted upon explicitly: getopts() only sets $opt_h.
our $opt_h;
getopts('d:D:Gil:p:m:n:M:Ch') or usage();
usage() if $opt_h;

# -M <file>: whitespace-separated list of words to be marked in the output.
if ($opt_M) {
  open my $handle, '<:encoding(UTF-8)', $opt_M
    or die "Can't open '$opt_M' for reading: $!";
  while (my $line = <$handle>) {
    # split ' ' skips leading whitespace, so indented lines do not add an
    # empty string to the set of marked words.
    $marked{$_} = 1 for split ' ', $line;
  }
  close($handle);
}
Marc Kupietza5b90152016-03-15 17:39:19 +010095
# Use the first command-line argument as vecs file when it is readable,
# otherwise fall back to the configured default.
my $vecs_name = (@ARGV > 0 && -r $ARGV[0] ? $ARGV[0] : $DEFAULT_VECS);
init_net($vecs_name, $opt_n, ($opt_i ? 1 : 0), 1);

# The optional "<vecs>.args" file holds the training arguments in its first
# line. Three-argument open with a lexical handle keeps characters in the file
# name from being interpreted as an open mode, and the handle is only closed
# when it was actually opened.
if (open(my $args_fh, '<', "$vecs_name.args")) {
  $training_args = <$args_fh> // "";    # an empty file must not yield undef
  close($args_fh);
}
$title = fname2corpusname($vecs_name);

my $have_sprofiles = load_sprofiles($vecs_name);
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100105
# A merge file given in the configuration takes precedence over -m.
$opt_m = app->config->{w2v}->{merge} if app->config->{w2v}->{merge};

# Merge a second vector file and reflect the comparison in the page title.
if ($opt_m) {
  $mergedEnd = mergeVectors($opt_m);
  $title = sprintf('<span class="merged">%s</span> vs. %s', $title, fname2corpusname($opt_m));
}

# -d: dump vecs and exit; -D: dump vecs for numpy and exit.
for my $dump_mode ([$opt_d, \&dump_vecs], [$opt_D, \&dump_for_numpy]) {
  my ($target, $dumper) = @$dump_mode;
  next unless $target;
  $dumper->($target);
  exit;
}

#my $daemon = Mojo::Server::Daemon->new(
#  app => app,
#  listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
#);

# -G: filter garbage before serving.
if ($opt_G) {
  print "Filtering garbage\n";
  filter_garbage();
  print "Finished filtering garbage\n";
}
136
# Static JavaScript and CSS assets: drop the "/derekovecs/" prefix from the
# requested URL, log the request and answer with the matching static file.
{
  my $serve_asset = sub {
    my ($c) = @_;
    my $url = $c->req->url;
    $url =~ s@/derekovecs/@/@g;
    $c->app->log->info("GET: " . $url);
    $c->reply->static($url);
  };

  get '*/js/*'  => $serve_asset => 'js';
  get '*/css/*' => $serve_asset => 'css';
}
Marc Kupietza9270572018-03-17 15:17:07 +0100152
# Derive a display name for a corpus from a vecs file name: strip the
# directory part, turn a ".en" marker into "-en" and drop everything from the
# first remaining dot on.
#
# Works on a lexical copy instead of assigning to the global $_, so calling it
# inside a for/map/while(<>) block no longer overwrites the caller's topic.
#
# Returns the derived name (undef when called without a defined argument).
sub fname2corpusname {
  my ($name) = @_;
  return $name unless defined $name;
  $name =~ s@.*/@@;
  $name =~ s@\.en@-en@;
  $name =~ s@\..*@@;
  return $name;
}
160
# Normalise a request value to a word number: a value consisting only of
# digits is returned unchanged, anything else is resolved via getWordNumber().
#
# The digit test is anchored at both ends so that words merely starting with
# digits (e.g. "3D") are looked up rather than returned as-is, and the value is
# kept in a lexical so the caller's $_ is left untouched.
sub getWord {
  my ($word) = @_;
  if (defined $word && $word =~ /\A\d+\z/) {
    return $word;
  } else {
    return getWordNumber($word);
  }
}
169
Marc Kupietza51dcfa2018-03-19 16:22:05 +0100170
# POST endpoint: hands the JSON request body to getVecs() and returns the
# result as JSON.
post '/derekovecs/getVecsByRanks' => sub {
  my ($c) = @_;
  $c->render(json => scalar getVecs($c->req->json));
};
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100176
# Association between a word ("w" parameter, or the JSON body when "w" is
# absent) and a collocate ("c" parameter). Registered with and without a path
# prefix.
{
  my $collocation_association = sub {
    my ($c) = @_;
    $c->render(
      data => getCollocationAssociation(
        $c,
        getWord($c->param("w") ? $c->param("w") : $c->req->json),
        getWord($c->param("c"))
      ),
      format => 'json'
    );
  };

  any '*/getCollocationAssociation' => $collocation_association => 'getCollocationAssociation';
  any '/getCollocationAssociation'  => $collocation_association => 'getCollocationAssociation1';
}
186
# Classic collocators of a word ("w" parameter, or the JSON body when "w" is
# absent), compared against the configured $COMPARE_TO. Registered with and
# without a path prefix.
{
  my $classic_collocators = sub {
    my ($c) = @_;
    $c->render(
      data => getClassicCollocatorsCached(
        $c,
        getWord($c->param("w") ? $c->param("w") : $c->req->json),
        $COMPARE_TO
      ),
      format => 'json'
    );
  };

  any '*/getClassicCollocators' => $classic_collocators => 'getClassicCollocators1';
  any '/getClassicCollocators'  => $classic_collocators => 'getClassicCollocators';
}
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100198
# Returns the output of getBiggestMergedDifferences() as JSON. Registered
# with and without a path prefix.
{
  my $biggest_distances = sub {
    my ($c) = @_;
    $c->render(data => getBiggestMergedDifferences(), format => 'json');
  };

  any '/getBiggestVocabDistances'  => $biggest_distances => 'getBiggestVocabDistances1';
  any '*/getBiggestVocabDistances' => $biggest_distances => 'getBiggestVocabDistances';
}
Marc Kupietzd7760b42019-02-21 09:01:44 +0100208
# Position-wise word2vec collocators of "w" as TSV. Optional parameters and
# their defaults: max (200), cutoff (750000), threshold (0.2). Registered with
# and without a path prefix.
{
  my $poswise_collocators = sub {
    my ($c) = @_;
    $c->render(
      data => getPosWiseW2VCollocatorsAsTsv(
        $c->param("w"),
        ($c->param("max")       || 200),
        ($c->param("cutoff")    || 750000),
        ($c->param("threshold") || 0.2)
      ),
      format => 'tsv'
    );
  };

  any '*/getPosWiseW2VCollocators' => $poswise_collocators;
  any '/getPosWiseW2VCollocators'  => $poswise_collocators;
}
226
# Words with similar profiles for a word ("w" parameter, or the JSON body
# when "w" is absent). Registered with and without a path prefix.
{
  my $similar_profiles = sub {
    my ($c) = @_;
    $c->render(
      data   => getSimilarProfilesCached($c, getWord($c->param("w") ? $c->param("w") : $c->req->json)),
      format => 'json'
    );
  };

  any '*/getSimilarProfiles' => $similar_profiles;
  any '/getSimilarProfiles'  => $similar_profiles;
}
236
# Resolve the "w" parameter through getWord() and report it as
# {word, frequencyRank}. A rank <= 0 is reported as -1 with HTTP status 404.
# Registered with and without a path prefix.
{
  my $word_rank = sub {
    my ($c) = @_;
    my $w      = $c->param("w");
    my $rank   = getWord($w);
    my $status = 200;
    ($rank, $status) = (-1, 404) if $rank <= 0;
    $c->render(
      data   => encode_json({ word => $w, frequencyRank => $rank }),
      format => 'json',
      status => $status
    );
  };

  any '*/getWord' => $word_rank;
  any '/getWord'  => $word_rank;
}
260
# Similarity of the words given as "w1" and "w2", as produced by
# cos_similarity_as_json(). Registered with and without a path prefix.
{
  my $similarity = sub {
    my ($c) = @_;
    my ($w1, $w2) = map { scalar $c->param($_) } qw(w1 w2);
    $c->render(data => cos_similarity_as_json($w1, $w2), format => 'json');
  };

  any '/getSimilarity'  => $similarity;
  any '*/getSimilarity' => $similarity;
}
274
# Returns what getDowntimeCalendar() yields for the configured calendar URL,
# rendered as text. Registered with and without a path prefix.
{
  my $downtime_calendar = sub {
    my ($c) = @_;
    $c->render(
      data   => scalar getDowntimeCalendar($DOWNTIME_CALENDAR_URL),
      format => 'text'
    );
  };

  any '*/getDowntimeCalendar' => $downtime_calendar;
  any '/getDowntimeCalendar'  => $downtime_calendar;
}
286
# Returns the output of getVocabSize() with JSON content type. Registered
# with and without a path prefix.
{
  my $vocab_size = sub {
    my ($c) = @_;
    $c->render(data => getVocabSize(), format => 'json');
  };

  any '/getVocabSize'  => $vocab_size;
  any '*/getVocabSize' => $vocab_size;
}
296
# Returns the server version ($VERSION) as a JSON string. Registered with
# and without a path prefix.
{
  my $version = sub {
    my ($c) = @_;
    $c->render(data => to_json($VERSION), format => 'json');
  };

  any '/getVersion'  => $version;
  any '*/getVersion' => $version;
}
306
# Returns the model title ($title) as a JSON string. Registered with and
# without a path prefix.
{
  my $model_name = sub {
    my ($c) = @_;
    $c->render(data => to_json($title), format => 'json');
  };

  any '/getModelName'  => $model_name;
  any '*/getModelName' => $model_name;
}
316
# Static images: remove "/derekovecs" from the requested URL, log the request
# and answer with the matching static file.
get '*/img/*' => sub {
  my ($c) = @_;
  my $url = $c->req->url;
  $url =~ s@/derekovecs@@g;
  $c->app->log->info("GET: " . $url);
  $c->reply->static($url);
};
324
# Main page / query endpoint.
#
# Query parameters (with defaults): word, n (50 when a merged model is loaded,
# else 100), N (2000), perplexity (20), epsilon (5), som, sbf, sort, csv, json,
# cutoff (500000), dedupe, nosp. "word" may hold several alternatives
# separated by "|"; neighbours are looked up for each of them.
#
# Output: JSON when json is set, a comma-separated word list when csv is set,
# otherwise the "index" template.
get '/' => sub {
  my $c = shift;
  $c->app->log->info("get: " . $c->req->url->to_abs);

  my $word                 = $c->param('word');
  my $no_nbs               = $c->param('n') || ($opt_m ? 50 : 100);
  my $no_iterations        = $c->param('N') || 2000;
  my $perplexity           = $c->param('perplexity') || 20;
  my $epsilon              = $c->param('epsilon') || 5;
  my $som                  = $c->param('som') || 0;
  my $searchBaseVocabFirst = $c->param('sbf') || 0;
  my $sort                 = $c->param('sort') || 0;
  my $csv                  = $c->param('csv') || 0;
  my $json                 = $c->param('json') || 0;
  my $cutoff               = $c->param('cutoff') || 500000;
  my $dedupe               = $c->param('dedupe') || 0;
  my $nosp                 = $c->param('nosp') || 0;
  my $res;
  my @lists;

  if (defined($word) && $word !~ /^\s*$/) {
    $c->inactivity_timeout(300);
    $word =~ s/\s+/ /g;
    # With a merged model a single word is looked up twice, once per vocabulary.
    if ($opt_m && $word !~ /\|/) {
      $word .= "|$word";
    }
    for my $w (split(' *\| *', $word)) {
      # Alternate the vocabulary that is searched first between the words.
      if ($opt_m) {
        $searchBaseVocabFirst = $searchBaseVocabFirst ? 0 : 1;
      }

      # One key for lookup, read and store. It contains every argument that is
      # passed to get_neighbours(); the NUL separator keeps adjacent values
      # from running into each other.
      my $cache_key = join("\0", $w, $cutoff, $no_nbs, $sort, $dedupe, $searchBaseVocabFirst, $nosp);

      if ($cache{$cache_key}) {
        $c->app->log->info("Getting $w results from cache");
        $res = $cache{$cache_key};
      } else {
        $c->app->log->info('Looking for neighbours of ' . $w);
        if ($opt_i) {
          $res = get_neighbours(encode("iso-8859-1", $w), $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        } else {
          $res = get_neighbours($w, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        }
        # NOTE(review): %cache is never pruned; it grows with every distinct query.
        $cache{$cache_key} = $res;
      }
      push(@lists, $res->{paradigmatic});
    }
  }

  # Normalise the spacing around "|" for display; nothing to do without a word.
  $word =~ s/ *\| */ | /g if defined $word;

  if ($json) {
    return $c->render(json => {word => $word, list => \@lists, collocators => $res->{syntagmatic}});
  } elsif ($csv) {
    my $csv_data = "";
    # NOTE(review): this loop runs to $no_nbs inclusive while the syntagmatic
    # loop below stops one earlier; kept as found -- confirm which is intended.
    for (my $i = 0; $i <= $no_nbs; $i++) {
      $csv_data .= $res->{paradigmatic}->[$i]->{word} . ", ";
    }
    for (my $i = 0; $i < $no_nbs; $i++) {
      $csv_data .= $res->{syntagmatic}->[$i]->{word} . ", ";
    }
    # Remove the trailing ", ".
    chop $csv_data;
    chop $csv_data;
    $csv_data .= "\n";
    return $c->render(text => $csv_data);
  } else {
    my $distantWords = "";
    if (!defined($word) || $word !~ /^\s*$/) {
      $distantWords = getBiggestMergedDifferences();
    }
    $c->render(
      template             => "index",
      title                => $title,
      word                 => $word,
      distantWords         => $distantWords,
      cutoff               => $cutoff,
      no_nbs               => $no_nbs,
      no_iterations        => $no_iterations,
      epsilon              => $epsilon,
      perplexity           => $perplexity,
      show_som             => $som,
      searchBaseVocabFirst => $searchBaseVocabFirst,
      sort                 => $sort,
      training_args        => $training_args,
      mergedEnd            => $mergedEnd,
      haveSProfiles        => $have_sprofiles,
      dedupe               => $dedupe,
      marked               => \%marked,
      lists                => \@lists,
      collocators          => $res->{syntagmatic},
      version              => $VERSION,
      korap_url            => $KORAP_URL,
    );
  }
} => "paradigmaticAndSyntagmaticNbs";
Marc Kupietzdc22b982015-10-09 09:19:34 +0200419
# Template helper: render the lowest 10 bits of $n as a window pattern.
# The first five positions are followed by an "x", set bits are shown as "+"
# and cleared bits as a middle dot.
helper(bitvec2window => sub {
    my ($self, $n) = @_;
    my $str = unpack("B32", pack("N", $n));    # 32-character bit string
    $str =~ s/^\d{22}//;                       # keep the last 10 bits
    $str =~ s/^(\d{5})/$1x/;                   # "x" after the first five positions
    # U+00B7 MIDDLE DOT, written as an escape so the character survives
    # re-encoding of the source file (it had turned into mojibake).
    $str =~ s/0/\x{B7}/g;
    $str =~ s/1/+/g;
    return $str;
  });
429
# Usage tracking: only installed when a Piwik URL is configured. After each
# render a tracking request for the current route is sent through the Piwik
# plugin.
hook(
  after_render => sub {
    my ($c) = @_;

    # Only track valid routes
    my $route = $c->current_route;
    return unless $route;

    # This won't forward personalized information
    my %track = (
      action_url  => $c->req->url->to_abs,
      action_name => $route,
      ua          => $c->req->headers->user_agent,
      urlref      => '',
      send_image  => 0,
      dnt         => 0,
      cip         => $c->remote_addr,
      lang        => $c->req->headers->accept_language,
      uid         => $c->random_string('piwik_rand_id'),
    );

    # Send track and wait for the request to finish
    $c->piwik->api_p(Track => \%track)->wait;
  }
) if app->config->{Piwik} && app->config->{Piwik}->{url};
Marc Kupietz1b856fa2019-12-07 23:01:43 +0100460
# Templates live one level above the application home; then start the app.
my $template_dir = app->home->rel_file('../templates');
app->renderer->paths([$template_dir]);
app->start;
#$daemon->run;