blob: 7df39d086ebb22d68824305f43a3bb589443565f [file] [log] [blame]
Marc Kupietzdc22b982015-10-09 09:19:34 +02001#!/usr/local/bin/perl
Marc Kupietz09a493b2021-03-15 15:10:48 +01002use Inline C => "./derekovecs-server.c" => CLEAN_AFTER_BUILD => 0, BUILD_NOISY => 1, ccflags => $Config{ccflags} . "-Wall -fno-rtti -O4 -I/usr/local/kl/include", libs => "-L/usr/local/kl/lib64 -l:libcollocatordb.so.1.3.0";
Marc Kupietza5f60042017-05-04 10:38:12 +02003#use Inline C => Config => BUILD_NOISY => 1, CFLAGS => $Config{cflags}." -O4 -mtune k9";
4#use Inline C => Config => CLEAN_AFTER_BUILD => 0, ccflags => $Config{ccflags}." -Ofast -march k8 -mtune k8 ";
Marc Kupietzdc22b982015-10-09 09:19:34 +02005use Mojolicious::Lite;
Marc Kupietzc4893362016-02-25 08:04:46 +01006use Mojo::JSON qw(decode_json encode_json to_json);
Marc Kupietz30ca4342017-11-22 21:21:20 +01007use base 'Mojolicious::Plugin';
8
Marc Kupietz247500f2015-10-09 11:29:01 +02009use Encode qw(decode encode);
Marc Kupietza5b90152016-03-15 17:39:19 +010010use Getopt::Std;
Marc Kupietze8e3ded2020-07-13 17:53:56 +020011#use Mojo::Server::Daemon;
Marc Kupietzffef9302017-11-07 15:58:01 +010012use Cwd;
Marc Kupietz66bfd952017-12-11 09:59:45 +010013
# Configuration file: taken from MOJO_CONFIG when set, otherwise the
# bundled default name.
my $mojo_config = $ENV{MOJO_CONFIG} // 'derekovecs-server.conf';
plugin Config => { file => $mojo_config };

# Default vector file; the default net file name is derived from it by
# swapping the first ".vecs" for ".net" (it stays empty when the vector file
# name contains no ".vecs").
my $DEFAULT_VECS = app->config->{w2v}->{vecs} // "../models/dereko-2021-i.vecs";
my $DEFAULT_NET_NAME = "";
if ($DEFAULT_VECS =~ /\.vecs/) {
  ($DEFAULT_NET_NAME = $DEFAULT_VECS) =~ s/\.vecs/.net/;
}
# An explicitly configured net file wins over the derived name.
my $DEFAULT_NET = app->config->{w2v}->{net} // $DEFAULT_NET_NAME;
Marc Kupietz397ce852020-07-13 17:52:21 +020024
# Serve static files from the directory the server was started in.
app->static->paths->[0] = getcwd();

# Usage tracking and the random id generator used for the tracking uid.
plugin 'Piwik';
plugin "RemoteAddr";
plugin 'Util::RandomString' => {
  piwik_rand_id => { alphabet => '0123456789abcdef', length => 16 }
};

plugin 'Log::Access';
plugin "RequestBase";
#plugin 'AutoReload';

# Localization: the "_" dictionary entry resolves to the current locale.
plugin Localize => {
  dict      => { _ => sub { $_->locale } },
  resources => ['derekovecs-server.dict']
};
# Command line options, filled in by getopts() below.
our $opt_i = 0;            # latin1-input?
our $opt_p = 5676;         # presumably the port (only compared against ranges here) -- TODO confirm
our $opt_n = $DEFAULT_NET; # net file handed to init_net()
our $opt_G = 1;            # filter garbage at startup
our (
  $opt_l,                  # only referenced by the commented-out daemon setup
  $opt_m,                  # vector file to merge (mergeVectors)
  $opt_M,                  # file with whitespace separated marked words
  $opt_d,                  # -d: dump vecs and exit
  $opt_D,                  # -D: dump vecs for numpy and exit
  $opt_C,                  # recompute classic collocators even if cached
);

# Global server state.
my %marked;                # words listed in the -M file
my $title         = "";
my $training_args = "";
my $mergedEnd     = 0;
my %cache;                 # neighbour results
my %cccache;               # classic collocator cache
my %spcache;               # similar profile cache

getopts('d:D:Gil:p:m:n:M:C');
Marc Kupietz6ed81872016-04-27 14:04:04 +020065
# -M: read a UTF-8 file of whitespace separated words and remember each of
# them in %marked.
if ($opt_M) {
  open my $handle, '<:encoding(UTF-8)', $opt_M
    or die "Can't open '$opt_M' for reading: $!";
  while (my $line = <$handle>) {
    # split ' ' skips leading whitespace, so an indented line no longer
    # registers the empty string as a marked word (split /\s+/ did).
    $marked{$_} = 1 for split ' ', $line;
  }
  close($handle);
}
Marc Kupietza5b90152016-03-15 17:39:19 +010076
# Pick the vector file: the first command line argument when it is readable,
# otherwise the configured default.
my $vecs_name = (@ARGV > 0 && -r $ARGV[0] ? $ARGV[0] : $DEFAULT_VECS);
init_net($vecs_name, $opt_n, ($opt_i ? 1 : 0), 1);

# The optional "<vecs>.args" file holds the training arguments on its first
# line. A missing file is not an error; the handle is only closed when it was
# actually opened, and an empty file leaves $training_args defined.
if (open(my $args_fh, '<', "$vecs_name.args")) {
  $training_args = <$args_fh> // "";
  close($args_fh);
}
$title = fname2corpusname($vecs_name);

my $have_sprofiles = load_sprofiles($vecs_name);
Marc Kupietza51dcfa2018-03-19 16:22:05 +010086
# A merge file from the configuration takes precedence over -m.
$opt_m = app->config->{w2v}->{merge} || $opt_m;

# When merging, remember where the merged vocabulary ends and show both
# corpus names in the page title.
if ($opt_m) {
  $mergedEnd = mergeVectors($opt_m);
  $title = sprintf('<span class="merged">%s</span> vs. %s', $title, fname2corpusname($opt_m));
}
95
Marc Kupietze5568a02018-12-20 11:42:02 +010096
# -d: dump vecs and exit
if ($opt_d) {
  dump_vecs($opt_d);
  exit;
}

# -D: dump vecs for numpy and exit
if ($opt_D) {
  dump_for_numpy($opt_D);
  exit;
}

#my $daemon = Mojo::Server::Daemon->new(
#    app => app,
#    listen => ['http://'.($opt_l ? $opt_l : '*').":$opt_p"]
#);

# -G: filter garbage before serving, reporting progress on STDOUT.
if ($opt_G) {
  print "Filtering garbage\n";
  filter_garbage();
  print "Finished filtering garbage\n";
}
117
# Static JavaScript and CSS files: drop the "/derekovecs/" prefix from the
# request URL and serve the remaining path as a static file. The routes are
# named 'js' and 'css'.
for my $asset_type (qw(js css)) {
  get "*/$asset_type/*" => sub {
    my ($c) = @_;
    (my $path = $c->req->url) =~ s{/derekovecs/}{/}g;
    $c->app->log->info("GET: " . $path);
    $c->reply->static($path);
  } => $asset_type;
}
Marc Kupietza9270572018-03-17 15:17:07 +0100133
# Derive a corpus name from a file name: strip the directory part, turn the
# first ".en" into "-en" and cut everything from the first remaining dot.
#
#   $fname   file name or path
#   returns  the derived corpus name (undef when $fname is undef)
#
# Works on a lexical copy so that the caller's $_ is left untouched.
sub fname2corpusname {
  my ($fname) = @_;
  return $fname unless defined $fname;
  $fname =~ s@.*/@@;
  $fname =~ s@\.en@-en@;
  $fname =~ s@\..*@@;
  return $fname;
}
141
# Map a request value to a word number.
#
#   $word    either a number or a word
#   returns  $word itself when it consists of ASCII digits only, otherwise
#            the result of getWordNumber($word)
#
# The digit check is anchored at both ends: input that merely starts with a
# digit (e.g. "1990er") is looked up as a word instead of being passed on
# verbatim as if it were a number. A lexical is used so that the caller's $_
# is not clobbered.
sub getWord {
  my ($word) = @_;
  return $word if defined $word && $word =~ /\A[0-9]+\z/;
  return getWordNumber($word);
}
150
# Thin wrapper around getCollocationScores(); the controller passed as first
# argument is not used.
sub getCollocationAssociation {
  my $c = shift;
  my $word = shift;
  my $collocate = shift;
  return getCollocationScores($word, $collocate);
}
155
# Return the classic collocators of a word as a JSON string, using %cccache.
#
#   $c       controller, used for logging only
#   $word    word number; values above $mergedEnd are shifted down by
#            $mergedEnd
#   returns  JSON text as produced by encode_json; every collocate carries an
#            additional "delta" field
#
# When $opt_p lies in [5000, 5600) the same word is also requested from the
# server at corpora.ids-mannheim.de, and delta becomes the difference between
# the local "ld" value and the remote one (or the smallest remote ld minus
# 0.1 when the collocate is missing remotely). In every other case, including
# a short or unparsable remote answer, delta is 0.
sub getClassicCollocatorsCached {
  my ($c, $word) = @_;
  if ($word > $mergedEnd) {
    $word -= $mergedEnd;
  }

  # Start the remote request before the local computation; presumably this
  # order was chosen so that both run concurrently.
  my $remote;
  if ($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
    my $url = "http://corpora.ids-mannheim.de/openlab/derekovecs/getClassicCollocators?w=$word";
    # List-form pipe open: no shell is involved, so $word cannot be
    # interpreted as shell syntax.
    unless (open($remote, '-|', 'GET', $url)) {
      $c->app->log->warn("Cannot run GET for $url: $!");
      undef $remote;
    }
  }

  if ($opt_C || !$cccache{$word}) {
    $c->app->log->info("Getting classic collocates of $word.");
    $cccache{$word} = getClassicCollocators($word);
    # Quote bare nan/inf values, which are not valid JSON numbers.
    $cccache{$word} =~ s/:(-?)(nan|inf)/:"${1}${2}"/g;
  } else {
    $c->app->log->info("Getting classic collocates for $word from cache.");
  }

  my $s2 = "";
  if ($remote) {
    local $/;
    $s2 = <$remote> // "";
    close($remote);
  }

  my $d1 = decode_json($cccache{$word});

  # Only answers longer than 2000 characters are used as reference data; an
  # answer that cannot be parsed is ignored instead of failing the request.
  my $d2 = length($s2) > 2000 ? eval { decode_json($s2) } : undef;

  if (ref($d2) eq 'HASH') {
    my %d2ld;
    my $minLd = 14;
    foreach my $i (@{$d2->{collocates}}) {
      $d2ld{$i->{word}} = $i->{ld};
      $minLd = $i->{ld} if ($i->{ld} < $minLd);
    }
    foreach my $i (@{$d1->{collocates}}) {
      my $w = $i->{word};
      $i->{delta} = $i->{ld} - (defined $d2ld{$w} ? $d2ld{$w} : $minLd - 0.1);
    }
  } else {
    foreach my $i (@{$d1->{collocates}}) {
      $i->{delta} = 0;
    }
  }
  return(encode_json($d1));
}
202
# Return the similar profiles of $word, computing them with
# getSimilarProfiles() on the first request and serving them from %spcache
# afterwards. $c is used for logging only.
sub getSimilarProfilesCached {
  my ($c, $word) = @_;
  if ($spcache{$word}) {
    $c->app->log->info("Getting similar profiles for $word from cache:");
    return $spcache{$word};
  }
  return $spcache{$word} = getSimilarProfiles($word);
}
212
# POST /derekovecs/getVecsByRanks: pass the JSON request body to getVecs()
# and render the result as JSON.
post '/derekovecs/getVecsByRanks' => sub {
  my ($self) = @_;
  $self->render(json => scalar getVecs($self->req->json));
};
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100218
# Collocation association of word "w" (or the JSON request body when "w" is
# not given) and collocate "c", with and without a path prefix.
for my $route (
  ['*/getCollocationAssociation', 'getCollocationAssociation'],
  ['/getCollocationAssociation',  'getCollocationAssociation1'],
) {
  my ($pattern, $name) = @$route;
  any $pattern => sub {
    my ($self) = @_;
    $self->render(
      data => getCollocationAssociation(
        $self,
        getWord($self->param("w") || $self->req->json),
        getWord($self->param("c"))
      ),
      format => 'json'
    );
  } => $name;
}
228
# Classic collocators of word "w" (or the JSON request body when "w" is not
# given), with and without a path prefix.
for my $route (
  ['*/getClassicCollocators', 'getClassicCollocators1'],
  ['/getClassicCollocators',  'getClassicCollocators'],
) {
  my ($pattern, $name) = @$route;
  any $pattern => sub {
    my ($self) = @_;
    $self->render(
      data   => getClassicCollocatorsCached($self, getWord($self->param("w") || $self->req->json)),
      format => 'json'
    );
  } => $name;
}
Marc Kupietz56dbabe2019-12-10 14:33:57 +0100238
# Render the result of getBiggestMergedDifferences() as JSON, with and
# without a path prefix.
for my $route (
  ['/getBiggestVocabDistances',  'getBiggestVocabDistances1'],
  ['*/getBiggestVocabDistances', 'getBiggestVocabDistances'],
) {
  my ($pattern, $name) = @$route;
  any $pattern => sub {
    my ($self) = @_;
    $self->render(data => getBiggestMergedDifferences(), format => 'json');
  } => $name;
}
Marc Kupietzd7760b42019-02-21 09:01:44 +0100248
# Position-wise w2v collocators of "w" as TSV. The optional parameters fall
# back to max=200, cutoff=750000 and threshold=0.2 when missing or false.
for my $pattern ('*/getPosWiseW2VCollocators', '/getPosWiseW2VCollocators') {
  any $pattern => sub {
    my ($self) = @_;
    $self->render(
      data => getPosWiseW2VCollocatorsAsTsv(
        $self->param("w"),
        ($self->param("max")       || 200),
        ($self->param("cutoff")    || 750000),
        ($self->param("threshold") || 0.2)
      ),
      format => 'tsv'
    );
  };
}
266
# Similar profiles of word "w" (or the JSON request body when "w" is not
# given), with and without a path prefix.
for my $pattern ('*/getSimilarProfiles', '/getSimilarProfiles') {
  any $pattern => sub {
    my ($self) = @_;
    $self->render(
      data   => getSimilarProfilesCached($self, getWord($self->param("w") || $self->req->json)),
      format => 'json'
    );
  };
}
276
# Look up the frequency rank of word "w". A rank of zero or below is
# reported as -1 together with HTTP status 404.
for my $pattern ('*/getWord', '/getWord') {
  any $pattern => sub {
    my ($self) = @_;
    my $w = $self->param("w");
    my $rank = getWord($w);
    my $status = 200;
    ($rank, $status) = (-1, 404) if $rank <= 0;
    $self->render(
      data   => encode_json({word => $w, frequencyRank => $rank}),
      format => 'json',
      status => $status
    );
  };
}
300
# Cosine similarity of "w1" and "w2" as JSON, with and without a path prefix.
for my $pattern ('/getSimilarity', '*/getSimilarity') {
  any $pattern => sub {
    my ($self) = @_;
    $self->render(
      data   => cos_similarity_as_json(scalar $self->param("w1"), scalar $self->param("w2")),
      format => 'json'
    );
  };
}
314
# Static images: remove every "/derekovecs" from the request URL and serve
# the remaining path as a static file.
get '*/img/*' => sub {
  my ($c) = @_;
  (my $path = $c->req->url) =~ s{/derekovecs}{}g;
  $c->app->log->info("GET: " . $path);
  $c->reply->static($path);
};
322
# Main page: neighbours of the queried word(s).
#
# Request parameters (defaults in parentheses): word, n (50 when merging,
# else 100), N (2000), perplexity (20), epsilon (5), som (0), sbf (0),
# sort (0), csv (0), json (0), cutoff (500000), dedupe (0), nosp (0).
#
# "word" may hold several queries separated by "|"; each one is passed to
# get_neighbours() and its paradigmatic list is collected in @lists. The
# syntagmatic collocators of the last query are reported. Output is JSON
# (json=1), a single CSV line (csv=1) or the "index" template.
get '/' => sub {
  my $c = shift;
  $c->app->log->info("get: ".$c->req->url->to_abs);
  my $word=$c->param('word');
  my $no_nbs=$c->param('n') || ($opt_m? 50 : 100);
  my $no_iterations=$c->param('N') || 2000;
  my $perplexity=$c->param('perplexity') || 20;
  my $epsilon=$c->param('epsilon') || 5;
  my $som=$c->param('som') || 0;
  my $searchBaseVocabFirst=$c->param('sbf') || 0;
  my $sort=$c->param('sort') || 0;
  my $csv=$c->param('csv') || 0;
  my $json=$c->param('json') || 0;
  my $cutoff=$c->param('cutoff') || 500000;
  my $dedupe=$c->param('dedupe') || 0;
  my $nosp=$c->param('nosp') || 0;
  my $res;
  my @lists;
  if (defined($word) && $word !~ /^\s*$/) {
    $c->inactivity_timeout(300);
    $word =~ s/\s+/ /g;
    # With a merged model a single query is looked up in both vocabularies.
    if ($opt_m && $word !~ /\|/) {
      $word .= "|$word";
    }
    for my $w (split(/ *\| */, $word)) {
      if ($opt_m) {
        # Alternate between the two vocabularies from query to query.
        $searchBaseVocabFirst = $searchBaseVocabFirst ? 0 : 1;
      }
      # One key for lookup, read and store, covering every argument that is
      # passed to get_neighbours(); the separator keeps adjacent values from
      # running into each other.
      my $cache_key = join("\0", $w, $cutoff, $no_nbs, $sort, $dedupe, $searchBaseVocabFirst, $nosp);
      if ($cache{$cache_key}) {
        $c->app->log->info("Getting $w results from cache");
        $res = $cache{$cache_key};
      } else {
        $c->app->log->info('Looking for neighbours of '.$w);
        my $query = $opt_i ? encode("iso-8859-1", $w) : $w;
        $res = get_neighbours($query, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
        $cache{$cache_key} = $res;
      }
      push(@lists, $res->{paradigmatic});
    }
  }

  # Normalize the separators for display; nothing to do without a query.
  $word =~ s/ *\| */ | /g if defined $word;
  if ($json) {
    return $c->render(json => {word => $word, list => \@lists, collocators=>$res->{syntagmatic}});
  } elsif ($csv) {
    my $csv_data="";
    # NOTE(review): the paradigmatic loop runs one index further (<=) than
    # the syntagmatic one (<); kept as is -- confirm whether this is intended.
    # Missing entries yield an empty field.
    for (my $i=0; $i <= $no_nbs; $i++) {
      $csv_data .= ($res->{paradigmatic}->[$i]->{word} // "") . ", ";
    }
    for (my $i=0; $i < $no_nbs; $i++) {
      $csv_data .= ($res->{syntagmatic}->[$i]->{word} // "") . ", ";
    }
    # Remove the trailing ", ".
    chop $csv_data;
    chop $csv_data;
    $csv_data .= "\n";
    return $c->render(text=>$csv_data);
  } else {
    my $distantWords="";
    # NOTE(review): this is true for a missing or a non-blank query, i.e. not
    # the negation of the query check above -- confirm the intended condition.
    if (!defined($word) || $word !~ /^\s*$/) {
      $distantWords = getBiggestMergedDifferences();
    }
    $c->render(template=>"index", title=>$title, word=>$word, distantWords=>$distantWords, cutoff=>$cutoff, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, haveSProfiles=> $have_sprofiles, dedupe=> $dedupe, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
  }
} => "paradigmaticAndSyntagmaticNbs";
Marc Kupietzdc22b982015-10-09 09:19:34 +0200395
# Helper: render the lowest 10 bits of $n as a window string. The bits are
# printed most significant first, an "x" is inserted after the fifth one,
# set bits become "+" and cleared bits a middle dot.
helper(bitvec2window => sub {
  my ($self, $n) = @_;
  my $str = unpack("B32", pack("N", $n));
  $str =~ s/^\d{22}//;        # keep the last 10 of the 32 bits
  $str =~ s/^(\d{5})/$1x/;
  # U+00B7 MIDDLE DOT, written as an escape so that the character survives
  # re-encoding of this source file.
  $str =~ s/0/\x{B7}/g;
  $str =~ s/1/+/g;
  return $str;
});
405
# After every render, report the request to Piwik.
hook after_render => sub {
  my ($c) = @_;

  # Only track valid routes
  my $route = $c->current_route;
  return unless $route;

  # This won't forward personalized information
  my $track = {
    action_url => $c->req->url->to_abs,
    action_name => $route,
    ua => $c->req->headers->user_agent,
    urlref => '',
    send_image => 0,
    dnt => 0,
    cip => $c->remote_addr,
    lang => $c->req->headers->accept_language,
    uid => $c->random_string('piwik_rand_id')
  };
  # $c->app->log->info("PIWIK: counting " . $track->{action_url} . "\nremote:" . $c->remote_addr);
  # $c->app->log->info("PIWIK: tag " . $c->piwik_tag);

  # Send track
  $c->piwik->api_p(Track => $track)->wait;

  # $c->app->log->info("PIWIK: counted.");
};

app->start;
#$daemon->run;
# app->start;

# exit;