blob: f4e7a9878e1d1f04b7eecd3e204450e913cde3bd [file] [log] [blame]
Marc Kupietzbf9bac02022-04-11 21:16:47 +02001use utf8;
2package IDS::DeReKoVecs::Read;
3use strict;
4use warnings;
5use Config;
6
7my $src_file = undef;
8
9our $mergedEnd=0;
10our %cache;
11our %cccache; # classic collocator cache
12our %spcache; # similar profile cache
13our $opt_p = 5676;
14our $opt_C;
15
16BEGIN {
17 $src_file = __FILE__;
18 $src_file =~ s/Read.pm/derekovecs-server.c/;
19}
20
21use Inline C => "$src_file" => CLEAN_AFTER_BUILD => 0, BUILD_NOISY => 1, ccflags => $Config{ccflags} . "-Wall -fno-rtti -O4 -I/usr/local/kl/include", libs => "-L/usr/local/kl/lib64 -l:libcollocatordb.so.1.3.0";
22#use Inline C => Config => BUILD_NOISY => 1, CFLAGS => $Config{cflags}." -O4 -mtune k9";
23#use Inline C => Config => CLEAN_AFTER_BUILD => 0, ccflags => $Config{ccflags}." -Ofast -march k8 -mtune k8 ";
24
25use Mojo::JSON qw(decode_json encode_json to_json);
26use Exporter qw(import);
27
Marc Kupietzc82b15f2022-07-19 17:36:27 +020028our @EXPORT = qw(init_net load_sprofiles getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy);
Marc Kupietzbf9bac02022-04-11 21:16:47 +020029
30
31sub getCollocationAssociation {
32 my ($c, $word, $collocate) = @_;
33 return getCollocationScores($word, $collocate)
34}
35
36sub getClassicCollocatorsCached {
37 my ($c, $word) = @_;
38 my $s2 = "";
39 if($word > $mergedEnd) {
40 $word-=$mergedEnd;
41 }
42
43 if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
44 open PIPE, "GET http://corpora.ids-mannheim.de/openlab/derekovecs/getClassicCollocators?w=$word |";
45 }
46 if($opt_C || !$cccache{$word}) {
47 $c->app->log->info("Getting classic collocates of $word.");
48 $cccache{$word} = getClassicCollocators($word);
49 $cccache{$word} =~ s/:(-?)(nan|inf)/:"${1}${2}"/g;
50 } else {
51 $c->app->log->info("Getting classic collocates for $word from cache.");
52 }
53 if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
54 while(<PIPE>) {
55 $s2 .= $_;
56 }
57 close(PIPE);
58 }
59
60 if(length($s2) > 2000) {
61 my $d1 = decode_json($cccache{$word});
62 my $d2 = decode_json($s2);
63 my %d2ld;
64 my $minLd = 14;
65 foreach my $i (@{$d2->{collocates}}) {
66 $d2ld{$i->{word}}=$i->{ld};
67 $minLd=$i->{ld} if($i->{ld} < $minLd);
68 }
69 foreach my $i (@{$d1->{collocates}}) {
70 my $w = $i->{word};
71 $i->{delta} = $i->{ld} - (defined $d2ld{$w} ? $d2ld{$w} : $minLd-0.1);
72 }
73 return(encode_json($d1));
74 } else {
75 my $d1 = decode_json($cccache{$word});
76 foreach my $i (@{$d1->{collocates}}) {
77 $i->{delta} = 0;
78 }
79 return(encode_json($d1));
80 }
81}
82
83sub getSimilarProfilesCached {
84 my ($c, $word) = @_;
85 if(!$spcache{$word}) {
86 $spcache{$word} = getSimilarProfiles($word);
87 } else {
88 $c->app->log->info("Getting similar profiles for $word from cache:");
89 }
90 return $spcache{$word};
91}
92
93return 1;