|  | use utf8; | 
|  | package IDS::DeReKoVecs::Read; | 
|  | use LWP::Simple; | 
|  | use strict; | 
|  | use warnings; | 
|  | use Config; | 
|  |  | 
|  | my $src_file      = undef; | 
|  |  | 
|  | our $mergedEnd=0; | 
|  | our %cache; | 
|  | our %cccache; # classic collocator cache | 
|  | our %spcache; # similar profile cache | 
|  | our $opt_p = 5676; | 
|  | our $opt_C; | 
|  |  | 
|  | BEGIN { | 
|  | $src_file = __FILE__; | 
|  | $src_file =~ s/Read.pm/derekovecs-server.c/; | 
|  | } | 
|  |  | 
|  | use Inline C => "$src_file" => CLEAN_AFTER_BUILD => 0, BUILD_NOISY => 1, ccflags =>  $Config{ccflags} . "-Wall -Wno-unused-result -fno-rtti -O4 -I/usr/local/include", libs => "-L/usr/local/lib64 -l:libcollocatordb.so.1.3.0"; | 
|  | #use Inline C => Config => BUILD_NOISY => 1, CFLAGS => $Config{cflags}." -O4 -mtune k9"; | 
|  | #use Inline C => Config => CLEAN_AFTER_BUILD => 0, ccflags => $Config{ccflags}." -Ofast -march k8 -mtune k8 "; | 
|  |  | 
|  | use Mojo::JSON qw(decode_json encode_json to_json); | 
|  | use Exporter qw(import); | 
|  |  | 
|  | our @EXPORT = qw(init_net load_sprofiles getVocabSize getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json); | 
|  |  | 
|  | sub getDowntimeCalendar { | 
|  | my ($url) = @_; | 
|  | if ($url =~ m/^\s*$/) { | 
|  | return ""; | 
|  | } | 
|  | my $calendar = LWP::Simple::get($url); | 
|  | return $calendar; | 
|  | } | 
|  |  | 
|  | sub getCollocationAssociation { | 
|  | my ($c, $word, $collocate) = @_; | 
|  | return getCollocationScores($word, $collocate) | 
|  | } | 
|  |  | 
|  | sub getClassicCollocatorsCached { | 
|  | my ($c, $word) = @_; | 
|  | my $s2 = ""; | 
|  | if($word > $mergedEnd) { | 
|  | $word-=$mergedEnd; | 
|  | } | 
|  |  | 
|  | if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference | 
|  | open PIPE, "GET http://corpora.ids-mannheim.de/openlab/derekovecs/getClassicCollocators?w=$word  |"; | 
|  | } | 
|  | if($opt_C || !$cccache{$word}) { | 
|  | $c->app->log->info("Getting classic collocates of $word."); | 
|  | $cccache{$word} = getClassicCollocators($word); | 
|  | $cccache{$word} =~ s/:(-?)(nan|inf)/:"${1}${2}"/g; | 
|  | $cccache{$word} =~ s/"""/"\\""/g; | 
|  | } else { | 
|  | $c->app->log->info("Getting classic collocates for $word from cache."); | 
|  | } | 
|  | if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference | 
|  | while(<PIPE>) { | 
|  | $s2 .= $_; | 
|  | } | 
|  | close(PIPE); | 
|  | } | 
|  |  | 
|  | if(length($s2) > 2000) { | 
|  | my $d1 = decode_json($cccache{$word}); | 
|  | my $d2 = decode_json($s2); | 
|  | my %d2ld; | 
|  | my $minLd = 14; | 
|  | foreach my $i (@{$d2->{collocates}}) { | 
|  | $d2ld{$i->{word}}=$i->{ld}; | 
|  | $minLd=$i->{ld} if($i->{ld} < $minLd); | 
|  | } | 
|  | foreach my $i (@{$d1->{collocates}}) { | 
|  | my $w = $i->{word}; | 
|  | $i->{delta} = $i->{ld} - (defined $d2ld{$w} ? $d2ld{$w} : $minLd-0.1); | 
|  | } | 
|  | return(encode_json($d1)); | 
|  | } else { | 
|  | my $d1 = decode_json($cccache{$word}); | 
|  | foreach my $i (@{$d1->{collocates}}) { | 
|  | $i->{delta} = 0; | 
|  | } | 
|  | return(encode_json($d1)); | 
|  | } | 
|  | } | 
|  |  | 
|  | sub getSimilarProfilesCached { | 
|  | my ($c, $word) = @_; | 
|  | if(!$spcache{$word}) { | 
|  | $spcache{$word} = getSimilarProfiles($word); | 
|  | } else { | 
|  | $c->app->log->info("Getting similar profiles for $word from cache:"); | 
|  | } | 
|  | return $spcache{$word}; | 
|  | } | 
|  |  | 
|  | return 1; |