use utf8;
package IDS::DeReKoVecs::Read;
use LWP::Simple;
use strict;
use warnings;
use Config;

# Path of the Inline C source. It has to be computed at compile time (in the
# BEGIN block below) because the `use Inline C => "$src_file"` statement is
# itself executed at compile time. The `= undef` initialiser runs later, at
# run time, so the variable only holds the path while the file is compiled.
my $src_file      = undef;

our $mergedEnd=0;  # word values above this are reduced by it in getClassicCollocatorsCached
our %cache;
our %cccache; # classic collocator cache
our %spcache; # similar profile cache
our $opt_p = 5676; # values in [5000, 5600) enable the remote lookup in getClassicCollocatorsCached
our $opt_C;        # when true, getClassicCollocatorsCached ignores %cccache

BEGIN {
  # Derive the C source path from this module's own path by swapping the file
  # name. The dot is escaped and the pattern anchored at the end so that only
  # the trailing "Read.pm" is rewritten, never an earlier path component.
  $src_file = __FILE__;
  $src_file =~ s/Read\.pm\z/derekovecs-server.c/;
}

# Compile and bind the C implementation. The extra flags start with a space:
# without it the last token of $Config{ccflags} and "-Wall" would be fused
# into a single compiler argument.
use Inline C => "$src_file" => CLEAN_AFTER_BUILD => 0, BUILD_NOISY => 1, ccflags =>  $Config{ccflags} . " -Wall -Wno-unused-result -fno-rtti -O4 -I/usr/local/include", libs => "-L/usr/local/lib64 -L/usr/local/lib -lcollocatordb";
#use Inline C => Config => BUILD_NOISY => 1, CFLAGS => $Config{cflags}." -O4 -mtune k9";
#use Inline C => Config => CLEAN_AFTER_BUILD => 0, ccflags => $Config{ccflags}." -Ofast -march k8 -mtune k8 ";

use Mojo::JSON qw(decode_json encode_json to_json);
use Exporter qw(import);

# Names exported by default to every package that uses this module.
# NOTE(review): only some of these subs are defined in the Perl code of this
# file; the others are presumably provided by the Inline C source - confirm.
our @EXPORT = qw(init_net load_sprofiles getVocabSize getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json);

# Fetches the downtime calendar from the given URL.
#
# Parameters:
#   $url - address of the calendar; undef, empty or whitespace-only means
#          that there is nothing to fetch.
#
# Returns the empty string when there is nothing to fetch, otherwise whatever
# LWP::Simple::get yields for the URL (undef when the request fails).
sub getDowntimeCalendar {
  my ($url) = @_;
  # Treat a missing URL like an empty one instead of triggering an
  # "uninitialized value" warning in the pattern match.
  if (!defined $url || $url =~ m/\A\s*\z/) {
    return "";
  }
  my $calendar = LWP::Simple::get($url);
  return $calendar;
}

# Returns the result of getCollocationScores for a word/collocate pair.
#
# Parameters:
#   $c         - first positional argument; not used by this sub
#   $word      - passed on as first argument of getCollocationScores
#   $collocate - passed on as second argument of getCollocationScores
sub getCollocationAssociation {
  my (undef, $word, $collocate) = @_;
  return getCollocationScores($word, $collocate);
}

# Returns the classic collocators of a word as a JSON string in which every
# collocate carries an additional "delta" field.
#
# Parameters:
#   $c    - object providing app->log (presumably the Mojolicious controller -
#           confirm); only used for logging
#   $word - compared numerically against $mergedEnd; values above $mergedEnd
#           are reduced by $mergedEnd before any lookup
#
# The local result comes from getClassicCollocators() and is kept in %cccache
# unless $opt_C is set. When $opt_p lies in [5000, 5600) the same word is also
# requested from the remote service. "delta" is then the local "ld" minus the
# remote "ld"; collocates the remote result does not contain are compared
# against 0.1 less than the smallest remote "ld" (capped at 14). Without
# usable remote data every "delta" is 0.
sub getClassicCollocatorsCached {
  my ($c, $word) = @_;
  if($word > $mergedEnd) {
    $word-=$mergedEnd;
  }

  # Start the remote request before computing the local result so that both
  # run concurrently. The list form of open bypasses the shell, so $word is
  # never interpreted as shell syntax, and the lexical handle replaces the
  # process-wide bareword PIPE.
  my $pipe;
  if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
    my $url = "http://corpora.ids-mannheim.de/openlab/derekovecs/getClassicCollocators?w=$word";
    if(!open($pipe, '-|', 'GET', $url)) {
      $c->app->log->warn("Cannot run GET for remote collocates of $word: $!");
      undef $pipe; # a failed open still leaves a glob reference behind
    }
  }

  if($opt_C || !$cccache{$word}) {
    $c->app->log->info("Getting classic collocates of $word.");
    $cccache{$word} = getClassicCollocators($word);
    # Bare nan/inf values are not valid JSON; turn them into strings.
    $cccache{$word} =~ s/:(-?)(nan|inf)/:"${1}${2}"/g;
    # Three consecutive quotes become an escaped quote inside a string
    # (presumably a collocate that is itself a double quote - confirm).
    $cccache{$word} =~ s/"""/"\\""/g;
  } else {
    $c->app->log->info("Getting classic collocates for $word from cache.");
  }

  # Collect the remote answer, if a request was started.
  my $remote_json = "";
  if($pipe) {
    my $body = do { local $/; <$pipe> };
    $remote_json = $body if defined $body;
    close($pipe)
      or $c->app->log->warn("GET for remote collocates of $word failed (status $?).");
  }

  my $local = decode_json($cccache{$word});

  # Short answers are treated as "no data". Longer ones come from the network
  # and may still be malformed, so a decoding failure must not abort the
  # request; it simply means there is nothing to compare against.
  my $remote;
  if(length($remote_json) > 2000) {
    $remote = eval { decode_json($remote_json) };
    if(ref($remote) ne 'HASH') {
      $c->app->log->warn("Ignoring unparsable remote collocates of $word.");
      undef $remote;
    }
  }

  if($remote) {
    my %remote_ld;
    my $min_ld = 14;
    foreach my $i (@{$remote->{collocates}}) {
      $remote_ld{$i->{word}} = $i->{ld};
      $min_ld = $i->{ld} if($i->{ld} < $min_ld);
    }
    foreach my $i (@{$local->{collocates}}) {
      my $ld = $remote_ld{$i->{word}};
      $i->{delta} = $i->{ld} - (defined $ld ? $ld : $min_ld-0.1);
    }
  } else {
    foreach my $i (@{$local->{collocates}}) {
      $i->{delta} = 0;
    }
  }
  return(encode_json($local));
}

# Caching front end for getSimilarProfiles.
#
# Parameters:
#   $c    - object whose app->log receives an info message on a cache hit
#   $word - argument for getSimilarProfiles, also used as the %spcache key
#
# Returns the stored value when %spcache holds a true value for $word;
# otherwise calls getSimilarProfiles, stores its result and returns it.
sub getSimilarProfilesCached {
  my ($c, $word) = @_;
  if ($spcache{$word}) {
    $c->app->log->info("Getting similar profiles for $word from cache:");
    return $spcache{$word};
  }
  $spcache{$word} = getSimilarProfiles($word);
  return $spcache{$word};
}

# A module file has to yield a true value so that require/use succeed.
return 1;
