blob: 7039b046fc7b1981ba47f9b2d16adb47960a8bd3 [file] [log] [blame]
#!/usr/bin/env perl
use Mojo::Base -strict;
use Mojo::JSON 'j';
use Mojo::Util qw/slurp spurt/;
use Pod::Usage;
####
# Remove xip and cnx (connexor) foundries from legacy and new index files
# This needs the installation of the Mojolicious package:
# $ cpan install Mojolicious
####
our @ARGV;

# Pattern matching the names of the foundries to be stripped
my $COMM_FOUNDRIES = qr!(?:xip|cnx|connexor)!;

# Get file info from command line.
# pod2usage(1) prints the usage and exits on its own,
# so no additional die is required.
my $file = $ARGV[0];
pod2usage(1) unless defined $file && length $file;

# The output file defaults to the input file name prefixed with 'clean_'.
# The prefix is put in front of the last path segment only, so an input
# like 'dir/index.json' results in 'dir/clean_index.json' instead of
# the (most likely not existing) path 'clean_dir/index.json'.
my $out_file = $ARGV[1];
unless (defined $out_file && length $out_file) {
  ($out_file = $file) =~ s{([^/]+)\z}{clean_$1};
};

# Load file and jsonify
my $j = j(slurp $file);

# The following steps access the document as a JSON object. Anything else
# would either fail later or be autovivified into a meaningless document,
# so stop before an output file is written.
die "Unable to read a JSON object from '$file'\n" unless ref($j) eq 'HASH';

# Read fields
my ($tokens, $stream);
# Clean tokens
#
# Remove all entries matching $COMM_FOUNDRIES from a list of entries
# separated by single spaces.
#
# Expects the list as a single string argument and returns the remaining
# entries joined by single spaces. An undefined argument is treated as
# an empty list.
sub _clean ($) {

  # Read the list from the argument. The topic variable $_ must not be
  # used for this: it is a global that is aliased to whatever the caller
  # is currently iterating over and not to the value passed in.
  my $list = defined $_[0] ? $_[0] : '';

  return join ' ', grep { $_ !~ $COMM_FOUNDRIES } split / /, $list;
};
# Legacy index file
# The existence of the 'fields' array is checked before it is dereferenced.
# Accessing $j->{fields}->[1] directly would create an empty 'fields' array
# in documents that do not have one, which would then be part of the
# written output.
if (ref($j->{fields}) eq 'ARRAY' && ($tokens = $j->{fields}->[1])) {

  # Strip annotation info
  foreach (qw/layerInfo foundries/) {
    $tokens->{$_} = _clean $tokens->{$_} if $tokens->{$_};
  };

  # Read data
  $stream = $tokens->{data};
}

# New index file
elsif ($tokens = $j->{data}) {

  # Strip annotation info
  foreach (qw/layerInfos foundries/) {
    $tokens->{$_} = _clean $tokens->{$_} if $tokens->{$_};
  };

  # Read data
  $stream = $tokens->{stream};
}

# Neither of the known formats - make this visible instead of
# silently passing the document through
else {
  warn "No token data found in '$file' - nothing to strip\n";
};
# Clean data from xip and cnx:
# Rebuild the stream token by token and keep only those terms that do not
# start with one of the foundry names, optionally preceded by one of the
# prefixes '<>:', '<:', '>:', '@:' or '-:'.
my $clean_data = [];
foreach my $token (@$stream) {
  my @kept = grep {
    $_ !~ /^(?:(?:<>|<|>|@|-):)?$COMM_FOUNDRIES/o
  } @$token;

  # Every token is kept, even if all of its terms were removed
  push @$clean_data, \@kept;
};
# Put the cleaned stream back under the first key holding a true value:
# 'data' (legacy index file) is checked before 'stream' (new index file)
foreach my $key (qw/data stream/) {
  next unless $tokens->{$key};
  $tokens->{$key} = $clean_data;
  last;
};

# Serialize the modified document and write it to the output file
spurt(j($j), $out_file);
__END__
=pod

=head1 NAME

strip_commercial_annotations

=head1 SYNOPSIS

  perl strip_commercial_annotations my_file.json [my_clean_file.json]

=cut