| #!/usr/bin/env perl |
| use Mojo::Base -strict; |
| use Mojo::JSON 'j'; |
| use Mojo::Util qw/slurp spurt/; |
| use Pod::Usage; |
| |
| #### |
| # Remove xip, cnx and connexor foundries from legacy and new index files |
| # This requires the Mojolicious package to be installed: |
| # $ cpan install Mojolicious |
| #### |
| |
# Foundry names whose annotations are stripped. The pattern is used both
# on the space-separated foundry/layer lists and on the stream terms.
my $COMM_FOUNDRIES = qr!(?:xip|cnx|connexor)!;

# Get file info from command line.
# pod2usage(1) prints the usage message and exits with status 1 itself,
# so no additional die is required.
my $file = $ARGV[0];
pod2usage(1) unless defined $file && length $file;

# The output defaults to the input name prefixed with 'clean_'
# (the prefix is put in front of the whole string as given).
my $out_file = $ARGV[1] || 'clean_' . $file;

# Load file and jsonify
# NOTE(review): slurp/spurt appear to have been dropped from Mojo::Util in
# newer Mojolicious releases in favour of Mojo::File - confirm against the
# installed version.
my $j = j(slurp $file);

# Everything below dereferences $j as a hash. Stop here instead of
# silently autovivifying a structure and writing it to the output file.
die "Unable to read a JSON object from '$file'\n"
    unless ref($j) eq 'HASH';

# Read fields:
# $tokens is the hash that carries the annotation info and the stream,
# $stream is the list of tokens (each a list of terms) to filter.
my ($tokens, $stream);
| |
# Clean tokens
#
# Takes a string of space-separated entries and returns it with every
# entry matching $COMM_FOUNDRIES removed; the remaining entries are
# joined by single spaces again.
sub _clean ($) {
    my ($list) = @_;

    # Work on the passed argument, not on the global $_: the callers
    # invoke this sub inside a foreach loop in which $_ holds the field
    # name rather than the field value.
    return join ' ',
        grep { $_ !~ $COMM_FOUNDRIES }
        split / /, $list;
}
| |
# Detect the index format and pick the matching field names.
# The reference check prevents `$j->{fields}->[1]` from autovivifying an
# empty `fields` array in files that do not have one (which would then
# be written to the output file).
my $fields = $j->{fields};
my @info_keys;

# Legacy index file: annotation info and data live in the second field
if (ref($fields) eq 'ARRAY' and $tokens = $fields->[1]) {
    @info_keys = qw/layerInfo foundries/;

    # Read data
    $stream = $tokens->{data};
}

# New index file: annotation info and stream live below `data`
elsif ($tokens = $j->{data}) {
    @info_keys = qw/layerInfos foundries/;

    # Read data
    $stream = $tokens->{stream};
}

# Strip annotation info (nothing to do if no known format was found)
foreach (@info_keys) {
    $tokens->{$_} = _clean $tokens->{$_} if $tokens->{$_};
}
| |
# Build a filtered copy of the stream: every token keeps only those terms
# that do not start with one of the foundries in $COMM_FOUNDRIES,
# optionally preceded by one of the markers `<>:`, `<:`, `>:`, `@:`, `-:`.
my $clean_data = [];
for my $terms (@$stream) {
    my @kept;
    for my $term (@$terms) {

        # Drop terms belonging to a stripped foundry
        next if $term =~ /^(?:(?:<>|<|>|@|-):)?$COMM_FOUNDRIES/o;
        push @kept, $term;
    }

    # Tokens are kept even if all of their terms were removed
    push @$clean_data, \@kept;
}
| |
# Put the filtered stream back under the key that holds the original
# one: `data` (legacy index file) is checked before `stream` (new index
# file), and only the first key with a true value is replaced.
foreach my $key (qw/data stream/) {
    next unless $tokens->{$key};
    $tokens->{$key} = $clean_data;
    last;
}

# Serialize the modified structure and write it to the output file
spurt j($j), $out_file;
| |
| __END__ |
| |
| =pod |
| |
| =head1 NAME |
| |
| strip_commercial_annotations |
| |
| =head1 SYNOPSIS |
| |
| perl strip_commercial_annotations my_file.json [my_clean_file.json] |
| |
| =cut |