| Nils Diewald | 5579179 | 2014-11-05 18:44:02 +0000 | [diff] [blame] | 1 | #!/usr/bin/env perl | 
 | 2 | use Mojo::Base -strict; | 
 | 3 | use Mojo::JSON 'j'; | 
 | 4 | use Mojo::Util qw/slurp spurt/; | 
 | 5 | use Pod::Usage; | 
 | 6 |  | 
####
# Remove annotations of the commercial foundries (xip, cnx, connexor)
# from legacy and new index files.
# This needs the installation of the Mojolicious package:
# $ cpan install Mojolicious
####
 | 12 |  | 
 | 13 | our @ARGV; | 
 | 14 | my $COMM_FOUNDRIES = qr!(?:xip|cnx|connexor)!; | 
 | 15 |  | 
 | 16 | # Get file info from command line | 
 | 17 | my $file     = $ARGV[0] or die pod2usage(1); | 
 | 18 | my $out_file = $ARGV[1] || 'clean_' . $file; | 
 | 19 |  | 
 | 20 | # Load file and jsonify | 
 | 21 | my $j = j(slurp $file); | 
 | 22 |  | 
 | 23 | # Read fields | 
 | 24 | my ($tokens, $stream); | 
 | 25 |  | 
 | 26 | # Clean tokens | 
 | 27 | sub _clean ($) { | 
 | 28 |   return join ' ', grep { $_ !~ $COMM_FOUNDRIES } | 
 | 29 |     split / /, $_ | 
 | 30 | }; | 
 | 31 |  | 
 | 32 | # Legacy index file | 
 | 33 | if ($tokens = $j->{fields}->[1]) { | 
 | 34 |  | 
 | 35 |   # Strip annotation info | 
 | 36 |   foreach (qw/layerInfo foundries/) { | 
 | 37 |     $tokens->{$_} = _clean $tokens->{$_} if $tokens->{$_}; | 
 | 38 |   }; | 
 | 39 |  | 
 | 40 |   # Read data | 
 | 41 |   $stream = $tokens->{data}; | 
 | 42 | } | 
 | 43 |  | 
 | 44 | # New index file | 
 | 45 | elsif ($tokens = $j->{data}) { | 
 | 46 |   # Strip annotation info | 
 | 47 |   foreach (qw/layerInfos foundries/) { | 
 | 48 |     $tokens->{$_} = _clean $tokens->{$_} if $tokens->{$_}; | 
 | 49 |   }; | 
 | 50 |  | 
 | 51 |   # Read data | 
 | 52 |   $stream = $tokens->{stream}; | 
 | 53 | }; | 
 | 54 |  | 
 | 55 | # Clean data from xip and cnx | 
 | 56 | my $clean_data = []; | 
 | 57 | foreach my $token (@$stream) { | 
 | 58 |   my $clean_token = []; | 
 | 59 |   foreach my $term (@$token) { | 
 | 60 |     if ($term !~ /^(?:(?:<>|<|>|@|-):)?$COMM_FOUNDRIES/o) { | 
 | 61 |       push @$clean_token, $term; | 
 | 62 |     }; | 
 | 63 |   }; | 
 | 64 |   push @$clean_data, $clean_token; | 
 | 65 | }; | 
 | 66 |  | 
 | 67 | # Legacy index file | 
 | 68 | if ($tokens->{data}) { | 
 | 69 |   $tokens->{data} = $clean_data; | 
 | 70 | } | 
 | 71 |  | 
 | 72 | # New index file | 
 | 73 | elsif ($tokens->{stream}) { | 
 | 74 |   $tokens->{stream} = $clean_data; | 
 | 75 | }; | 
 | 76 |  | 
 | 77 | # Write file | 
 | 78 | spurt j($j), $out_file; | 
 | 79 |  | 
__END__

=pod

=head1 NAME

strip_commercial_annotations - Remove xip, cnx and connexor annotations from index files

=head1 SYNOPSIS

perl strip_commercial_annotations my_file.json [my_clean_file.json]

=cut