#!/usr/bin/env perl
use Mojo::Base -strict;
use File::Spec;
use Mojo::JSON 'j';
use Mojo::Util qw/slurp spurt/;
use Pod::Usage;

####
# Remove xip and cnx (connexor) foundries from index files
# (legacy and new format).
# This needs the installation of the Mojolicious package:
# $ cpan Mojolicious
####

# Pattern naming the commercial foundries whose annotations are removed
my $COMM_FOUNDRIES = qr!(?:xip|cnx|connexor)!;

# Get file info from command line;
# without an input file, pod2usage prints the usage message and exits
# with status 1
my $file = $ARGV[0] or pod2usage(1);

# Default output: the input file name prefixed with "clean_". The prefix is
# applied to the file name only, so that an input like "dir/index.json"
# results in "dir/clean_index.json" instead of "clean_dir/index.json".
my $out_file = $ARGV[1] || do {
  my ($volume, $dirs, $name) = File::Spec->splitpath($file);
  File::Spec->catpath($volume, $dirs, 'clean_' . $name);
};

# Load file and jsonify
my $j = j(slurp $file);

# The code below treats the decoded value as a hash; j() may also hand
# back undef (e.g. if decoding fails) or an array. Stop here instead of
# writing a meaningless output file.
die "$file does not contain a JSON object\n" unless ref($j) eq 'HASH';

# Token section of the index and its list of token terms (set further down)
my ($tokens, $stream);

# Remove commercial foundries from a space-separated annotation list.
#
# Takes one string of entries separated by single spaces and returns the
# same list, joined by single spaces, without the entries that match
# $COMM_FOUNDRIES.
#
# The ($) prototype is kept so that existing call sites can invoke the sub
# without parentheses.
sub _clean ($) {
  my ($list) = @_;

  # Read the argument from @_ rather than from the global $_, which belongs
  # to whatever loop the caller happens to be in
  return join ' ', grep { $_ !~ $COMM_FOUNDRIES } split / /, $list;
}

# Locate the token section of the index file and strip the commercial
# foundries from its annotation summary fields. Afterwards $tokens refers
# to the section that was found and $stream to its list of token terms.

# Legacy index file: the section is the second entry of "fields".
# "fields" is tested before indexing into it, because dereferencing a
# missing key would autovivify an empty "fields" array that then ends up
# in the written output.
if (ref($j->{fields}) eq 'ARRAY' && ($tokens = $j->{fields}->[1])) {

  # Strip annotation info
  foreach (qw/layerInfo foundries/) {
    $tokens->{$_} = _clean($tokens->{$_}) if $tokens->{$_};
  }

  # Read data
  $stream = $tokens->{data};
}

# New index file: the section is stored under "data"
elsif ($tokens = $j->{data}) {

  # Strip annotation info
  foreach (qw/layerInfos foundries/) {
    $tokens->{$_} = _clean($tokens->{$_}) if $tokens->{$_};
  }

  # Read data
  $stream = $tokens->{stream};
}

# Build a filtered copy of the token stream: each token keeps all of its
# terms except those that start with a commercial foundry name, optionally
# preceded by one of the prefixes "<>:", "<:", ">:", "@:" or "-:".
my $commercial_term = qr/^(?:(?:<>|<|>|@|-):)?$COMM_FOUNDRIES/;

my $clean_data = [];
for my $terms (@$stream) {
  push @$clean_data, [ grep { $_ !~ $commercial_term } @$terms ];
}

# Store the cleaned stream under the key it was read from: "data" in a
# legacy index file, "stream" in a new one. Only the first of the two keys
# that holds a true value is replaced.
for my $key (qw/data stream/) {
  next unless $tokens->{$key};
  $tokens->{$key} = $clean_data;
  last;
}

# Serialize the modified structure and write it to the output file
spurt j($j), $out_file;

__END__

=pod

=head1 NAME

strip_commercial_annotations - Remove xip and cnx (connexor) foundries from index files

=head1 SYNOPSIS

perl strip_commercial_annotations my_file.json [my_clean_file.json]

=cut
92=cut